From 97c053ba7353b4811d34e76f3b42007689c903d0 Mon Sep 17 00:00:00 2001 From: bxdd Date: Thu, 10 Dec 2020 06:08:13 +0000 Subject: [PATCH 01/85] update setup --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index cc495824e..0ecbf959e 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. + +# -*- coding: utf-8 -*- import io import os import numpy From 72967801495c2cdad4ec30399d72355364cc713a Mon Sep 17 00:00:00 2001 From: bxdd Date: Thu, 10 Dec 2020 06:37:02 +0000 Subject: [PATCH 02/85] fix setup --- setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0ecbf959e..f01974e8f 100644 --- a/setup.py +++ b/setup.py @@ -96,7 +96,12 @@ setup( long_description=long_description, long_description_content_type="text/markdown", python_requires=REQUIRES_PYTHON, - packages=find_packages(exclude=("tests",)), + packages=find_packages( + exclude=( + "tests", + "examples", + ) + ), # if your package is a single module, use this instead of 'packages': # py_modules=['qlib'], entry_points={ From 936d5abb1f0cd2e9f19269be29610a1198e390f3 Mon Sep 17 00:00:00 2001 From: bxdd Date: Thu, 10 Dec 2020 06:58:46 +0000 Subject: [PATCH 03/85] fix docs req --- docs/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index ea9582e0a..b94599e7b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1,3 @@ -Cython==0.29.21 \ No newline at end of file +Cython==0.29.21 +scipy +scikit-learn \ No newline at end of file From e2d862bfb2735163e28fa63154a56a746964209c Mon Sep 17 00:00:00 2001 From: bxdd Date: Thu, 10 Dec 2020 07:03:04 +0000 Subject: [PATCH 04/85] fix system package --- .readthedocs.yml | 3 +-- docs/requirements.txt | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 657981243..2abc662ce 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -18,5 +18,4 @@ python: install: - requirements: docs/requirements.txt - method: setuptools - path: . - system_packages: true \ No newline at end of file + path: . \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index b94599e7b..b6fe7ec93 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1 @@ Cython==0.29.21 -scipy -scikit-learn \ No newline at end of file From 03ab67ad5cfa7d8af5a1e888c2512f09cdbbdfd3 Mon Sep 17 00:00:00 2001 From: bxdd Date: Thu, 10 Dec 2020 07:08:12 +0000 Subject: [PATCH 05/85] fix req --- docs/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index b6fe7ec93..03432f9fe 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1,2 @@ -Cython==0.29.21 +Cython +numpy \ No newline at end of file From 59282c896543ec27ac5a23d61a810309cab0d83a Mon Sep 17 00:00:00 2001 From: bxdd Date: Thu, 10 Dec 2020 07:11:38 +0000 Subject: [PATCH 06/85] fix req --- docs/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 03432f9fe..08e80edd6 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,4 @@ Cython -numpy \ No newline at end of file +numpy +scipy +scikit-learn \ No newline at end of file From eb67f1037aded9e999fca3338e61e2a2b10a6774 Mon Sep 17 00:00:00 2001 From: bxdd Date: Thu, 10 Dec 2020 07:23:20 +0000 Subject: [PATCH 07/85] update setup --- setup.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index f01974e8f..b731bb92a 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,5 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. - -# -*- coding: utf-8 -*- import io import os import numpy @@ -96,12 +94,7 @@ setup( long_description=long_description, long_description_content_type="text/markdown", python_requires=REQUIRES_PYTHON, - packages=find_packages( - exclude=( - "tests", - "examples", - ) - ), + packages=find_packages(exclude=("tests",)), # if your package is a single module, use this instead of 'packages': # py_modules=['qlib'], entry_points={ @@ -128,4 +121,4 @@ setup( "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", ], -) +) \ No newline at end of file From 9c482ebbe2e1a47c92e71c27f81aa18f629421c8 Mon Sep 17 00:00:00 2001 From: bxdd Date: Thu, 10 Dec 2020 07:30:00 +0000 Subject: [PATCH 08/85] black format --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b731bb92a..cc495824e 100644 --- a/setup.py +++ b/setup.py @@ -121,4 +121,4 @@ setup( "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", ], -) \ No newline at end of file +) From b04d2c39c84633a22146e99dfe829f3655cdd4dd Mon Sep 17 00:00:00 2001 From: Jactus Date: Thu, 10 Dec 2020 17:42:38 +0800 Subject: [PATCH 09/85] Update CI --- .github/workflows/test.yml | 65 +++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 81bbea097..be6f6b75d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,8 +12,8 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [windows-latest, ubuntu-16.04, ubuntu-18.04, macos-latest] - python-version: [3.7, 3.8] + os: [windows-latest, ubuntu-16.04, ubuntu-18.04, ubuntu-20.04, macos-latest] + python-version: [3.6, 3.7, 3.8] steps: - uses: actions/checkout@v2 @@ -22,13 +22,22 @@ jobs: uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - + - name: Install dependencies - run: | - pip install --upgrade cython - pip install numpy jupyter jupyter_contrib_nbextensions - pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't - python setup.py install + run: | + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe -m pip install --upgrade cython + $CONDA\\python.exe -m pip install --upgrade cython + $CONDA\\python.exe -m pip install numpy jupyter jupyter_contrib_nbextensions + $CONDA\\python.exe -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't + $CONDA\\python.exe setup.py install + else + sudo $CONDA/bin/python -m pip install --upgrade cython + sudo $CONDA/bin/python -m pip install numpy jupyter jupyter_contrib_nbextensions + sudo $CONDA/bin/python -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't + sudo $CONDA/bin/python setup.py install + fi + shell: bash - name: Install Lightgbm for MacOS if: runner.os == 'macOS' @@ -38,23 +47,49 @@ jobs: - name: Install test dependencies run: | - python -m pip install --upgrade pip - pip install black pytest - + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe -m pip install --upgrade pip + $CONDA\\python.exe -m pip install black pytest + else + sudo $CONDA/bin/python -m pip install --upgrade pip + sudo $CONDA/bin/python -m pip install black pytest + fi + shell: bash + - name: Lint with Black run: | cd .. - python -m black qlib -l 120 --check --diff + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe -m black qlib -l 120 --check --diff + else + $CONDA/bin/python -m black qlib -l 120 --check --diff + fi + shell: bash - name: Unit tests with Pytest run: | cd tests - pytest . --durations=0 + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe -m pytest . --durations=0 + else + $CONDA/bin/python -m pytest . --durations=0 + fi + shell: bash - name: Test data downloads run: | - python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + else + $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + fi + shell: bash - name: Test workflow by config run: | - qrun examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml + else + $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml + fi + shell: bash From 99f3820e42304be7d8013764c91b11be0af62248 Mon Sep 17 00:00:00 2001 From: Jactus Date: Thu, 10 Dec 2020 18:03:30 +0800 Subject: [PATCH 10/85] Update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 8895ecf18..ca91925dc 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ Also, users can install ``Qlib`` by the source code according to the following s pip install numpy pip install --upgrade cython ``` + **Note**: please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source. * Clone the repository and install ``Qlib``: From 9307bcc8d1a811cd90f77ba851ee4b93cff047d3 Mon Sep 17 00:00:00 2001 From: G_will Date: Thu, 10 Dec 2020 18:14:55 +0800 Subject: [PATCH 11/85] fix typo fix typo --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index cc495824e..065e30605 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,6 @@ REQUIRED = [ "lightgbm", "tornado", "joblib>=0.17.0", - "fire>=0.3.1", "ruamel.yaml>=0.16.12", ] From d642c7b6eac4445424f69e3362719ebb6a46835b Mon Sep 17 00:00:00 2001 From: Jactus Date: Fri, 11 Dec 2020 09:55:37 +0800 Subject: [PATCH 12/85] Update benchmark performance --- .github/workflows/test.yml | 1 - examples/benchmarks/README.md | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index be6f6b75d..64ff99dfe 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,7 +26,6 @@ jobs: - name: Install dependencies run: | if [ "$RUNNER_OS" == "Windows" ]; then - $CONDA\\python.exe -m pip install --upgrade cython $CONDA\\python.exe -m pip install --upgrade cython $CONDA\\python.exe -m pip install numpy jupyter jupyter_contrib_nbextensions $CONDA\\python.exe -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index c561906d6..37677a99e 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -25,8 +25,8 @@ The numbers shown below demonstrate the performance of the entire `workflow` of | XGBoost | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 | | LightGBM | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 | | MLP | Alpha158 | 0.0363±0.00 | 0.2770±0.02| 0.0421±0.00 | 0.3167±0.01 | 0.0856±0.01 | 1.0397±0.12| -0.1134±0.01 | -| TFT | Alpha158 (with selected 20 features) | 0.0335±0.00 | 0.2009±0.01| 0.0090±0.00 | 0.0553±0.01 | 0.0605±0.01 | 0.5438±0.12| -0.1772±0.03 | -| GRU | Alpha158 (with selected 20 features) | 0.0313±0.00 | 0.2427±0.01 | 0.0416±0.00 | 0.3370±0.01 | 0.0335±0.01 | 0.4808±0.22 | -0.1112±0.03 | -| LSTM | Alpha158 (with selected 20 features) | 0.0337±0.01 | 0.2562±0.05 | 0.0427±0.01 | 0.3392±0.04 | 0.0269±0.06 | 0.3385±0.74 | -0.1285±0.04 | -| ALSTM | Alpha158 (with selected 20 features) | 0.0366±0.00 | 0.2803±0.04 | 0.0478±0.00 | 0.3770±0.02 | 0.0520±0.03 | 0.7115±0.30 | -0.0986±0.01 | -| GATs | Alpha158 (with selected 20 features) | 0.0355±0.00 | 0.2576±0.02 | 0.0465±0.00 | 0.3585±0.00 | 0.0509±0.02 | 0.7212±0.22 | -0.0821±0.01 | \ No newline at end of file +| TFT | Alpha158 (with selected 20 features) | 0.0344±0.00 | 0.2071±0.02| 0.0103±0.00 | 0.0632±0.01 | 0.0638±0.00 | 0.5845±0.8| -0.1754±0.02 | +| GRU | Alpha158 (with selected 20 features) | 0.0302±0.00 | 0.2353±0.03| 0.0411±0.00 | 0.3309±0.03 | 0.0302±0.02 | 0.4353±0.28| -0.1140±0.02 | +| LSTM | Alpha158 (with selected 20 features) | 0.0359±0.01 | 0.2774±0.06| 0.0448±0.01 | 0.3597±0.05 | 0.0402±0.03 | 0.5743±0.41| -0.1152±0.03 | +| ALSTM | Alpha158 (with selected 20 features) | 0.0329±0.01 | 0.2465±0.07| 0.0450±0.01 | 0.3485±0.06 | 0.0288±0.04 | 0.4163±0.50| -0.1269±0.04 | +| GATs | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2526±0.01| 0.0454±0.00 | 0.3531±0.01 | 0.0561±0.01 | 0.7992±0.19| -0.0751±0.02 | \ No newline at end of file From c10955d0263686b740401714292ee7ca19c032f7 Mon Sep 17 00:00:00 2001 From: Jactus Date: Fri, 11 Dec 2020 14:33:16 +0800 Subject: [PATCH 13/85] Update tft --- examples/benchmarks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index 37677a99e..2557eb9b3 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -25,7 +25,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of | XGBoost | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 | | LightGBM | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 | | MLP | Alpha158 | 0.0363±0.00 | 0.2770±0.02| 0.0421±0.00 | 0.3167±0.01 | 0.0856±0.01 | 1.0397±0.12| -0.1134±0.01 | -| TFT | Alpha158 (with selected 20 features) | 0.0344±0.00 | 0.2071±0.02| 0.0103±0.00 | 0.0632±0.01 | 0.0638±0.00 | 0.5845±0.8| -0.1754±0.02 | +| TFT | Alpha158 (with selected 20 features) | 0.0344±0.00 | 0.2071±0.02| 0.0103±0.00 | 0.0632±0.01 | 0.0638±0.00 | 0.5845±0.08| -0.1754±0.02 | | GRU | Alpha158 (with selected 20 features) | 0.0302±0.00 | 0.2353±0.03| 0.0411±0.00 | 0.3309±0.03 | 0.0302±0.02 | 0.4353±0.28| -0.1140±0.02 | | LSTM | Alpha158 (with selected 20 features) | 0.0359±0.01 | 0.2774±0.06| 0.0448±0.01 | 0.3597±0.05 | 0.0402±0.03 | 0.5743±0.41| -0.1152±0.03 | | ALSTM | Alpha158 (with selected 20 features) | 0.0329±0.01 | 0.2465±0.07| 0.0450±0.01 | 0.3485±0.06 | 0.0288±0.04 | 0.4163±0.50| -0.1269±0.04 | From d6dd423dc223447dc74f3d7a97b37fec73413323 Mon Sep 17 00:00:00 2001 From: Jactus Date: Fri, 11 Dec 2020 09:55:37 +0800 Subject: [PATCH 14/85] Update benchmark performance --- .github/workflows/test.yml | 1 - examples/benchmarks/README.md | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index be6f6b75d..64ff99dfe 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,7 +26,6 @@ jobs: - name: Install dependencies run: | if [ "$RUNNER_OS" == "Windows" ]; then - $CONDA\\python.exe -m pip install --upgrade cython $CONDA\\python.exe -m pip install --upgrade cython $CONDA\\python.exe -m pip install numpy jupyter jupyter_contrib_nbextensions $CONDA\\python.exe -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index c561906d6..37677a99e 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -25,8 +25,8 @@ The numbers shown below demonstrate the performance of the entire `workflow` of | XGBoost | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 | | LightGBM | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 | | MLP | Alpha158 | 0.0363±0.00 | 0.2770±0.02| 0.0421±0.00 | 0.3167±0.01 | 0.0856±0.01 | 1.0397±0.12| -0.1134±0.01 | -| TFT | Alpha158 (with selected 20 features) | 0.0335±0.00 | 0.2009±0.01| 0.0090±0.00 | 0.0553±0.01 | 0.0605±0.01 | 0.5438±0.12| -0.1772±0.03 | -| GRU | Alpha158 (with selected 20 features) | 0.0313±0.00 | 0.2427±0.01 | 0.0416±0.00 | 0.3370±0.01 | 0.0335±0.01 | 0.4808±0.22 | -0.1112±0.03 | -| LSTM | Alpha158 (with selected 20 features) | 0.0337±0.01 | 0.2562±0.05 | 0.0427±0.01 | 0.3392±0.04 | 0.0269±0.06 | 0.3385±0.74 | -0.1285±0.04 | -| ALSTM | Alpha158 (with selected 20 features) | 0.0366±0.00 | 0.2803±0.04 | 0.0478±0.00 | 0.3770±0.02 | 0.0520±0.03 | 0.7115±0.30 | -0.0986±0.01 | -| GATs | Alpha158 (with selected 20 features) | 0.0355±0.00 | 0.2576±0.02 | 0.0465±0.00 | 0.3585±0.00 | 0.0509±0.02 | 0.7212±0.22 | -0.0821±0.01 | \ No newline at end of file +| TFT | Alpha158 (with selected 20 features) | 0.0344±0.00 | 0.2071±0.02| 0.0103±0.00 | 0.0632±0.01 | 0.0638±0.00 | 0.5845±0.8| -0.1754±0.02 | +| GRU | Alpha158 (with selected 20 features) | 0.0302±0.00 | 0.2353±0.03| 0.0411±0.00 | 0.3309±0.03 | 0.0302±0.02 | 0.4353±0.28| -0.1140±0.02 | +| LSTM | Alpha158 (with selected 20 features) | 0.0359±0.01 | 0.2774±0.06| 0.0448±0.01 | 0.3597±0.05 | 0.0402±0.03 | 0.5743±0.41| -0.1152±0.03 | +| ALSTM | Alpha158 (with selected 20 features) | 0.0329±0.01 | 0.2465±0.07| 0.0450±0.01 | 0.3485±0.06 | 0.0288±0.04 | 0.4163±0.50| -0.1269±0.04 | +| GATs | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2526±0.01| 0.0454±0.00 | 0.3531±0.01 | 0.0561±0.01 | 0.7992±0.19| -0.0751±0.02 | \ No newline at end of file From bba94d72dc60ba558faef3dff63cbe1c2b62c0f3 Mon Sep 17 00:00:00 2001 From: Jactus Date: Fri, 11 Dec 2020 14:54:08 +0800 Subject: [PATCH 15/85] Add author names --- README.md | 18 +++++++++--------- examples/benchmarks/README.md | 30 +++++++++++++++--------------- examples/benchmarks/SFM/README.md | 2 +- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index ca91925dc..e39071fdb 100644 --- a/README.md +++ b/README.md @@ -191,16 +191,16 @@ The automatic workflow may not suite the research workflow of all Quant research # [Quant Model Zoo](examples/benchmarks) Here is a list of models built on `Qlib`. -- [GBDT based on LightGBM](qlib/contrib/model/gbdt.py) -- [GBDT based on Catboost](qlib/contrib/model/catboost_model.py) -- [GBDT based on XGBoost](qlib/contrib/model/xgboost.py) +- [GBDT based on LightGBM (Guolin Ke, et al.)](qlib/contrib/model/gbdt.py) +- [GBDT based on Catboost (Liudmila Prokhorenkova, et al.)](qlib/contrib/model/catboost_model.py) +- [GBDT based on XGBoost (Tianqi Chen, et al.)](qlib/contrib/model/xgboost.py) - [MLP based on pytorch](qlib/contrib/model/pytorch_nn.py) -- [GRU based on pytorch](qlib/contrib/model/pytorch_gru.py) -- [LSTM based on pytorcn](qlib/contrib/model/pytorch_lstm.py) -- [ALSTM based on pytorcn](qlib/contrib/model/pytorch_alstm.py) -- [GATs based on pytorch](qlib/contrib/model/pytorch_gats.py) -- [SFM based on pytorch](qlib/contrib/model/pytorch_sfm.py) -- [TFT based on tensorflow](examples/benchmarks/TFT/tft.py) +- [GRU based on pytorch (Kyunghyun Cho, et al.)](qlib/contrib/model/pytorch_gru.py) +- [LSTM based on pytorcn (Sepp Hochreiter, et al.)](qlib/contrib/model/pytorch_lstm.py) +- [ALSTM based on pytorcn (Yao Qin, et al.)](qlib/contrib/model/pytorch_alstm.py) +- [GATs based on pytorch (Petar Velickovic, et al.)](qlib/contrib/model/pytorch_gats.py) +- [SFM based on pytorch (Liheng Zhang, et al.)](qlib/contrib/model/pytorch_sfm.py) +- [TFT based on tensorflow (Bryan Lim, et al.)](examples/benchmarks/TFT/tft.py) Your PR of new Quant models is highly welcomed. diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index 37677a99e..634c88a7b 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -8,25 +8,25 @@ The numbers shown below demonstrate the performance of the entire `workflow` of | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | |---|---|---|---|---|---|---|---|---| | Linear | Alpha360 | 0.0150±0.00 | 0.1049±0.00| 0.0284±0.00 | 0.1970±0.00 | -0.0655±0.00 | -0.6985±0.00| -0.2961±0.00 | -| CatBoost | Alpha360 | 0.0397±0.00 | 0.2878±0.00| 0.0470±0.00 | 0.3703±0.00 | 0.0342±0.00 | 0.4092±0.00| -0.1057±0.00 | -| XGBoost | Alpha360 | 0.0400±0.00 | 0.3031±0.00| 0.0461±0.00 | 0.3862±0.00 | 0.0528±0.00 | 0.6307±0.00| -0.1113±0.00 | -| LightGBM | Alpha360 | 0.0399±0.00 | 0.3075±0.00| 0.0492±0.00 | 0.4019±0.00 | 0.0323±0.00 | 0.4370±0.00| -0.0917±0.00 | +| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha360 | 0.0397±0.00 | 0.2878±0.00| 0.0470±0.00 | 0.3703±0.00 | 0.0342±0.00 | 0.4092±0.00| -0.1057±0.00 | +| XGBoost (Tianqi Chen, et al.) | Alpha360 | 0.0400±0.00 | 0.3031±0.00| 0.0461±0.00 | 0.3862±0.00 | 0.0528±0.00 | 0.6307±0.00| -0.1113±0.00 | +| LightGBM (Guolin Ke, et al.) | Alpha360 | 0.0399±0.00 | 0.3075±0.00| 0.0492±0.00 | 0.4019±0.00 | 0.0323±0.00 | 0.4370±0.00| -0.0917±0.00 | | MLP | Alpha360 | 0.0253±0.01 | 0.1954±0.05| 0.0329±0.00 | 0.2687±0.04 | 0.0161±0.01 | 0.1989±0.19| -0.1275±0.03 | -| GRU | Alpha360 | 0.0503±0.01 | 0.3946±0.06| 0.0588±0.00 | 0.4737±0.05 | 0.0799±0.02 | 1.0940±0.26| -0.0810±0.03 | -| LSTM | Alpha360 | 0.0466±0.01 | 0.3644±0.06| 0.0555±0.00 | 0.4451±0.04 | 0.0783±0.05 | 1.0539±0.65| -0.0844±0.03 | -| ALSTM | Alpha360 | 0.0472±0.00 | 0.3558±0.04| 0.0577±0.00 | 0.4522±0.04 | 0.0522±0.02 | 0.7090±0.32| -0.1059±0.03 | -| GATs | Alpha360 | 0.0480±0.00 | 0.3555±0.02| 0.0598±0.00 | 0.4616±0.01 | 0.0857±0.03 | 1.1317±0.42| -0.0917±0.01 | +| GRU (Kyunghyun Cho, et al.) | Alpha360 | 0.0503±0.01 | 0.3946±0.06| 0.0588±0.00 | 0.4737±0.05 | 0.0799±0.02 | 1.0940±0.26| -0.0810±0.03 | +| LSTM (Sepp Hochreiter, et al.) | Alpha360 | 0.0466±0.01 | 0.3644±0.06| 0.0555±0.00 | 0.4451±0.04 | 0.0783±0.05 | 1.0539±0.65| -0.0844±0.03 | +| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0472±0.00 | 0.3558±0.04| 0.0577±0.00 | 0.4522±0.04 | 0.0522±0.02 | 0.7090±0.32| -0.1059±0.03 | +| GATs (Petar Velickovic, et al.) | Alpha360 | 0.0480±0.00 | 0.3555±0.02| 0.0598±0.00 | 0.4616±0.01 | 0.0857±0.03 | 1.1317±0.42| -0.0917±0.01 | ## Alpha158 dataset | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | |---|---|---|---|---|---|---|---|---| | Linear | Alpha158 | 0.0393±0.00 | 0.2980±0.00| 0.0475±0.00 | 0.3546±0.00 | 0.0795±0.00 | 1.0712±0.00| -0.1449±0.00 | -| CatBoost | Alpha158 | 0.0503±0.00 | 0.3586±0.00| 0.0483±0.00 | 0.3667±0.00 | 0.1080±0.00 | 1.1567±0.00| -0.0787±0.00 | -| XGBoost | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 | -| LightGBM | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 | +| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha158 | 0.0503±0.00 | 0.3586±0.00| 0.0483±0.00 | 0.3667±0.00 | 0.1080±0.00 | 1.1567±0.00| -0.0787±0.00 | +| XGBoost (Tianqi Chen, et al.) | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 | +| LightGBM (Guolin Ke, et al.) | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 | | MLP | Alpha158 | 0.0363±0.00 | 0.2770±0.02| 0.0421±0.00 | 0.3167±0.01 | 0.0856±0.01 | 1.0397±0.12| -0.1134±0.01 | -| TFT | Alpha158 (with selected 20 features) | 0.0344±0.00 | 0.2071±0.02| 0.0103±0.00 | 0.0632±0.01 | 0.0638±0.00 | 0.5845±0.8| -0.1754±0.02 | -| GRU | Alpha158 (with selected 20 features) | 0.0302±0.00 | 0.2353±0.03| 0.0411±0.00 | 0.3309±0.03 | 0.0302±0.02 | 0.4353±0.28| -0.1140±0.02 | -| LSTM | Alpha158 (with selected 20 features) | 0.0359±0.01 | 0.2774±0.06| 0.0448±0.01 | 0.3597±0.05 | 0.0402±0.03 | 0.5743±0.41| -0.1152±0.03 | -| ALSTM | Alpha158 (with selected 20 features) | 0.0329±0.01 | 0.2465±0.07| 0.0450±0.01 | 0.3485±0.06 | 0.0288±0.04 | 0.4163±0.50| -0.1269±0.04 | -| GATs | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2526±0.01| 0.0454±0.00 | 0.3531±0.01 | 0.0561±0.01 | 0.7992±0.19| -0.0751±0.02 | \ No newline at end of file +| TFT (Bryan Lim, et al.) | Alpha158 (with selected 20 features) | 0.0344±0.00 | 0.2071±0.02| 0.0103±0.00 | 0.0632±0.01 | 0.0638±0.00 | 0.5845±0.08| -0.1754±0.02 | +| GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0302±0.00 | 0.2353±0.03| 0.0411±0.00 | 0.3309±0.03 | 0.0302±0.02 | 0.4353±0.28| -0.1140±0.02 | +| LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0359±0.01 | 0.2774±0.06| 0.0448±0.01 | 0.3597±0.05 | 0.0402±0.03 | 0.5743±0.41| -0.1152±0.03 | +| ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0329±0.01 | 0.2465±0.07| 0.0450±0.01 | 0.3485±0.06 | 0.0288±0.04 | 0.4163±0.50| -0.1269±0.04 | +| GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2526±0.01| 0.0454±0.00 | 0.3531±0.01 | 0.0561±0.01 | 0.7992±0.19| -0.0751±0.02 | \ No newline at end of file diff --git a/examples/benchmarks/SFM/README.md b/examples/benchmarks/SFM/README.md index 5f74c15d2..ff54145af 100644 --- a/examples/benchmarks/SFM/README.md +++ b/examples/benchmarks/SFM/README.md @@ -1,3 +1,3 @@ # State-Frequency-Memory - State Frequency Memory (SFM) is a novel recurrent network that uses Discrete Fourier Transform to decompose the hidden states of memory cells and capture the multi-frequency trading patterns from past market data to make stock price predictions. -- Paper: Stock Price Prediction via Discovering Multi-Frequency Trading Patterns. [https://www.cs.ucf.edu/~gqi/publications/kdd2017_stock.pdf.](https://www.cs.ucf.edu/~gqi/publications/kdd2017_stock.pdf.) \ No newline at end of file +- Paper: Stock Price Prediction via Discovering Multi-Frequency Trading Patterns. [http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf.](http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf) \ No newline at end of file From 8ea45802df084f17dc50084903dd30307a2ddb8c Mon Sep 17 00:00:00 2001 From: you-n-g Date: Sat, 12 Dec 2020 14:04:21 +0800 Subject: [PATCH 16/85] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index e39071fdb..f2186c2af 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,8 @@ Users can easily install ``Qlib`` by pip according to the following command pip install pyqlib ``` +**NOTE**: pip will install latest stable qlib. However the main branch of qlib is in active development. If you want to test the latest scripts or functions in main branch. Please install qlib with the methods below. + Also, users can install ``Qlib`` by the source code according to the following steps: * Before installing ``Qlib`` from source, users need to install some dependencies: From b0e7a856010c0b9f41b3d296df26df6f077c3884 Mon Sep 17 00:00:00 2001 From: bxdd Date: Sat, 12 Dec 2020 08:34:52 +0000 Subject: [PATCH 17/85] update readme --- README.md | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index f2186c2af..41ea11cbe 100644 --- a/README.md +++ b/README.md @@ -71,9 +71,9 @@ Users can easily install ``Qlib`` by pip according to the following command pip install pyqlib ``` -**NOTE**: pip will install latest stable qlib. However the main branch of qlib is in active development. If you want to test the latest scripts or functions in main branch. Please install qlib with the methods below. +**NOTE**: pip will install the latest stable qlib. However, the main branch of qlib is in active development. If you want to test the latest scripts or functions in the main branch. Please install qlib with the methods below. -Also, users can install ``Qlib`` by the source code according to the following steps: +Also, users can install the latest developed ``Qlib`` by the source code according to the following steps: * Before installing ``Qlib`` from source, users need to install some dependencies: @@ -81,15 +81,20 @@ Also, users can install ``Qlib`` by the source code according to the following s pip install numpy pip install --upgrade cython ``` - **Note**: please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source. - -* Clone the repository and install ``Qlib``: - - ```bash - git clone https://github.com/microsoft/qlib.git && cd qlib - python setup.py install - ``` + **Note**: Please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source. +* Clone the repository and install ``Qlib`` as follows. + * If you haven't installed qlib by the command ``pip install pyqlib`` before: + ```bash + git clone https://github.com/microsoft/qlib.git && cd qlib + python setup.py install + ``` + * If you have installed the stable by the command ``pip install pyqlib``: + ```bash + git clone https://github.com/microsoft/qlib.git && cd qlib + pip install . + ``` + **Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, which the command ``python setup.py install`` **can't** do. ## Data Preparation Load and prepare data by running the following code: From 9d19294f159ca02ac90e84b580ed09f880acf1e0 Mon Sep 17 00:00:00 2001 From: bxdd Date: Sat, 12 Dec 2020 08:42:25 +0000 Subject: [PATCH 18/85] update Note --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 41ea11cbe..b4fb14e1f 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Users can easily install ``Qlib`` by pip according to the following command pip install pyqlib ``` -**NOTE**: pip will install the latest stable qlib. However, the main branch of qlib is in active development. If you want to test the latest scripts or functions in the main branch. Please install qlib with the methods below. +**Note**: pip will install the latest stable qlib. However, the main branch of qlib is in active development. If you want to test the latest scripts or functions in the main branch. Please install qlib with the methods below. Also, users can install the latest developed ``Qlib`` by the source code according to the following steps: From e49b59032204b46db0fad6bfdc34b0f2e5cb9df7 Mon Sep 17 00:00:00 2001 From: Young Date: Sat, 12 Dec 2020 09:51:52 +0000 Subject: [PATCH 19/85] Release qlib 0.6.1 --- README.md | 6 +++--- qlib/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b4fb14e1f..121d88f4f 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ Users can easily install ``Qlib`` by pip according to the following command **Note**: pip will install the latest stable qlib. However, the main branch of qlib is in active development. If you want to test the latest scripts or functions in the main branch. Please install qlib with the methods below. -Also, users can install the latest developed ``Qlib`` by the source code according to the following steps: +Also, users can install the latest dev version ``Qlib`` by the source code according to the following steps: * Before installing ``Qlib`` from source, users need to install some dependencies: @@ -89,12 +89,12 @@ Also, users can install the latest developed ``Qlib`` by the source code accordi git clone https://github.com/microsoft/qlib.git && cd qlib python setup.py install ``` - * If you have installed the stable by the command ``pip install pyqlib``: + * If you have already installed the stable version by the command ``pip install pyqlib``: ```bash git clone https://github.com/microsoft/qlib.git && cd qlib pip install . ``` - **Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, which the command ``python setup.py install`` **can't** do. + **Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, which the command ``python setup.py install`` **can't**. ## Data Preparation Load and prepare data by running the following code: diff --git a/qlib/__init__.py b/qlib/__init__.py index f79b8c4f5..a637b8e3a 100644 --- a/qlib/__init__.py +++ b/qlib/__init__.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. -__version__ = "0.6.0.dev" +__version__ = "0.6.1" import os diff --git a/setup.py b/setup.py index 065e30605..dc67f71c0 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ NAME = "pyqlib" DESCRIPTION = "A Quantitative-research Platform" REQUIRES_PYTHON = ">=3.5.0" -VERSION = "0.6.0.dev" +VERSION = "0.6.1" # Detect Cython try: From e2bdef7ffef91624b36a306303d5b1902f472b43 Mon Sep 17 00:00:00 2001 From: Young Date: Sat, 12 Dec 2020 10:09:18 +0000 Subject: [PATCH 20/85] update version number to dev --- qlib/__init__.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/qlib/__init__.py b/qlib/__init__.py index a637b8e3a..98920ed04 100644 --- a/qlib/__init__.py +++ b/qlib/__init__.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. -__version__ = "0.6.1" +__version__ = "0.6.1.dev" import os diff --git a/setup.py b/setup.py index dc67f71c0..109a6b032 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ NAME = "pyqlib" DESCRIPTION = "A Quantitative-research Platform" REQUIRES_PYTHON = ">=3.5.0" -VERSION = "0.6.1" +VERSION = "0.6.1.dev" # Detect Cython try: From c8f9b1162d1c63bdc9b87e21e2c2ce7709162040 Mon Sep 17 00:00:00 2001 From: bxdd <45119470+bxdd@users.noreply.github.com> Date: Sat, 12 Dec 2020 19:01:00 +0800 Subject: [PATCH 21/85] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 121d88f4f..1fa3f8cc7 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor git clone https://github.com/microsoft/qlib.git && cd qlib pip install . ``` - **Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, which the command ``python setup.py install`` **can't**. + **Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, which the command ``python setup.py install`` **can't** do. ## Data Preparation Load and prepare data by running the following code: From 422d1d8c932abcaf5a3d9b8eb73a22765809c69e Mon Sep 17 00:00:00 2001 From: bxdd <45119470+bxdd@users.noreply.github.com> Date: Sat, 12 Dec 2020 19:41:16 +0800 Subject: [PATCH 22/85] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1fa3f8cc7..fdb3267c4 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor git clone https://github.com/microsoft/qlib.git && cd qlib pip install . ``` - **Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, which the command ``python setup.py install`` **can't** do. + **Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, while the command ``python setup.py install`` **can't**. ## Data Preparation Load and prepare data by running the following code: From 1bab07e419c9a6bc42f4f7bdcf86b6c922a1718b Mon Sep 17 00:00:00 2001 From: you-n-g Date: Sun, 13 Dec 2020 22:45:07 +0800 Subject: [PATCH 23/85] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fdb3267c4..5d65aa579 100644 --- a/README.md +++ b/README.md @@ -138,10 +138,10 @@ Users could create the same dataset with it. ## Auto Quant Research Workflow Qlib provides a tool named `qrun` to run the whole workflow automatically (including building dataset, training models, backtest and evaluation). You can start an auto quant research workflow and have a graphical reports analysis according to the following steps: -1. Quant Research Workflow: Run `qrun` with lightgbm workflow config ([workflow_config_lightgbm.yaml](examples/benchmarks/LightGBM/workflow_config_lightgbm.yaml)) as following. +1. Quant Research Workflow: Run `qrun` with lightgbm workflow config ([workflow_config_lightgbm_Alpha158.yaml](examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml) as following. ```bash cd examples # Avoid running program under the directory contains `qlib` - qrun benchmarks/LightGBM/workflow_config_lightgbm.yaml + qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml ``` The result of `qrun` is as follows, please refer to please refer to [Intraday Trading](https://qlib.readthedocs.io/en/latest/component/backtest.html) for more details about the result. From ea4fe1577b843f27f4d3fce2c1b0cf29d423f1a0 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Mon, 14 Dec 2020 13:05:12 +0800 Subject: [PATCH 24/85] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5d65aa579..701006992 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ This quick start guide tries to demonstrate ## Installation -Users can easily install ``Qlib`` by pip according to the following command +Users can easily install ``Qlib`` by pip according to the following command(Currently, Qlib only support Python 3.6, 3.7 and 3.8). ```bash pip install pyqlib From c217e7c479a09eb41a57f9f26a23dcf0ad2332c5 Mon Sep 17 00:00:00 2001 From: Yifan Deng Date: Mon, 14 Dec 2020 01:35:03 +0800 Subject: [PATCH 25/85] Update ops.py Fix the bug when Sign followed by True/False --- qlib/data/ops.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/qlib/data/ops.py b/qlib/data/ops.py index e17c0e4e6..72cfa4520 100644 --- a/qlib/data/ops.py +++ b/qlib/data/ops.py @@ -143,6 +143,11 @@ class Sign(ElemOperator): def __init__(self, feature): super(Sign, self).__init__(feature, "sign") + def _load_internal(self, instrument, start_index, end_index, freq): + series = self.feature.load(instrument, start_index, end_index, freq) + series = series.astype(np.float32) + return getattr(np, self.func)(series) + class Log(ElemOperator): """Feature Log From 6b8824dd298c33872a1364057204b5871b79444a Mon Sep 17 00:00:00 2001 From: "Yifan Deng (FA Talent)" Date: Mon, 14 Dec 2020 16:09:21 +0800 Subject: [PATCH 26/85] Update Sign in ops.py --- qlib/data/ops.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/qlib/data/ops.py b/qlib/data/ops.py index 72cfa4520..8c7699fb8 100644 --- a/qlib/data/ops.py +++ b/qlib/data/ops.py @@ -144,7 +144,11 @@ class Sign(ElemOperator): super(Sign, self).__init__(feature, "sign") def _load_internal(self, instrument, start_index, end_index, freq): + """ + To avoid error raised by bool type input, we transform the data into float32. + """ series = self.feature.load(instrument, start_index, end_index, freq) + # TODO: More precision types should be configurable series = series.astype(np.float32) return getattr(np, self.func)(series) From cb0b6fcdaa727f70f76eef22018e1d00dc1f5f41 Mon Sep 17 00:00:00 2001 From: Jactus Date: Mon, 14 Dec 2020 18:08:24 +0800 Subject: [PATCH 27/85] Update CI and script --- .github/workflows/test.yml | 86 ++++++++++++++++++++++++-------------- examples/run_all_model.py | 3 +- 2 files changed, 56 insertions(+), 33 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 64ff99dfe..637b0c291 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,9 +22,58 @@ jobs: uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} + + - name: Lint with Black + run: | + cd .. + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe -m pip install black + $CONDA\\python.exe -m black qlib -l 120 --check --diff + else + sudo $CONDA/bin/python -m pip install black + $CONDA/bin/python -m black qlib -l 120 --check --diff + fi + shell: bash + + # Test Qlib installed with pip + - name: Install Qlib with pip + run: | + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe -m pip install pyqlib --ignore-installed ruamel.yaml --user + else + sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml + fi + shell: bash - - name: Install dependencies - run: | + - name: Install Lightgbm for MacOS + if: runner.os == 'macOS' + run: | + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)" + HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm + + - name: Test data downloads + run: | + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + else + $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + fi + shell: bash + + - name: Test workflow by config (install from pip) + run: | + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml + $CONDA\\python.exe -m pip uninstall -y pyqlib + else + $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml + sudo $CONDA/bin/python -m pip uninstall -y pyqlib + fi + shell: bash + + # Test Qlib installed from source + - name: Install Qlib from source + run: | if [ "$RUNNER_OS" == "Windows" ]; then $CONDA\\python.exe -m pip install --upgrade cython $CONDA\\python.exe -m pip install numpy jupyter jupyter_contrib_nbextensions @@ -36,13 +85,7 @@ jobs: sudo $CONDA/bin/python -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't sudo $CONDA/bin/python setup.py install fi - shell: bash - - - name: Install Lightgbm for MacOS - if: runner.os == 'macOS' - run: | - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)" - HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm + shell: bash - name: Install test dependencies run: | @@ -54,16 +97,6 @@ jobs: sudo $CONDA/bin/python -m pip install black pytest fi shell: bash - - - name: Lint with Black - run: | - cd .. - if [ "$RUNNER_OS" == "Windows" ]; then - $CONDA\\python.exe -m black qlib -l 120 --check --diff - else - $CONDA/bin/python -m black qlib -l 120 --check --diff - fi - shell: bash - name: Unit tests with Pytest run: | @@ -73,22 +106,13 @@ jobs: else $CONDA/bin/python -m pytest . --durations=0 fi - shell: bash + shell: bash - - name: Test data downloads - run: | - if [ "$RUNNER_OS" == "Windows" ]; then - $CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn - else - $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn - fi - shell: bash - - - name: Test workflow by config + - name: Test workflow by config (install from source) run: | if [ "$RUNNER_OS" == "Windows" ]; then $CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml else $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml fi - shell: bash + shell: bash \ No newline at end of file diff --git a/examples/run_all_model.py b/examples/run_all_model.py index 505a20bcb..31a82a569 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -68,10 +68,9 @@ def only_allow_defined_args(function_to_decorate): def handler(signum, frame): os.system("kill -9 %d" % os.getpid()) - -signal.signal(signal.SIGTSTP, handler) signal.signal(signal.SIGINT, handler) + # function to calculate the mean and std of a list in the results dictionary def cal_mean_std(results) -> dict: mean_std = dict() From 8f6ab0af54dcb7bbbc0f11f3ecebd7496a164743 Mon Sep 17 00:00:00 2001 From: Jactus Date: Mon, 14 Dec 2020 18:11:12 +0800 Subject: [PATCH 28/85] Format --- examples/run_all_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/run_all_model.py b/examples/run_all_model.py index 31a82a569..d587eff15 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -68,6 +68,7 @@ def only_allow_defined_args(function_to_decorate): def handler(signum, frame): os.system("kill -9 %d" % os.getpid()) + signal.signal(signal.SIGINT, handler) From 3d47dd78c880f2ae0428e445411694773329ad86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Domaga=C5=82a?= <54290898+maciejdomagala@users.noreply.github.com> Date: Tue, 15 Dec 2020 09:19:42 +0100 Subject: [PATCH 29/85] Typo fix --- docs/component/recorder.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/component/recorder.rst b/docs/component/recorder.rst index baf12448b..baf8f2bca 100644 --- a/docs/component/recorder.rst +++ b/docs/component/recorder.rst @@ -91,7 +91,7 @@ Record Template The ``RecordTemp`` class is a class that enables generate experiment results such as IC and backtest in a certain format. We have provided three different `Record Template` class: -- ``SignalRecord``: This class generates the `preidction` results of the model. +- ``SignalRecord``: This class generates the `prediction` results of the model. - ``SigAnaRecord``: This class generates the `IC`, `ICIR`, `Rank IC` and `Rank ICIR` of the model. - ``PortAnaRecord``: This class generates the results of `backtest`. The detailed information about `backtest` as well as the available `strategy`, users can refer to `Strategy <../component/strategy.html>`_ and `Backtest <../component/backtest.html>`_. From 911edd78393b736959c0aff2f9f6eeae60c52570 Mon Sep 17 00:00:00 2001 From: Jactus Date: Tue, 15 Dec 2020 17:32:16 +0800 Subject: [PATCH 30/85] Add stale bot --- .github/stale.yml | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 .github/stale.yml diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 000000000..3d025c987 --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,62 @@ +# Configuration for probot-stale - https://github.com/probot/stale + +# Number of days of inactivity before an Issue or Pull Request becomes stale +daysUntilStale: 60 + +# Number of days of inactivity before an Issue or Pull Request with the stale label is closed. +# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. +daysUntilClose: 7 + +# Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled) +onlyLabels: [] + +# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable +exemptLabels: + - bug + - pinned + - security + - "[Status] Maybe Later" + +# Set to true to ignore issues in a project (defaults to false) +exemptProjects: false + +# Set to true to ignore issues in a milestone (defaults to false) +exemptMilestones: false + +# Set to true to ignore issues with an assignee (defaults to false) +exemptAssignees: false + +# Label to use when marking as stale +staleLabel: wontfix + +# Comment to post when marking as stale. Set to `false` to disable +markComment: > + This issue has been automatically marked as stale because it has not had + recent activity. It will be closed if no further activity occurs. Thank you + for your contributions. + +# Comment to post when removing the stale label. +# unmarkComment: > +# Your comment here. + +# Comment to post when closing a stale Issue or Pull Request. +# closeComment: > +# Your comment here. + +# Limit the number of actions per hour, from 1-30. Default is 30 +limitPerRun: 30 + +# Limit to only `issues` or `pulls` +# only: issues + +# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': +# pulls: +# daysUntilStale: 30 +# markComment: > +# This pull request has been automatically marked as stale because it has not had +# recent activity. It will be closed if no further activity occurs. Thank you +# for your contributions. + +# issues: +# exemptLabels: +# - confirmed \ No newline at end of file From 192c2dc5ef5fea3b2cd24efca51d95e72012401f Mon Sep 17 00:00:00 2001 From: Jactus Date: Tue, 15 Dec 2020 16:15:33 +0800 Subject: [PATCH 31/85] Add demo --- README.md | 7 +++++-- docs/_static/demo.sh | 12 ++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 docs/_static/demo.sh diff --git a/README.md b/README.md index 701006992..2ae36827e 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,10 @@ At the module level, Qlib is a platform that consists of the above components. T This quick start guide tries to demonstrate 1. It's very easy to build a complete Quant research workflow and try your ideas with _Qlib_. -1. Though with *public data* and *simple models*, machine learning technologies **work very well** in practical Quant investment. +2. Though with *public data* and *simple models*, machine learning technologies **work very well** in practical Quant investment. + +Here is a quick **[demo](https://terminalizer.com/view/3f24561a4470)** shows how to install ``Qlib``, and run LightGBM with ``qrun``. **But**, please make sure you have already prepared the data following the [instruction](#data-preparation). + ## Installation @@ -303,4 +306,4 @@ provided by the bot. You will only need to do this once across all repos using o This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or -contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. +contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. \ No newline at end of file diff --git a/docs/_static/demo.sh b/docs/_static/demo.sh new file mode 100644 index 000000000..bd2367a92 --- /dev/null +++ b/docs/_static/demo.sh @@ -0,0 +1,12 @@ +#!/bin/sh +git clone https://github.com/microsoft/qlib.git +cd qlib +ls +pip install pyqlib +# or +# pip install numpy +# pip install --upgrade cython +# python setup.py install +cd examples +ls +qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml \ No newline at end of file From 3f85af05e5a2da0d2f37f1428248a52f3de837a6 Mon Sep 17 00:00:00 2001 From: G_will Date: Tue, 15 Dec 2020 10:50:17 +0800 Subject: [PATCH 32/85] Refactor to Python3 style --- examples/benchmarks/TFT/expt_settings/configs.py | 2 +- examples/benchmarks/TFT/libs/tft_model.py | 6 +++--- qlib/contrib/backtest/__init__.py | 1 - qlib/contrib/model/pytorch_nn.py | 2 +- qlib/contrib/model/pytorch_sfm.py | 2 +- qlib/contrib/online/operator.py | 2 +- qlib/contrib/report/graph.py | 4 ++-- qlib/contrib/tuner/config.py | 6 +++--- qlib/contrib/tuner/pipeline.py | 2 +- qlib/contrib/tuner/tuner.py | 2 +- qlib/data/_libs/expanding.pyx | 2 +- qlib/data/_libs/rolling.pyx | 2 +- qlib/data/cache.py | 6 +++--- qlib/data/client.py | 2 +- qlib/log.py | 2 +- qlib/portfolio/optimizer.py | 2 +- qlib/utils/__init__.py | 2 +- 17 files changed, 23 insertions(+), 24 deletions(-) diff --git a/examples/benchmarks/TFT/expt_settings/configs.py b/examples/benchmarks/TFT/expt_settings/configs.py index 6aef0c395..62aa68c38 100644 --- a/examples/benchmarks/TFT/expt_settings/configs.py +++ b/examples/benchmarks/TFT/expt_settings/configs.py @@ -25,7 +25,7 @@ import os import data_formatters.qlib_Alpha158 -class ExperimentConfig(object): +class ExperimentConfig: """Defines experiment configs and paths to outputs. Attributes: diff --git a/examples/benchmarks/TFT/libs/tft_model.py b/examples/benchmarks/TFT/libs/tft_model.py index 658bae60f..b39f17825 100644 --- a/examples/benchmarks/TFT/libs/tft_model.py +++ b/examples/benchmarks/TFT/libs/tft_model.py @@ -320,7 +320,7 @@ class InterpretableMultiHeadAttention: return outputs, attn -class TFTDataCache(object): +class TFTDataCache: """Caches data for the TFT.""" _data_cache = {} @@ -348,7 +348,7 @@ class TFTDataCache(object): # TFT model definitions. -class TemporalFusionTransformer(object): +class TemporalFusionTransformer: """Defines Temporal Fusion Transformer. Attributes: @@ -972,7 +972,7 @@ class TemporalFusionTransformer(object): valid_quantiles = self.quantiles output_size = self.output_size - class QuantileLossCalculator(object): + class QuantileLossCalculator: """Computes the combined quantile loss for prespecified quantiles. Attributes: diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index 31746819c..6fcdf2142 100644 --- a/qlib/contrib/backtest/__init__.py +++ b/qlib/contrib/backtest/__init__.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -# -*- coding: utf-8 -*- from .order import Order from .account import Account from .position import Position diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 308494443..a41eeabbb 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -296,7 +296,7 @@ class DNNModelPytorch(Model): self._fitted = True -class AverageMeter(object): +class AverageMeter: """Computes and stores the average and current value""" def __init__(self): diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index 15d945c89..ae175a202 100644 --- a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -464,7 +464,7 @@ class SFM(Model): return pd.Series(np.concatenate(preds), index=index) -class AverageMeter(object): +class AverageMeter: """Computes and stores the average and current value""" def __init__(self): diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index a00e17d40..c8b44f578 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -21,7 +21,7 @@ from .executor import SimulatorExecutor from .executor import save_score_series, load_score_series -class Operator(object): +class Operator: def __init__(self, client: str): """ Parameters diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py index 3fa688d36..70e382fb1 100644 --- a/qlib/contrib/report/graph.py +++ b/qlib/contrib/report/graph.py @@ -17,7 +17,7 @@ from plotly.figure_factory import create_distplot from ...utils import get_module_by_module_path -class BaseGraph(object): +class BaseGraph: """""" _name = None @@ -204,7 +204,7 @@ class HistogramGraph(BaseGraph): return _data -class SubplotsGraph(object): +class SubplotsGraph: """Create subplots same as df.plot(subplots=True) Simple package for `plotly.tools.subplots` diff --git a/qlib/contrib/tuner/config.py b/qlib/contrib/tuner/config.py index 4825ca092..f23d1b874 100644 --- a/qlib/contrib/tuner/config.py +++ b/qlib/contrib/tuner/config.py @@ -6,7 +6,7 @@ import copy import os -class TunerConfigManager(object): +class TunerConfigManager: def __init__(self, config_path): if not config_path: @@ -27,7 +27,7 @@ class TunerConfigManager(object): self.qlib_client_config = config.get("qlib_client", dict()) -class PipelineExperimentConfig(object): +class PipelineExperimentConfig: def __init__(self, config, TUNER_CONFIG_MANAGER): """ :param config: The config dict for tuner experiment @@ -53,7 +53,7 @@ class PipelineExperimentConfig(object): yaml.dump(TUNER_CONFIG_MANAGER.config, fp) -class OptimizationConfig(object): +class OptimizationConfig: def __init__(self, config, TUNER_CONFIG_MANAGER): self.report_type = config.get("report_type", "pred_long") diff --git a/qlib/contrib/tuner/pipeline.py b/qlib/contrib/tuner/pipeline.py index 3a76d071d..ee92db529 100644 --- a/qlib/contrib/tuner/pipeline.py +++ b/qlib/contrib/tuner/pipeline.py @@ -11,7 +11,7 @@ from ...log import get_module_logger, TimeInspector from ...utils import get_module_by_module_path -class Pipeline(object): +class Pipeline: GLOBAL_BEST_PARAMS_NAME = "global_best_params.json" diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index 8da40bc69..2ce957859 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -19,7 +19,7 @@ from hyperopt import fmin, tpe from hyperopt import STATUS_OK, STATUS_FAIL -class Tuner(object): +class Tuner: def __init__(self, tuner_config, optim_config): self.logger = get_module_logger("Tuner", sh_level=logging.INFO) diff --git a/qlib/data/_libs/expanding.pyx b/qlib/data/_libs/expanding.pyx index 47bc49610..6c27c07eb 100644 --- a/qlib/data/_libs/expanding.pyx +++ b/qlib/data/_libs/expanding.pyx @@ -8,7 +8,7 @@ from libc.math cimport sqrt, isnan, NAN from libcpp.vector cimport vector -cdef class Expanding(object): +cdef class Expanding: """1-D array expanding""" cdef vector[double] barv cdef int na_count diff --git a/qlib/data/_libs/rolling.pyx b/qlib/data/_libs/rolling.pyx index 37d27ffa4..a18679a99 100644 --- a/qlib/data/_libs/rolling.pyx +++ b/qlib/data/_libs/rolling.pyx @@ -8,7 +8,7 @@ from libc.math cimport sqrt, isnan, NAN from libcpp.deque cimport deque -cdef class Rolling(object): +cdef class Rolling: """1-D array rolling""" cdef int window cdef deque[double] barv diff --git a/qlib/data/cache.py b/qlib/data/cache.py index 3fab2b527..d53f578b1 100644 --- a/qlib/data/cache.py +++ b/qlib/data/cache.py @@ -68,7 +68,7 @@ class MemCacheUnit(OrderedDict): self.popitem(last=False) -class MemCache(object): +class MemCache: """Memory cache.""" def __init__(self, mem_cache_size_limit=None, limit_type="length"): @@ -140,7 +140,7 @@ class MemCacheExpire: return value, expire -class CacheUtils(object): +class CacheUtils: LOCK_ID = "QLIB" @staticmethod @@ -224,7 +224,7 @@ class CacheUtils(object): current_cache_wlock.release() -class BaseProviderCache(object): +class BaseProviderCache: """Provider cache base class""" def __init__(self, provider): diff --git a/qlib/data/client.py b/qlib/data/client.py index 65a830f20..5244a7e45 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -12,7 +12,7 @@ from ..log import get_module_logger import pickle -class Client(object): +class Client: """A client class Provide the connection tool functions for ClientProvider. diff --git a/qlib/log.py b/qlib/log.py index 422a4c00b..6553dcb11 100644 --- a/qlib/log.py +++ b/qlib/log.py @@ -36,7 +36,7 @@ def get_module_logger(module_name, level=None): return module_logger -class TimeInspector(object): +class TimeInspector: timer_logger = get_module_logger("timer", level=logging.WARNING) diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index 534a66e2d..0e7d27254 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -9,7 +9,7 @@ import scipy.optimize as so from typing import Optional, Union, Callable, List -class PortfolioOptimizer(object): +class PortfolioOptimizer: """Portfolio Optimizer The following optimization algorithms are supported: diff --git a/qlib/utils/__init__.py b/qlib/utils/__init__.py index ab67b67e3..ddc17c478 100644 --- a/qlib/utils/__init__.py +++ b/qlib/utils/__init__.py @@ -686,7 +686,7 @@ def flatten_dict(d, parent_key="", sep="."): #################### Wrapper ##################### -class Wrapper(object): +class Wrapper: """Wrapper class for anything that needs to set up during qlib.init""" def __init__(self): From aefbf3b5f147b8c8d31c9d520e373bc373e03a7e Mon Sep 17 00:00:00 2001 From: Young Date: Tue, 15 Dec 2020 13:24:29 +0000 Subject: [PATCH 33/85] update collect info --- scripts/collect_info.py | 84 ++++++++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 23 deletions(-) diff --git a/scripts/collect_info.py b/scripts/collect_info.py index c9a9440b9..ba53deb30 100644 --- a/scripts/collect_info.py +++ b/scripts/collect_info.py @@ -1,28 +1,66 @@ -import sys, platform +import sys +import platform import qlib +import fire +import pkg_resources +from pathlib import Path + +QLIB_PATH = Path(__file__).absolute().resolve().parent.parent -def linux_distribution(): - try: - return platform.linux_distribution() - except: - return "N/A" +class InfoCollector: + def sys(self): + """collect system related info""" + for method in ["system", "machine", "platform", "version"]: + print(getattr(platform, method)()) + + def py(self): + """collect Python related info""" + print("Python version: {}".format(sys.version.replace("\n", " "))) + + def qlib(self): + """collect qlib related info""" + print("Qlib version: {}".format(qlib.__version__)) + REQUIRED = [ + "numpy", + "pandas", + "scipy", + "requests", + "sacred", + "pymongo", + "python-socketio", + "redis", + "python-redis-lock", + "schedule", + "cvxpy", + "hyperopt", + "fire", + "statsmodels", + "xlrd", + "plotly", + "matplotlib", + "tables", + "pyyaml", + "mlflow", + "tqdm", + "loguru", + "lightgbm", + "tornado", + "joblib", + "fire", + "ruamel.yaml", + ] + + for package in REQUIRED: + version = pkg_resources.get_distribution(package).version + print(f"{package}=={version}") + + def all(self): + """collect all info""" + for method in ["sys", "py", "qlib"]: + getattr(self, method)() + print() -print("Qlib version: {} \n".format(qlib.__version__)) -print( - """Python version: {} \n -linux_distribution: {} -system: {} -machine: {} -platform: {} -version: {} -""".format( - sys.version.split("\n"), - linux_distribution(), - platform.system(), - platform.machine(), - platform.platform(), - platform.version(), - ) -) +if __name__ == "__main__": + fire.Fire(InfoCollector) From f7e775f941073f3b060e4dcbb0ce635c8072d38b Mon Sep 17 00:00:00 2001 From: Young Date: Wed, 16 Dec 2020 02:14:38 +0000 Subject: [PATCH 34/85] make message more friendly --- qlib/data/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/data/ops.py b/qlib/data/ops.py index 8c7699fb8..2067991fc 100644 --- a/qlib/data/ops.py +++ b/qlib/data/ops.py @@ -18,7 +18,7 @@ try: from ._libs.rolling import rolling_slope, rolling_rsquare, rolling_resi from ._libs.expanding import expanding_slope, expanding_rsquare, expanding_resi except ImportError as err: - print("Do not import qlib package in the repository directory!") + print("#### Do not import qlib package in the repository directory in case of importing qlib from . without compiling #####") raise __all__ = ( From 9b60214e0c52fda8adf3b587e367629247d31b0c Mon Sep 17 00:00:00 2001 From: Young Date: Wed, 16 Dec 2020 02:16:06 +0000 Subject: [PATCH 35/85] make info more friendly --- qlib/data/ops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/qlib/data/ops.py b/qlib/data/ops.py index 2067991fc..7c13d345f 100644 --- a/qlib/data/ops.py +++ b/qlib/data/ops.py @@ -18,7 +18,9 @@ try: from ._libs.rolling import rolling_slope, rolling_rsquare, rolling_resi from ._libs.expanding import expanding_slope, expanding_rsquare, expanding_resi except ImportError as err: - print("#### Do not import qlib package in the repository directory in case of importing qlib from . without compiling #####") + print( + "#### Do not import qlib package in the repository directory in case of importing qlib from . without compiling #####" + ) raise __all__ = ( From 7d40ba753aca35c8452b1ba209aeff8ea37749a0 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Thu, 17 Dec 2020 00:35:35 +0800 Subject: [PATCH 36/85] Update README.md --- examples/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index b3728a1ec..124c34363 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,6 +1,6 @@ # Requirements -Here is the minimal hardware requirements to run the example. +Here is the minimal hardware requirements to run the `workflow_by_code` example. - Memory: 16G - Free Disk: 5G From 4b4cd38ca61f6f9848e8f1493a8af18add847612 Mon Sep 17 00:00:00 2001 From: Jactus Date: Thu, 17 Dec 2020 14:41:12 +0800 Subject: [PATCH 37/85] Update benchmark results --- examples/benchmarks/README.md | 38 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index 2557eb9b3..3f9b1f55b 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -1,32 +1,32 @@ # Benchmarks Performance -Here are the results of each benchmark model running on Qlib's `Alpha360` and `Alpha158` dataset with China's A shared-stock & CSI300 data respectively. The values of each metric are the mean and std calculated based on 10 runs. +Here are the results of each benchmark model running on Qlib's `Alpha360` and `Alpha158` dataset with China's A shared-stock & CSI300 data respectively. The values of each metric are the mean and std calculated based on 20 runs. The numbers shown below demonstrate the performance of the entire `workflow` of each model. We will update the `workflow` as well as models in the near future for better results. ## Alpha360 dataset | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | |---|---|---|---|---|---|---|---|---| -| Linear | Alpha360 | 0.0150±0.00 | 0.1049±0.00| 0.0284±0.00 | 0.1970±0.00 | -0.0655±0.00 | -0.6985±0.00| -0.2961±0.00 | -| CatBoost | Alpha360 | 0.0397±0.00 | 0.2878±0.00| 0.0470±0.00 | 0.3703±0.00 | 0.0342±0.00 | 0.4092±0.00| -0.1057±0.00 | -| XGBoost | Alpha360 | 0.0400±0.00 | 0.3031±0.00| 0.0461±0.00 | 0.3862±0.00 | 0.0528±0.00 | 0.6307±0.00| -0.1113±0.00 | -| LightGBM | Alpha360 | 0.0399±0.00 | 0.3075±0.00| 0.0492±0.00 | 0.4019±0.00 | 0.0323±0.00 | 0.4370±0.00| -0.0917±0.00 | -| MLP | Alpha360 | 0.0253±0.01 | 0.1954±0.05| 0.0329±0.00 | 0.2687±0.04 | 0.0161±0.01 | 0.1989±0.19| -0.1275±0.03 | -| GRU | Alpha360 | 0.0503±0.01 | 0.3946±0.06| 0.0588±0.00 | 0.4737±0.05 | 0.0799±0.02 | 1.0940±0.26| -0.0810±0.03 | -| LSTM | Alpha360 | 0.0466±0.01 | 0.3644±0.06| 0.0555±0.00 | 0.4451±0.04 | 0.0783±0.05 | 1.0539±0.65| -0.0844±0.03 | -| ALSTM | Alpha360 | 0.0472±0.00 | 0.3558±0.04| 0.0577±0.00 | 0.4522±0.04 | 0.0522±0.02 | 0.7090±0.32| -0.1059±0.03 | -| GATs | Alpha360 | 0.0480±0.00 | 0.3555±0.02| 0.0598±0.00 | 0.4616±0.01 | 0.0857±0.03 | 1.1317±0.42| -0.0917±0.01 | +| Linear | Alpha360 | 0.0150±0.00 | 0.1049±0.00| 0.0284±0.00 | 0.1970±0.00 | -0.0659±0.00 | -0.7072±0.00| -0.2955±0.00 | +| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha360 | 0.0397±0.00 | 0.2878±0.00| 0.0470±0.00 | 0.3703±0.00 | 0.0342±0.00 | 0.4092±0.00| -0.1057±0.00 | +| XGBoost (Tianqi Chen, et al.) | Alpha360 | 0.0400±0.00 | 0.3031±0.00| 0.0461±0.00 | 0.3862±0.00 | 0.0528±0.00 | 0.6307±0.00| -0.1113±0.00 | +| LightGBM (Guolin Ke, et al.) | Alpha360 | 0.0399±0.00 | 0.3075±0.00| 0.0492±0.00 | 0.4019±0.00 | 0.0323±0.00 | 0.4370±0.00| -0.0917±0.00 | +| MLP | Alpha360 | 0.0285±0.00 | 0.1981±0.02| 0.0402±0.00 | 0.2993±0.02 | 0.0073±0.02 | 0.0880±0.22| -0.1446±0.03 | +| GRU (Kyunghyun Cho, et al.) | Alpha360 | 0.0490±0.01 | 0.3787±0.05| 0.0581±0.00 | 0.4664±0.04 | 0.0726±0.02 | 0.9817±0.34| -0.0902±0.03 | +| LSTM (Sepp Hochreiter, et al.) | Alpha360 | 0.0443±0.01 | 0.3401±0.05| 0.0536±0.01 | 0.4248±0.05 | 0.0627±0.03 | 0.8441±0.48| -0.0882±0.03 | +| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 | +| GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 | ## Alpha158 dataset | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | |---|---|---|---|---|---|---|---|---| | Linear | Alpha158 | 0.0393±0.00 | 0.2980±0.00| 0.0475±0.00 | 0.3546±0.00 | 0.0795±0.00 | 1.0712±0.00| -0.1449±0.00 | -| CatBoost | Alpha158 | 0.0503±0.00 | 0.3586±0.00| 0.0483±0.00 | 0.3667±0.00 | 0.1080±0.00 | 1.1567±0.00| -0.0787±0.00 | -| XGBoost | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 | -| LightGBM | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 | -| MLP | Alpha158 | 0.0363±0.00 | 0.2770±0.02| 0.0421±0.00 | 0.3167±0.01 | 0.0856±0.01 | 1.0397±0.12| -0.1134±0.01 | -| TFT | Alpha158 (with selected 20 features) | 0.0344±0.00 | 0.2071±0.02| 0.0103±0.00 | 0.0632±0.01 | 0.0638±0.00 | 0.5845±0.08| -0.1754±0.02 | -| GRU | Alpha158 (with selected 20 features) | 0.0302±0.00 | 0.2353±0.03| 0.0411±0.00 | 0.3309±0.03 | 0.0302±0.02 | 0.4353±0.28| -0.1140±0.02 | -| LSTM | Alpha158 (with selected 20 features) | 0.0359±0.01 | 0.2774±0.06| 0.0448±0.01 | 0.3597±0.05 | 0.0402±0.03 | 0.5743±0.41| -0.1152±0.03 | -| ALSTM | Alpha158 (with selected 20 features) | 0.0329±0.01 | 0.2465±0.07| 0.0450±0.01 | 0.3485±0.06 | 0.0288±0.04 | 0.4163±0.50| -0.1269±0.04 | -| GATs | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2526±0.01| 0.0454±0.00 | 0.3531±0.01 | 0.0561±0.01 | 0.7992±0.19| -0.0751±0.02 | \ No newline at end of file +| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha158 | 0.0503±0.00 | 0.3586±0.00| 0.0483±0.00 | 0.3667±0.00 | 0.1080±0.00 | 1.1561±0.00| -0.0787±0.00 | +| XGBoost (Tianqi Chen, et al.) | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 | +| LightGBM (Guolin Ke, et al.) | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 | +| MLP | Alpha158 | 0.0358±0.00 | 0.2738±0.03| 0.0425±0.00 | 0.3221±0.01 | 0.0836±0.02 | 1.0323±0.25| -0.1127±0.02 | +| TFT (Bryan Lim, et al.) | Alpha158 (with selected 20 features) | 0.0343±0.00 | 0.2071±0.02| 0.0107±0.00 | 0.0660±0.02 | 0.0623±0.02 | 0.5818±0.20| -0.1762±0.01 | +| GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0311±0.00 | 0.2418±0.04| 0.0425±0.00 | 0.3434±0.02 | 0.0330±0.02 | 0.4805±0.30| -0.1021±0.02 | +| LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0312±0.00 | 0.2394±0.04| 0.0418±0.00 | 0.3324±0.03 | 0.0298±0.02 | 0.4198±0.33| -0.1348±0.03 | +| ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 | +| GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 | \ No newline at end of file From 3f84c3768af16993c01300a049da4aecefec700c Mon Sep 17 00:00:00 2001 From: hadrianl <137150224@qq.com> Date: Thu, 17 Dec 2020 09:47:25 +0800 Subject: [PATCH 38/85] Make __getattr__ to raise AttributeError instead of return it.Avoid using try except. --- qlib/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qlib/config.py b/qlib/config.py index 869ea99c9..c213df9c4 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -27,10 +27,10 @@ class Config: return self.__dict__["_config"][key] def __getattr__(self, attr): - try: + if attr in self.__dict__["_config"]: return self.__dict__["_config"][attr] - except KeyError: - return AttributeError(f"No such {attr} in self._config") + + raise AttributeError(f"No such {attr} in self._config") def __setitem__(self, key, value): self.__dict__["_config"][key] = value From 8bbfd8810caea3436bf9056c04bdd151e7018115 Mon Sep 17 00:00:00 2001 From: hadrianl <137150224@qq.com> Date: Thu, 17 Dec 2020 10:24:02 +0800 Subject: [PATCH 39/85] formatting --- qlib/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/config.py b/qlib/config.py index c213df9c4..16df27b95 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -29,7 +29,7 @@ class Config: def __getattr__(self, attr): if attr in self.__dict__["_config"]: return self.__dict__["_config"][attr] - + raise AttributeError(f"No such {attr} in self._config") def __setitem__(self, key, value): From f3f1867b1432a8f47cf7e4a4f0a657c4b2bd4635 Mon Sep 17 00:00:00 2001 From: hadrianl <137150224@qq.com> Date: Thu, 17 Dec 2020 12:28:48 +0800 Subject: [PATCH 40/85] fix wrong attribute --- qlib/data/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/data/data.py b/qlib/data/data.py index a4c3d63f2..3dcb22699 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -1052,7 +1052,7 @@ def register_all_wrappers(): if getattr(C, "calendar_cache", None) is not None: _calendar_provider = init_instance_by_config(C.calendar_cache, module, provide=_calendar_provider) register_wrapper(Cal, _calendar_provider, "qlib.data") - logger.debug(f"registering Cal {C.calendar_provider}-{C.calenar_cache}") + logger.debug(f"registering Cal {C.calendar_provider}-{C.calendar_cache}") register_wrapper(Inst, C.instrument_provider, "qlib.data") logger.debug(f"registering Inst {C.instrument_provider}") From ea018ed4dc6da2706f41487e031ca4d936e0a4e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Domaga=C5=82a?= <54290898+maciejdomagala@users.noreply.github.com> Date: Thu, 17 Dec 2020 09:18:15 +0100 Subject: [PATCH 41/85] fixing typos #2 --- scripts/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/README.md b/scripts/README.md index 99af4a457..b4eac4998 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -43,7 +43,7 @@ python get_data.py qlib_data --help ### US data -> Need to download data first: [Downlaod US Data](#Downlaod-US-Data) +> Need to download data first: [Download US Data](#Download-US-Data) ```python import qlib From 66d9bd1a687492d07ea0b57d337d1be85816199b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Domaga=C5=82a?= <54290898+maciejdomagala@users.noreply.github.com> Date: Fri, 18 Dec 2020 10:52:39 +0100 Subject: [PATCH 42/85] fixing typos #3 I just randomly find these by the way. Good work on the framework! --- qlib/contrib/strategy/strategy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/contrib/strategy/strategy.py b/qlib/contrib/strategy/strategy.py index 23e8b5185..74df39f3e 100644 --- a/qlib/contrib/strategy/strategy.py +++ b/qlib/contrib/strategy/strategy.py @@ -30,7 +30,7 @@ class BaseStrategy: Parameters ----------- - score_series : pd.Seires + score_series : pd.Series stock_id , score. current : Position() current state of position. From 824de921d10c7c60873f7cbba718c5437f9a916a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Domaga=C5=82a?= <54290898+maciejdomagala@users.noreply.github.com> Date: Fri, 18 Dec 2020 15:20:16 +0100 Subject: [PATCH 43/85] fixing typos #4 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2ae36827e..4d5b6b335 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ Qlib provides a tool named `qrun` to run the whole workflow automatically (inclu cd examples # Avoid running program under the directory contains `qlib` qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml ``` - The result of `qrun` is as follows, please refer to please refer to [Intraday Trading](https://qlib.readthedocs.io/en/latest/component/backtest.html) for more details about the result. + The result of `qrun` is as follows, please refer to [Intraday Trading](https://qlib.readthedocs.io/en/latest/component/backtest.html) for more details about the result. ```bash @@ -306,4 +306,4 @@ provided by the bot. You will only need to do this once across all repos using o This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or -contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. \ No newline at end of file +contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. From 995fa98fc6b7ca5eed0ca487bdc3727743a979b6 Mon Sep 17 00:00:00 2001 From: Young Date: Sun, 20 Dec 2020 05:50:31 +0000 Subject: [PATCH 44/85] add more doc to PortAnaRecord --- qlib/workflow/record_temp.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index a617f5d1f..bcbcd3cb4 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -213,6 +213,11 @@ class SigAnaRecord(SignalRecord): class PortAnaRecord(SignalRecord): """ This is the Portfolio Analysis Record class that generates the analysis results such as those of backtest. This class inherits the ``RecordTemp`` class. + + The following files will be stored in recorder + - report_normal.pkl & positions_normal.pkl: + - The return report and detailed positions of the backtest, returned by `qlib/contrib/evaluate.py:backtest` + - port_analysis.pkl : The risk analysis of your portfolio, returned by `qlib/contrib/evaluate.py:risk_analysis` """ artifact_path = "portfolio_analysis" From e0c460c33c07105b27aa4c7db019baef2f0bbe85 Mon Sep 17 00:00:00 2001 From: bxdd <45119470+bxdd@users.noreply.github.com> Date: Mon, 21 Dec 2020 21:35:41 +0800 Subject: [PATCH 45/85] Update alpha.rst --- docs/advanced/alpha.rst | 64 ++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 42 deletions(-) diff --git a/docs/advanced/alpha.rst b/docs/advanced/alpha.rst index e6146dd0c..fcfd3286a 100644 --- a/docs/advanced/alpha.rst +++ b/docs/advanced/alpha.rst @@ -50,53 +50,33 @@ Users can use ``Data Handler`` to build formulaic alphas `MACD` in qlib: .. code-block:: python - >> from qlib.data.dataset.handler import QLibDataHandler + >> from qlib.data.dataset.loader import QlibDataLoader >> MACD_EXP = '(EMA($close, 12) - EMA($close, 26))/$close - EMA((EMA($close, 12) - EMA($close, 26))/$close, 9)/$close' >> fields = [MACD_EXP] # MACD >> names = ['MACD'] - >> labels = ['$close'] # label + >> labels = ['Ref($close, -2)/Ref($close, -1) - 1'] # label >> label_names = ['LABEL'] - >> data_handler = QLibDataHandler(start_date='2010-01-01', end_date='2017-12-31', fields=fields, names=names, labels=labels, label_names=label_names) - >> TRAINER_CONFIG = { - .. "train_start_date": "2007-01-01", - .. "train_end_date": "2014-12-31", - .. "validate_start_date": "2015-01-01", - .. "validate_end_date": "2016-12-31", - .. "test_start_date": "2017-01-01", - .. "test_end_date": "2020-08-01", + >> data_loader_config = { + .. "feature": (fields, names), + .. "label": (labels, label_names) .. } - >> feature_train, label_train, feature_validate, label_validate, feature_test, label_test = data_handler.get_split_data(**TRAINER_CONFIG) - >> print(feature_train, label_train) - MACD - instrument datetime - SH600000 2010-01-04 -0.008625 - 2010-01-05 -0.007234 - 2010-01-06 -0.007693 - 2010-01-07 -0.009633 - 2010-01-08 -0.009891 - ... ... - SZ300251 2014-12-25 0.043072 - 2014-12-26 0.041345 - 2014-12-29 0.042733 - 2014-12-30 0.042066 - 2014-12-31 0.036299 - - [322025 rows x 1 columns] - LABEL - instrument datetime - SH600000 2010-01-04 4.260015 - 2010-01-05 4.292182 - 2010-01-06 4.207747 - 2010-01-07 4.113258 - 2010-01-08 4.159496 - ... ... - SZ300251 2014-12-25 4.343212 - 2014-12-26 4.470587 - 2014-12-29 4.762474 - 2014-12-30 4.369748 - 2014-12-31 4.182222 - - [322025 rows x 1 columns] + >> data_handler = QlibDataLoader(config=data_loader_config) + >> df = data_handler.load(instruments='csi300', start_time='2010-01-01', end_time='2017-12-31') + >> print(df) + feature label + MACD LABEL + datetime instrument + 2010-01-04 SH600000 -0.011547 -0.019672 + SH600004 0.002745 -0.014721 + SH600006 0.010133 0.002911 + SH600008 -0.001113 0.009818 + SH600009 0.025878 -0.017758 + ... ... ... + 2017-12-29 SZ300124 0.007306 -0.005074 + SZ300136 -0.013492 0.056352 + SZ300144 -0.000966 0.011853 + SZ300251 0.004383 0.021739 + SZ300315 -0.030557 0.012455 Reference =========== From 7e37fa710a2e3af9b5fb7b6669e6a302308c3c0f Mon Sep 17 00:00:00 2001 From: bxdd Date: Mon, 21 Dec 2020 13:58:15 +0000 Subject: [PATCH 46/85] update alpha.rst --- docs/advanced/alpha.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/advanced/alpha.rst b/docs/advanced/alpha.rst index fcfd3286a..3e73158c8 100644 --- a/docs/advanced/alpha.rst +++ b/docs/advanced/alpha.rst @@ -60,8 +60,8 @@ Users can use ``Data Handler`` to build formulaic alphas `MACD` in qlib: .. "feature": (fields, names), .. "label": (labels, label_names) .. } - >> data_handler = QlibDataLoader(config=data_loader_config) - >> df = data_handler.load(instruments='csi300', start_time='2010-01-01', end_time='2017-12-31') + >> data_loader = QlibDataLoader(config=data_loader_config) + >> df = data_loader.load(instruments='csi300', start_time='2010-01-01', end_time='2017-12-31') >> print(df) feature label MACD LABEL @@ -81,6 +81,6 @@ Users can use ``Data Handler`` to build formulaic alphas `MACD` in qlib: Reference =========== -To learn more about ``Data Handler``, please refer to `Data Handler <../component/data.html>`_ +To learn more about ``Data Loader``, please refer to `Data Loader <../component/data.html#data-loader>`_ To learn more about ``Data API``, please refer to `Data API <../component/data.html>`_ From ae0e0eca3d0911ca654efb66172483a245683bf8 Mon Sep 17 00:00:00 2001 From: hadrianl <137150224@qq.com> Date: Mon, 21 Dec 2020 18:17:20 +0800 Subject: [PATCH 47/85] better MemCacheUnit implement --- qlib/data/cache.py | 157 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 125 insertions(+), 32 deletions(-) diff --git a/qlib/data/cache.py b/qlib/data/cache.py index d53f578b1..a652018ff 100644 --- a/qlib/data/cache.py +++ b/qlib/data/cache.py @@ -13,6 +13,7 @@ import pickle import traceback import redis_lock import contextlib +import abc from pathlib import Path import numpy as np import pandas as pd @@ -39,33 +40,127 @@ class QlibCacheException(RuntimeError): pass -class MemCacheUnit(OrderedDict): +class MemCacheUnit(abc.ABC): """Memory Cache Unit.""" - # TODO: use min_heap to replace ordereddict for better performance - def __init__(self, *args, **kwargs): - self.size_limit = kwargs.pop("size_limit", None) - # limit_type: check size_limit type, length(call fun: len) or size(call fun: sys.getsizeof) + self.size_limit = kwargs.pop("size_limit", 0) self.limit_type = kwargs.pop("limit_type", "length") - super(MemCacheUnit, self).__init__(*args, **kwargs) - self._check_size_limit() + + assert self.limit_type in ["length", "sizeof"], ValueError( + "limit_type shoule be one of ['length', 'sizeof']" + ) + assert self.size_limit >= 0, ValueError( + "size_limit shoule not be negative.The default 0 means unlimited!" + ) + + # limit_flag: whether to popitem or not + self._limit_flag = 1 if self.size_limit > 0 else 0 + self._size = 0 + self.od = OrderedDict() def __setitem__(self, key, value): - super(MemCacheUnit, self).__setitem__(key, value) - self._check_size_limit() + # TODO: thread safe?__setitem__ failure might cause inconsistent size? + + # precalculate the size after od.__setitem__ + self._adjust_size(key, value) + + self.od.__setitem__(key, value) + + # move the key to end,make it latest + self.od.move_to_end(key) + + # pop the oldest items beyond size limit + while self._size * self._limit_flag > self.size_limit: + self.popitem(last=False) def __getitem__(self, key): - value = super(MemCacheUnit, self).__getitem__(key) - super(MemCacheUnit, self).__delitem__(key) - super(MemCacheUnit, self).__setitem__(key, value) - return value + v = self.od.__getitem__(key) + self.od.move_to_end(key) + return v - def _check_size_limit(self): - if self.size_limit is not None: - get_cur_size = lambda x: len(x) if self.limit_type == "length" else sum(map(sys.getsizeof, x.values())) - while get_cur_size(self) > self.size_limit: - self.popitem(last=False) + def __contains__(self, key): + return key in self.od + + def __len__(self): + return self.od.__len__() + + def __repr__(self): + return f"{self.__class__.__name__}\n{self.od.__repr__()}" + + def set_limit_size(self, limit): + self.size_limit = limit + + def set_limit(self, is_limited=True): + self._limit_flag = 1 if is_limited else 0 + + @property + def is_limited(self): + return bool(self._limit_flag) + + @property + def total_size(self): + return self._size + + def clear(self): + self._size = 0 + self.od.clear() + + @abc.abstractmethod + def _adjust_size(self, key, value): + raise NotImplementedError + + @abc.abstractmethod + def popitem(self, last=True): + raise NotImplementedError + + @abc.abstractmethod + def pop(self, key): + raise NotImplementedError + + +class MemCacheLengthUnit(MemCacheUnit): + def __init__(self, size_limit=0): + super().__init__(size_limit=size_limit, limit_type="length") + + def _adjust_size(self, key, value): + if key not in self.od: + self._size += 1 + + def popitem(self, last=True): + k, v = self.od.popitem(last=last) + self._size -= 1 + + return k, v + + def pop(self, key): + v = self.od.pop(key) + self._size -= 1 + + return v + + +class MemCacheSizeofUnit(MemCacheUnit): + def __init__(self, size_limit=0): + super().__init__(size_limit=size_limit, limit_type="sizeof") + + def _adjust_size(self, key, value): + if key in self.od: + self._size = self._size - sys.getsizeof(self.od[key]) + sys.getsizeof(value) + else: + self._size += sys.getsizeof(value) + + def popitem(self, last=True): + k, v = self.od.popitem(last=last) + self._size -= sys.getsizeof(v) + + return k, v + + def pop(self, key): + v = self.od.pop(key) + self._size -= sys.getsizeof(v) + + return v class MemCache: @@ -79,22 +174,20 @@ class MemCache: mem_cache_size_limit: cache max size. limit_type: length or sizeof; length(call fun: len), size(call fun: sys.getsizeof). """ - if limit_type not in ["length", "sizeof"]: + + size_limit = C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit + + if limit_type == "length": + self.__calendar_mem_cache = MemCacheLengthUnit(size_limit) + self.__instrument_mem_cache = MemCacheLengthUnit(size_limit) + self.__feature_mem_cache = MemCacheLengthUnit(size_limit) + elif limit_type == "sizeof": + self.__calendar_mem_cache = MemCacheSizeofUnit(size_limit) + self.__instrument_mem_cache = MemCacheSizeofUnit(size_limit) + self.__feature_mem_cache = MemCacheSizeofUnit(size_limit) + else: raise ValueError(f"limit_type must be length or sizeof, your limit_type is {limit_type}") - self.__calendar_mem_cache = MemCacheUnit( - size_limit=C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit, - limit_type=limit_type, - ) - self.__instrument_mem_cache = MemCacheUnit( - size_limit=C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit, - limit_type=limit_type, - ) - self.__feature_mem_cache = MemCacheUnit( - size_limit=C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit, - limit_type=limit_type, - ) - def __getitem__(self, key): if key == "c": return self.__calendar_mem_cache From 4c4f0f3c5e1bc31943b71e42110a2ef1b6642e24 Mon Sep 17 00:00:00 2001 From: hadrianl <137150224@qq.com> Date: Tue, 22 Dec 2020 10:50:04 +0800 Subject: [PATCH 48/85] black format --- qlib/data/cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qlib/data/cache.py b/qlib/data/cache.py index a652018ff..f6659e863 100644 --- a/qlib/data/cache.py +++ b/qlib/data/cache.py @@ -174,9 +174,9 @@ class MemCache: mem_cache_size_limit: cache max size. limit_type: length or sizeof; length(call fun: len), size(call fun: sys.getsizeof). """ - + size_limit = C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit - + if limit_type == "length": self.__calendar_mem_cache = MemCacheLengthUnit(size_limit) self.__instrument_mem_cache = MemCacheLengthUnit(size_limit) From 40bbafcaabd7374bb906fc333be8f688756871d6 Mon Sep 17 00:00:00 2001 From: hadrianl <137150224@qq.com> Date: Tue, 22 Dec 2020 10:54:13 +0800 Subject: [PATCH 49/85] black format --- qlib/data/cache.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/qlib/data/cache.py b/qlib/data/cache.py index f6659e863..00cbcec9e 100644 --- a/qlib/data/cache.py +++ b/qlib/data/cache.py @@ -47,12 +47,8 @@ class MemCacheUnit(abc.ABC): self.size_limit = kwargs.pop("size_limit", 0) self.limit_type = kwargs.pop("limit_type", "length") - assert self.limit_type in ["length", "sizeof"], ValueError( - "limit_type shoule be one of ['length', 'sizeof']" - ) - assert self.size_limit >= 0, ValueError( - "size_limit shoule not be negative.The default 0 means unlimited!" - ) + assert self.limit_type in ["length", "sizeof"], ValueError("limit_type shoule be one of ['length', 'sizeof']") + assert self.size_limit >= 0, ValueError("size_limit shoule not be negative.The default 0 means unlimited!") # limit_flag: whether to popitem or not self._limit_flag = 1 if self.size_limit > 0 else 0 From 5efe82fb566d640040381177a69c578ebf0f5323 Mon Sep 17 00:00:00 2001 From: hadrianl <137150224@qq.com> Date: Tue, 22 Dec 2020 18:38:59 +0800 Subject: [PATCH 50/85] make code cleaner --- qlib/data/cache.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/qlib/data/cache.py b/qlib/data/cache.py index 00cbcec9e..0f753b7b5 100644 --- a/qlib/data/cache.py +++ b/qlib/data/cache.py @@ -174,16 +174,16 @@ class MemCache: size_limit = C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit if limit_type == "length": - self.__calendar_mem_cache = MemCacheLengthUnit(size_limit) - self.__instrument_mem_cache = MemCacheLengthUnit(size_limit) - self.__feature_mem_cache = MemCacheLengthUnit(size_limit) + klass = MemCacheLengthUnit elif limit_type == "sizeof": - self.__calendar_mem_cache = MemCacheSizeofUnit(size_limit) - self.__instrument_mem_cache = MemCacheSizeofUnit(size_limit) - self.__feature_mem_cache = MemCacheSizeofUnit(size_limit) + klass = MemCacheSizeofUnit else: raise ValueError(f"limit_type must be length or sizeof, your limit_type is {limit_type}") + self.__calendar_mem_cache = klass(size_limit) + self.__instrument_mem_cache = klass(size_limit) + self.__feature_mem_cache = klass(size_limit) + def __getitem__(self, key): if key == "c": return self.__calendar_mem_cache From 4a62b929ad437669467ff2417eb6b8abea981aa9 Mon Sep 17 00:00:00 2001 From: hadrianl <137150224@qq.com> Date: Wed, 23 Dec 2020 10:53:56 +0800 Subject: [PATCH 51/85] add _get_value_size and remove _limit_flag --- qlib/data/cache.py | 88 ++++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 57 deletions(-) diff --git a/qlib/data/cache.py b/qlib/data/cache.py index 0f753b7b5..a50b1345e 100644 --- a/qlib/data/cache.py +++ b/qlib/data/cache.py @@ -45,13 +45,6 @@ class MemCacheUnit(abc.ABC): def __init__(self, *args, **kwargs): self.size_limit = kwargs.pop("size_limit", 0) - self.limit_type = kwargs.pop("limit_type", "length") - - assert self.limit_type in ["length", "sizeof"], ValueError("limit_type shoule be one of ['length', 'sizeof']") - assert self.size_limit >= 0, ValueError("size_limit shoule not be negative.The default 0 means unlimited!") - - # limit_flag: whether to popitem or not - self._limit_flag = 1 if self.size_limit > 0 else 0 self._size = 0 self.od = OrderedDict() @@ -66,9 +59,10 @@ class MemCacheUnit(abc.ABC): # move the key to end,make it latest self.od.move_to_end(key) - # pop the oldest items beyond size limit - while self._size * self._limit_flag > self.size_limit: - self.popitem(last=False) + if self.limited: + # pop the oldest items beyond size limit + while self._size > self.size_limit: + self.popitem(last=False) def __getitem__(self, key): v = self.od.__getitem__(key) @@ -82,17 +76,15 @@ class MemCacheUnit(abc.ABC): return self.od.__len__() def __repr__(self): - return f"{self.__class__.__name__}\n{self.od.__repr__()}" + return f"{self.__class__.__name__}\n{self.od.__repr__()}" def set_limit_size(self, limit): self.size_limit = limit - def set_limit(self, is_limited=True): - self._limit_flag = 1 if is_limited else 0 - @property - def is_limited(self): - return bool(self._limit_flag) + def limited(self): + """whether memory cache is limited""" + return self.size_limit > 0 @property def total_size(self): @@ -102,61 +94,43 @@ class MemCacheUnit(abc.ABC): self._size = 0 self.od.clear() - @abc.abstractmethod - def _adjust_size(self, key, value): - raise NotImplementedError - - @abc.abstractmethod def popitem(self, last=True): - raise NotImplementedError + k, v = self.od.popitem(last=last) + self._size -= self._get_value_size(v) + + return k, v + + def pop(self, key): + v = self.od.pop(key) + self._size -= self._get_value_size(v) + + return v + + def _adjust_size(self, key, value): + if key in self.od: + self._size -= sys.getsizeof(self.od[key]) + + self._size += self._get_value_size(value) @abc.abstractmethod - def pop(self, key): + def _get_value_size(self, value): raise NotImplementedError class MemCacheLengthUnit(MemCacheUnit): def __init__(self, size_limit=0): - super().__init__(size_limit=size_limit, limit_type="length") + super().__init__(size_limit=size_limit) - def _adjust_size(self, key, value): - if key not in self.od: - self._size += 1 - - def popitem(self, last=True): - k, v = self.od.popitem(last=last) - self._size -= 1 - - return k, v - - def pop(self, key): - v = self.od.pop(key) - self._size -= 1 - - return v + def _get_value_size(self, value): + return 1 class MemCacheSizeofUnit(MemCacheUnit): def __init__(self, size_limit=0): - super().__init__(size_limit=size_limit, limit_type="sizeof") + super().__init__(size_limit=size_limit) - def _adjust_size(self, key, value): - if key in self.od: - self._size = self._size - sys.getsizeof(self.od[key]) + sys.getsizeof(value) - else: - self._size += sys.getsizeof(value) - - def popitem(self, last=True): - k, v = self.od.popitem(last=last) - self._size -= sys.getsizeof(v) - - return k, v - - def pop(self, key): - v = self.od.pop(key) - self._size -= sys.getsizeof(v) - - return v + def _get_value_size(self, value): + return sys.getsizeof(value) class MemCache: From e8d7a226516430dc13c5d6d92f80e50ed5f51b5d Mon Sep 17 00:00:00 2001 From: hadrianl <137150224@qq.com> Date: Wed, 23 Dec 2020 11:01:31 +0800 Subject: [PATCH 52/85] fix _adjust_size --- qlib/data/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/data/cache.py b/qlib/data/cache.py index a50b1345e..6433127f4 100644 --- a/qlib/data/cache.py +++ b/qlib/data/cache.py @@ -108,7 +108,7 @@ class MemCacheUnit(abc.ABC): def _adjust_size(self, key, value): if key in self.od: - self._size -= sys.getsizeof(self.od[key]) + self._size -= self._get_value_size(self.od[key]) self._size += self._get_value_size(value) From 95de4088df5c3aa695e1beaefa95f98da1bb7f48 Mon Sep 17 00:00:00 2001 From: Jactus Date: Wed, 23 Dec 2020 11:03:07 +0800 Subject: [PATCH 53/85] Fix recorder temp dir bug --- qlib/workflow/recorder.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/qlib/workflow/recorder.py b/qlib/workflow/recorder.py index 4c1ddfdfe..34507cb0c 100644 --- a/qlib/workflow/recorder.py +++ b/qlib/workflow/recorder.py @@ -202,9 +202,6 @@ class MLflowRecorder(Recorder): super(MLflowRecorder, self).__init__(experiment_id, name) self._uri = uri self.artifact_uri = None - # set up file manager for saving objects - self.temp_dir = tempfile.mkdtemp() - self.fm = FileManager(Path(self.temp_dir).absolute()) self.client = mlflow.tracking.MlflowClient(tracking_uri=self._uri) # construct from mlflow run if mlflow_run is not None: @@ -223,6 +220,15 @@ class MLflowRecorder(Recorder): else None ) + @property + def fm(self): + # only create temp dir when using file managers + if not hasattr(self, "_fm"): + # set up file manager for saving objects + self._temp_dir = tempfile.mkdtemp() + self._fm = FileManager(Path(self._temp_dir).absolute()) + return self._fm + def start_run(self): # set the tracking uri mlflow.set_tracking_uri(self._uri) @@ -248,7 +254,7 @@ class MLflowRecorder(Recorder): self.end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") if self.status != Recorder.STATUS_S: self.status = status - shutil.rmtree(self.temp_dir) + shutil.rmtree(self._temp_dir) def save_objects(self, local_path=None, artifact_path=None, **kwargs): assert self._uri is not None, "Please start the experiment and recorder first before using recorder directly." From 660edeb94f9df3f1f8a4e78bb71f30c7cafa9da2 Mon Sep 17 00:00:00 2001 From: Jactus Date: Wed, 23 Dec 2020 18:59:43 +0800 Subject: [PATCH 54/85] Remove fm in recorder --- qlib/workflow/recorder.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/qlib/workflow/recorder.py b/qlib/workflow/recorder.py index 34507cb0c..ceb57150c 100644 --- a/qlib/workflow/recorder.py +++ b/qlib/workflow/recorder.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. import mlflow -import shutil, os, pickle, tempfile, codecs +import shutil, os, pickle, tempfile, codecs, pickle from pathlib import Path from datetime import datetime from ..utils.objm import FileManager @@ -220,15 +220,6 @@ class MLflowRecorder(Recorder): else None ) - @property - def fm(self): - # only create temp dir when using file managers - if not hasattr(self, "_fm"): - # set up file manager for saving objects - self._temp_dir = tempfile.mkdtemp() - self._fm = FileManager(Path(self._temp_dir).absolute()) - return self._fm - def start_run(self): # set the tracking uri mlflow.set_tracking_uri(self._uri) @@ -254,16 +245,18 @@ class MLflowRecorder(Recorder): self.end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") if self.status != Recorder.STATUS_S: self.status = status - shutil.rmtree(self._temp_dir) def save_objects(self, local_path=None, artifact_path=None, **kwargs): assert self._uri is not None, "Please start the experiment and recorder first before using recorder directly." if local_path is not None: self.client.log_artifacts(self.id, local_path, artifact_path) else: + temp_dir = Path(tempfile.mkdtemp()).resolve() for name, data in kwargs.items(): - self.fm.save_obj(data, name) - self.client.log_artifact(self.id, self.fm.path / name, artifact_path) + with (temp_dir / name).open("wb") as f: + pickle.dump(data, f) + self.client.log_artifact(self.id, temp_dir / name, artifact_path) + shutil.rmtree(temp_dir) def load_object(self, name): assert self._uri is not None, "Please start the experiment and recorder first before using recorder directly." From a0f32036a61416bd4db9e6aa248518c0c82fbccc Mon Sep 17 00:00:00 2001 From: zhupr Date: Tue, 15 Dec 2020 23:41:14 +0800 Subject: [PATCH 55/85] Fix the first trading day of the calendar extra in report_df --- qlib/contrib/backtest/backtest.py | 2 +- qlib/contrib/evaluate.py | 4 +- .../report/analysis_position/report.py | 4 +- qlib/utils/__init__.py | 54 ++++++++++++------- 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/qlib/contrib/backtest/backtest.py b/qlib/contrib/backtest/backtest.py index 7ee8dceb0..2e785357c 100644 --- a/qlib/contrib/backtest/backtest.py +++ b/qlib/contrib/backtest/backtest.py @@ -69,7 +69,7 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark) raise ValueError(f"The benchmark {_codes} does not exist. Please provide the right benchmark") bench = _temp_result.groupby(level="datetime")[_temp_result.columns.tolist()[0]].mean() - trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], shift=shift)) + trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], left_shift=1, right_shift=shift)) executor = SimulatorExecutor(trade_exchange, verbose=verbose) # trading apart diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 4bb5e4372..a7b715321 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -168,7 +168,7 @@ def get_exchange( codes = "all" # TODO: We must ensure that 'all.txt' includes all the stocks dates = sorted(pred.index.get_level_values("datetime").unique()) - dates = np.append(dates, get_date_range(dates[-1], shift=shift)) + dates = np.append(dates, get_date_range(dates[-1], left_shift=1, right_shift=shift)) exchange = Exchange( trade_dates=dates, @@ -340,7 +340,7 @@ def long_short_backtest( _pred_dates = pred.index.get_level_values(level="datetime") predict_dates = D.calendar(start_time=_pred_dates.min(), end_time=_pred_dates.max()) - trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], shift=shift)) + trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], left_shift=1, right_shift=shift)) long_returns = {} short_returns = {} diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py index 438aab8b9..f82e654c4 100644 --- a/qlib/contrib/report/analysis_position/report.py +++ b/qlib/contrib/report/analysis_position/report.py @@ -38,7 +38,7 @@ def _calculate_report_data(df: pd.DataFrame) -> pd.DataFrame: :param df: :return: """ - + index_names = df.index.names df.index = df.index.strftime("%Y-%m-%d") report_df = pd.DataFrame() @@ -58,6 +58,8 @@ def _calculate_report_data(df: pd.DataFrame) -> pd.DataFrame: report_df["turnover"] = df["turnover"] report_df.sort_index(ascending=True, inplace=True) + + report_df.index.names = index_names return report_df diff --git a/qlib/utils/__init__.py b/qlib/utils/__init__.py index ddc17c478..a5a4b4a56 100644 --- a/qlib/utils/__init__.py +++ b/qlib/utils/__init__.py @@ -279,8 +279,10 @@ def compare_dict_value(src_data: dict, dst_data: dict): def create_save_path(save_path=None): """Create save path - :param save_path: - :return: + Parameters + ---------- + save_path: str + """ if save_path: if not os.path.exists(save_path): @@ -471,30 +473,28 @@ def is_tradable_date(cur_date): return str(cur_date.date()) == str(D.calendar(start_time=cur_date, future=True)[0].date()) -def get_date_range(trading_date, shift, future=False): +def get_date_range(trading_date, left_shift=0, right_shift=0, future=False): """get trading date range by shift - :param trading_date: - :param shift: int - :param future: bool - :return: + Parameters + ---------- + trading_date: pd.Timestamp + left_shift: int + right_shift: int + future: bool + """ + from ..data import D - calendar = D.calendar(future=future) - if pd.to_datetime(trading_date) not in list(calendar): - raise ValueError("{} is not trading day!".format(str(trading_date))) - day_index = bisect.bisect_left(calendar, trading_date) - if 0 <= (day_index + shift) < len(calendar): - if shift > 0: - return calendar[day_index + 1 : day_index + 1 + shift] - else: - return calendar[day_index + shift : day_index] - else: - return calendar + start = get_date_by_shift(trading_date, left_shift, future=future) + end = get_date_by_shift(trading_date, right_shift, future=future) + + calendar = D.calendar(start, end, future=future) + return calendar -def get_date_by_shift(trading_date, shift, future=False): +def get_date_by_shift(trading_date, shift, future=False, clip_shift=True): """get trading date with shift bias wil cur_date e.g. : shift == 1, return next trading date shift == -1, return previous trading date @@ -502,8 +502,22 @@ def get_date_by_shift(trading_date, shift, future=False): trading_date : pandas.Timestamp current date shift : int + clip_shift: bool + """ - return get_date_range(trading_date, shift, future)[0 if shift < 0 else -1] if shift != 0 else trading_date + from qlib.data import D + + cal = D.calendar(future=future) + if pd.to_datetime(trading_date) not in list(cal): + raise ValueError("{} is not trading day!".format(str(trading_date))) + _index = bisect.bisect_left(cal, trading_date) + shift_index = _index + shift + if shift_index < 0 or shift_index >= len(cal): + if clip_shift: + shift_index = np.clip(shift_index, 0, len(cal) - 1) + else: + raise IndexError(f"The shift_index({shift_index}) of the trading day ({trading_date}) is out of range") + return cal[shift_index] def get_next_trading_date(trading_date, future=False): From 3e6877ff0fe619e6b6c583b435c4553971607017 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Fri, 25 Dec 2020 22:01:18 +0800 Subject: [PATCH 56/85] Update README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4d5b6b335..dbab30d2d 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ For more details, please refer to our paper ["Qlib: An AI-oriented Quantitative - [More About Qlib](#more-about-qlib) - [Offline Mode and Online Mode](#offline-mode-and-online-mode) - [Performance of Qlib Data Server](#performance-of-qlib-data-server) +- [Related Reports](#related-reports) - [Contributing](#contributing) @@ -291,7 +292,10 @@ Such overheads greatly slow down the data loading process. Qlib data are stored in a compact format, which is efficient to be combined into arrays for scientific computation. - +# Related Reports +- [【华泰金工林晓明团队】微软AI量化投资平台Qlib体验——华泰人工智能系列之四十](https://mp.weixin.qq.com/s/Brcd7im4NibJOJzZfMn6tQ) +- [微软也搞AI量化平台?还是开源的!](https://mp.weixin.qq.com/s/47bP5YwxfTp2uTHjUBzJQQ) +- [微矿Qlib:业内首个AI量化投资开源平台](https://mp.weixin.qq.com/s/vsJv7lsgjEi-ALYUz4CvtQ) # Contributing From 2da2e9bd9ed20a126abe19cf5b58babf2c65750a Mon Sep 17 00:00:00 2001 From: you-n-g Date: Sat, 26 Dec 2020 20:21:30 +0800 Subject: [PATCH 57/85] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index dbab30d2d..9e3bfce09 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,8 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor ``` **Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, while the command ``python setup.py install`` **can't**. +**Tips**: If you fail to install `Qlib` or run the examples in your environment, comparing your steps and the [CI workflow](.github/workflows/test.yml) may help you find the problem. + ## Data Preparation Load and prepare data by running the following code: ```bash From 4a30d9d1ec917662235397fe2499003a3339d66f Mon Sep 17 00:00:00 2001 From: Young Date: Mon, 28 Dec 2020 12:02:01 +0000 Subject: [PATCH 58/85] update github issue template --- .github/ISSUE_TEMPLATE/bug-report.md | 5 +++-- scripts/collect_info.py | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index 0ba45684e..803459be1 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -28,7 +28,8 @@ Steps to reproduce the behavior: ## Environment -**Note**: One could run `python scripts/collect_info.py` under the `qlib` directory to get the following information. +**Note**: User could run `cd scripts && python collect_info.py all` under project directory to get system information +and paste them here directly. - Qlib version: - Python version: @@ -37,4 +38,4 @@ Steps to reproduce the behavior: ## Additional Notes - \ No newline at end of file + diff --git a/scripts/collect_info.py b/scripts/collect_info.py index ba53deb30..9950ac4cb 100644 --- a/scripts/collect_info.py +++ b/scripts/collect_info.py @@ -9,6 +9,11 @@ QLIB_PATH = Path(__file__).absolute().resolve().parent.parent class InfoCollector: + ''' + User could collect system info by following commands + `cd scripts && python collect_info.py all` + - NOTE: please avoid running this script in the project folder which contains `qlib` + ''' def sys(self): """collect system related info""" for method in ["system", "machine", "platform", "version"]: From afcd91a2d02b22ff643746ccde0191179111f267 Mon Sep 17 00:00:00 2001 From: Young Date: Mon, 28 Dec 2020 12:04:03 +0000 Subject: [PATCH 59/85] black format --- scripts/collect_info.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/collect_info.py b/scripts/collect_info.py index 9950ac4cb..512a7a140 100644 --- a/scripts/collect_info.py +++ b/scripts/collect_info.py @@ -9,11 +9,12 @@ QLIB_PATH = Path(__file__).absolute().resolve().parent.parent class InfoCollector: - ''' + """ User could collect system info by following commands `cd scripts && python collect_info.py all` - NOTE: please avoid running this script in the project folder which contains `qlib` - ''' + """ + def sys(self): """collect system related info""" for method in ["system", "machine", "platform", "version"]: From 46c8d791ac0d9a3adccfcbf2a0d94e917467e056 Mon Sep 17 00:00:00 2001 From: Jactus Date: Wed, 30 Dec 2020 17:33:06 +0800 Subject: [PATCH 60/85] Fix doc bugs --- docs/component/data.rst | 1 + docs/conf.py | 5 +++++ qlib/model/base.py | 11 ++++++----- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/docs/component/data.rst b/docs/component/data.rst index cb1103e72..970ac271b 100644 --- a/docs/component/data.rst +++ b/docs/component/data.rst @@ -295,6 +295,7 @@ The ``Processor`` module in ``Qlib`` is designed to be learnable and it is respo - ``RobustZScoreNorm``: `processor` that applies robust z-score normalization. - ``CSZScoreNorm``: `processor` that applies cross sectional z-score normalization. - ``CSRankNorm``: `processor` that applies cross sectional rank normalization. +- ``CSZFillna``: `processor` that fills N/A values in a cross sectional way by the mean of the column. Users can also create their own `processor` by inheriting the base class of ``Processor``. Please refer to the implementation of all the processors for more information (`Processor Link `_). diff --git a/docs/conf.py b/docs/conf.py index 5359d08ed..6e52b0e34 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -226,3 +226,8 @@ epub_exclude_files = ["search.html"] autodoc_member_order = "bysource" autodoc_default_flags = ["members"] +autodoc_default_options = { + "members": True, + "member-order": "bysource", + "special-members": "__init__", +} diff --git a/qlib/model/base.py b/qlib/model/base.py index 4a81d5a31..5a295787f 100644 --- a/qlib/model/base.py +++ b/qlib/model/base.py @@ -30,11 +30,6 @@ class Model(BaseModel): The attribute names of learned model should `not` start with '_'. So that the model could be dumped to disk. - Parameters - ---------- - dataset : Dataset - dataset will generate the processed data from model training. - The following code example shows how to retrieve `x_train`, `y_train` and `w_train` from the `dataset`: .. code-block:: Python @@ -53,6 +48,12 @@ class Model(BaseModel): except KeyError as e: w_train = pd.DataFrame(np.ones_like(y_train.values), index=y_train.index) w_valid = pd.DataFrame(np.ones_like(y_valid.values), index=y_valid.index) + + Parameters + ---------- + dataset : Dataset + dataset will generate the processed data from model training. + """ raise NotImplementedError() From aefc98b1d7f900cc6af5fc0aa32af9d79353ce0c Mon Sep 17 00:00:00 2001 From: Wendi Li Date: Mon, 4 Jan 2021 15:30:31 +0800 Subject: [PATCH 61/85] Update workflow_config_lstm_Alpha158.yaml Delete a redundant parameter. --- examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml b/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml index eedff35a6..15fa20ec3 100755 --- a/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml +++ b/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml @@ -64,7 +64,6 @@ task: loss: mse n_jobs: 20 GPU: 0 - rnn_type: GRU dataset: class: TSDatasetH module_path: qlib.data.dataset From 18e040f50602a2868a3d6d0d3da861a4256f4e46 Mon Sep 17 00:00:00 2001 From: Wendi Li Date: Mon, 4 Jan 2021 15:33:22 +0800 Subject: [PATCH 62/85] Update workflow_config_gru_Alpha158.yaml Delete a redundant parameter. --- examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml b/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml index 13c2794bf..d3078314c 100755 --- a/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml +++ b/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml @@ -64,7 +64,6 @@ task: loss: mse n_jobs: 20 GPU: 0 - rnn_type: GRU dataset: class: TSDatasetH module_path: qlib.data.dataset From ba447d3448021cf95055cba798ba00479a86714f Mon Sep 17 00:00:00 2001 From: Young Date: Thu, 3 Dec 2020 08:26:41 +0000 Subject: [PATCH 63/85] update valute --- qlib/contrib/evaluate.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index a7b715321..07a63b8fb 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -16,6 +16,7 @@ from .backtest.backtest import backtest as backtest_func, get_date_range from ..data import D from ..config import C from ..data.dataset.utils import get_level_index +from ..utils import init_instance_by_config logger = get_module_logger("Evaluate") @@ -52,11 +53,14 @@ def get_strategy( margin=0.5, n_drop=5, risk_degree=0.95, - str_type="amount", + str_type="dropout", adjust_dates=None, ): """get_strategy + There will be 3 ways to return a stratgy. Please follow the code. + + Parameters ---------- @@ -87,7 +91,10 @@ def get_strategy( :class: Strategy an initialized strategy object """ + + # There will be 3 ways to return a strategy. if strategy is None: + # 1) create strategy with param `strategy` str_cls_dict = { "amount": "TopkAmountStrategy", "weight": "TopkWeightStrategy", @@ -102,6 +109,11 @@ def get_strategy( risk_degree=risk_degree, adjust_dates=adjust_dates, ) + elif isinstance(strategy, (dict, str)): + # 2) create strategy with init_instance_by_config + strategy = init_instance_by_config(strategy) + + # else: nothing happens. 3) Use the strategy directly if not isinstance(strategy, BaseStrategy): raise TypeError("Strategy not supported") return strategy From 4dbc8e52ec0f11b50dfc9dcf7dce3f92f0d235c8 Mon Sep 17 00:00:00 2001 From: Zhichong Fang Date: Wed, 6 Jan 2021 13:35:04 +0800 Subject: [PATCH 64/85] Update data.py Fix some typo --- qlib/tests/data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/qlib/tests/data.py b/qlib/tests/data.py index 66bfb0e29..9db6270ad 100644 --- a/qlib/tests/data.py +++ b/qlib/tests/data.py @@ -31,20 +31,20 @@ class GetData: if resp.status_code != 200: raise requests.exceptions.HTTPError() - chuck_size = 1024 + chunk_size = 1024 logger.warning( f"The data for the example is collected from Yahoo Finance. Please be aware that the quality of the data might not be perfect. (You can refer to the original data source: https://finance.yahoo.com/lookup.)" ) logger.info(f"{file_name} downloading......") with tqdm(total=int(resp.headers.get("Content-Length", 0))) as p_bar: with target_path.open("wb") as fp: - for chuck in resp.iter_content(chunk_size=chuck_size): - fp.write(chuck) - p_bar.update(chuck_size) + for chunk in resp.iter_content(chunk_size=chunk_size): + fp.write(chunk) + p_bar.update(chunk_size) self._unzip(target_path, target_dir) if self.delete_zip_file: - target_path.unlike() + target_path.unlink() @staticmethod def _unzip(file_path: Path, target_dir: Path): From 328cdeda4ad15086e582f60be5c62f991ea492ff Mon Sep 17 00:00:00 2001 From: you-n-g Date: Thu, 7 Jan 2021 11:12:49 +0800 Subject: [PATCH 65/85] Update README.md --- examples/benchmarks/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index 6ed872ef5..d440a967e 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -30,3 +30,5 @@ The numbers shown below demonstrate the performance of the entire `workflow` of | LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0312±0.00 | 0.2394±0.04| 0.0418±0.00 | 0.3324±0.03 | 0.0298±0.02 | 0.4198±0.33| -0.1348±0.03 | | ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 | | GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 | + +- The selected 20 features are based on the feature importance of a lightgbm-based model. From 231f37376bcb2c696c7c1758afa7e2bdea73585c Mon Sep 17 00:00:00 2001 From: Zhichong Fang Date: Thu, 7 Jan 2021 14:56:53 +0800 Subject: [PATCH 66/85] Fix unrecognized config bug --- qlib/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qlib/__init__.py b/qlib/__init__.py index 98920ed04..fa0f32278 100644 --- a/qlib/__init__.py +++ b/qlib/__init__.py @@ -45,9 +45,10 @@ def init(default_conf="client", **kwargs): C.set_region(kwargs.get("region", C["region"] if "region" in C else REG_CN)) for k, v in kwargs.items(): - C[k] = v if k not in C: LOG.warning("Unrecognized config %s" % k) + else: + C[k] = v C.resolve_path() From 4361a4049ade6aff53cc1025d2542ebab2338d28 Mon Sep 17 00:00:00 2001 From: Jactus Date: Thu, 7 Jan 2021 11:28:02 +0800 Subject: [PATCH 67/85] Fix create_recorder bug --- qlib/workflow/exp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qlib/workflow/exp.py b/qlib/workflow/exp.py index a92a9a9ea..c2548971a 100644 --- a/qlib/workflow/exp.py +++ b/qlib/workflow/exp.py @@ -65,13 +65,13 @@ class Experiment: """ raise NotImplementedError(f"Please implement the `end` method.") - def create_recorder(self, name=None): + def create_recorder(self, recorder_name=None): """ Create a recorder for each experiment. Parameters ---------- - name : str + recorder_name : str the name of the recorder to be created. Returns From 64cf2e2df8193fd71e46fb6af64134151a21e50d Mon Sep 17 00:00:00 2001 From: you-n-g Date: Tue, 12 Jan 2021 18:43:05 +0800 Subject: [PATCH 68/85] Update data.rst --- docs/component/data.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/component/data.rst b/docs/component/data.rst index 970ac271b..1218d0c1b 100644 --- a/docs/component/data.rst +++ b/docs/component/data.rst @@ -126,17 +126,17 @@ After conversion, users can find their Qlib format data in the directory `~/.qli The arguments of `--include_fields` should correspond with the column names of CSV files. The columns names of dataset provided by ``Qlib`` should include open, close, high, low, volume and factor at least. - `open` - The opening price + The adjusted opening price - `close` - The closing price + The adjusted closing price - `high` - The highest price + The adjusted highest price - `low` - The lowest price + The adjusted lowest price - `volume` - The trading volume + The adjusted trading volume - `factor` - The Restoration factor + The Restoration factor. Normally, original_price = adj_price / factor In the convention of `Qlib` data processing, `open, close, high, low, volume, money and factor` will be set to NaN if the stock is suspended. From 86e7c44c6b1c900b98486a22091986a57e231974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E9=9B=AA?= Date: Wed, 13 Jan 2021 15:06:10 +0800 Subject: [PATCH 69/85] Update initialization.rst need line changing --- docs/start/initialization.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/start/initialization.rst b/docs/start/initialization.rst index 05a329df7..15aa957d1 100644 --- a/docs/start/initialization.rst +++ b/docs/start/initialization.rst @@ -63,6 +63,7 @@ Besides `provider_uri` and `region`, `qlib.init` has other parameters. The follo If Qlib fails to connect redis via `redis_host` and `redis_port`, cache mechanism will not be used! Please refer to `Cache <../component/data.html#cache>`_ for details. - `exp_manager` Type: dict, optional parameter, the setting of `experiment manager` to be used in qlib. Users can specify an experiment manager class, as well as the tracking URI for all the experiments. However, please be aware that we only support input of a dictionary in the following style for `exp_manager`. For more information about `exp_manager`, users can refer to `Recorder: Experiment Management <../component/recorder.html>`_. + .. code-block:: Python # For example, if you want to set your tracking_uri to a , you can initialize qlib below From ea96c9e22de292d10868d20b16517c1622d5e2e5 Mon Sep 17 00:00:00 2001 From: Jactus Date: Wed, 6 Jan 2021 11:49:49 +0800 Subject: [PATCH 70/85] Update docs and support Python 3.9 --- .github/workflows/python-publish.yml | 2 +- .github/workflows/test.yml | 2 +- README.md | 4 ++++ docs/component/recorder.rst | 3 ++- docs/component/workflow.rst | 6 ++++++ setup.py | 3 ++- 6 files changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 08d41d198..8b94a2d3b 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: os: [windows-latest, macos-latest] - python-version: [3.6, 3.7, 3.8] + python-version: [3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 637b0c291..eab6607b1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: os: [windows-latest, ubuntu-16.04, ubuntu-18.04, ubuntu-20.04, macos-latest] - python-version: [3.6, 3.7, 3.8] + python-version: [3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 diff --git a/README.md b/README.md index 9e3bfce09..25090fc01 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,10 @@ Qlib provides a tool named `qrun` to run the whole workflow automatically (inclu cd examples # Avoid running program under the directory contains `qlib` qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml ``` + If users want to use `qrun` under debug mode, please use the following command: + ```bash + python -m pdb qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml + ``` The result of `qrun` is as follows, please refer to [Intraday Trading](https://qlib.readthedocs.io/en/latest/component/backtest.html) for more details about the result. ```bash diff --git a/docs/component/recorder.rst b/docs/component/recorder.rst index baf8f2bca..5e01140cf 100644 --- a/docs/component/recorder.rst +++ b/docs/component/recorder.rst @@ -34,8 +34,9 @@ Here is a general view of the structure of the system: - Recorder 2 - ... - ... -This experiment management system defines a set of interface and provided a concrete implementation based on the machine learning platform: ``MLFlow`` (`link `_). +This experiment management system defines a set of interface and provided a concrete implementation ``MLflowExpManager``, which is based on the machine learning platform: ``MLFlow`` (`link `_). +If users set the implementation of ``ExpManager`` to be ``MLflowExpManager``, they can use the command `mlflow ui` to visualize and check the experiment results. For more information, pleaes refer to the related documents `here `_. Qlib Recorder =================== diff --git a/docs/component/workflow.rst b/docs/component/workflow.rst index 5b81c7e78..96a764de1 100644 --- a/docs/component/workflow.rst +++ b/docs/component/workflow.rst @@ -103,6 +103,12 @@ After saving the config into `configuration.yaml`, users could start the workflo qrun configuration.yaml +If users want to use ``qrun`` under debug mode, please use the following command: + +.. code-block:: bash + + python -m pdb qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml + .. note:: `qrun` will be placed in your $PATH directory when installing ``Qlib``. diff --git a/setup.py b/setup.py index 109a6b032..b959f6c1c 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ REQUIRED = [ "statsmodels", "xlrd>=1.0.0", "plotly==4.12.0", - "matplotlib==3.1.3", + "matplotlib==3.3.3", "tables>=3.6.1", "pyyaml>=5.3.1", "mlflow>=1.12.1", @@ -119,5 +119,6 @@ setup( "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], ) From 74e08c9e37c4906aba57731dea710fcaf3dc7926 Mon Sep 17 00:00:00 2001 From: Jactus Date: Wed, 6 Jan 2021 14:01:46 +0800 Subject: [PATCH 71/85] Add deepcopy to config --- qlib/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/config.py b/qlib/config.py index 16df27b95..31b34bacd 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -20,7 +20,7 @@ import multiprocessing class Config: def __init__(self, default_conf): - self.__dict__["_default_config"] = default_conf # avoiding conflictions with __getattr__ + self.__dict__["_default_config"] = copy.deepcopy(default_conf) # avoiding conflictions with __getattr__ self.reset() def __getitem__(self, key): From 054ffa29f6ae255c027bc86410cb4a38ae68d588 Mon Sep 17 00:00:00 2001 From: Jactus Date: Wed, 13 Jan 2021 12:09:14 +0800 Subject: [PATCH 72/85] Update readme --- .github/workflows/python-publish.yml | 2 +- README.md | 17 +++++++++++++++-- setup.py | 3 +-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 8b94a2d3b..08d41d198 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: os: [windows-latest, macos-latest] - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: [3.6, 3.7, 3.8] steps: - uses: actions/checkout@v2 diff --git a/README.md b/README.md index 25090fc01..60e04091c 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,20 @@ Here is a quick **[demo](https://terminalizer.com/view/3f24561a4470)** shows how ## Installation -Users can easily install ``Qlib`` by pip according to the following command(Currently, Qlib only support Python 3.6, 3.7 and 3.8). +This table demonstrates the supported Python version of `Qlib`: +| | install with pip | install from source | plot | +| ------------- |:---------------------:|:--------------------:|:----:| +| Python 3.6 | :heavy_check_mark: | :heavy_check_mark: (only with `Anaconda`) | :heavy_check_mark: | +| Python 3.7 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| Python 3.8 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| Python 3.9 | :x: | :heavy_check_mark: | :x: | + +**Note**: +1. Please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source. +2. For Python 3.9, `Qlib` only supports running workflows such as training models and doing backtest. However, plotting are not supported for now and we will fix this when the dependent packages are upgraded in the future. + +### Install with pip +Users can easily install ``Qlib`` by pip according to the following command. ```bash pip install pyqlib @@ -77,6 +90,7 @@ Users can easily install ``Qlib`` by pip according to the following command(Curr **Note**: pip will install the latest stable qlib. However, the main branch of qlib is in active development. If you want to test the latest scripts or functions in the main branch. Please install qlib with the methods below. +### Install from source Also, users can install the latest dev version ``Qlib`` by the source code according to the following steps: * Before installing ``Qlib`` from source, users need to install some dependencies: @@ -85,7 +99,6 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor pip install numpy pip install --upgrade cython ``` - **Note**: Please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source. * Clone the repository and install ``Qlib`` as follows. * If you haven't installed qlib by the command ``pip install pyqlib`` before: diff --git a/setup.py b/setup.py index b959f6c1c..109a6b032 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ REQUIRED = [ "statsmodels", "xlrd>=1.0.0", "plotly==4.12.0", - "matplotlib==3.3.3", + "matplotlib==3.1.3", "tables>=3.6.1", "pyyaml>=5.3.1", "mlflow>=1.12.1", @@ -119,6 +119,5 @@ setup( "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", ], ) From b34890772ff5620b9ddc6fcc0bd1ac1be95e5b4e Mon Sep 17 00:00:00 2001 From: Jactus Date: Wed, 13 Jan 2021 15:26:03 +0800 Subject: [PATCH 73/85] Make note more clear --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 60e04091c..735e080a0 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ This table demonstrates the supported Python version of `Qlib`: **Note**: 1. Please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source. -2. For Python 3.9, `Qlib` only supports running workflows such as training models and doing backtest. However, plotting are not supported for now and we will fix this when the dependent packages are upgraded in the future. +2. For Python 3.9, `Qlib` supports running workflows such as training models, doing backtest and plot most of the related figures (those included in [notebook](examples/workflow_by_code.ipynb)). However, plotting for the *model performance* is not supported for now and we will fix this when the dependent packages are upgraded in the future. ### Install with pip Users can easily install ``Qlib`` by pip according to the following command. From b4a088efe88917da75a810a00c8d4d801f2ef58d Mon Sep 17 00:00:00 2001 From: Anon-Artist <61599526+Anon-Artist@users.noreply.github.com> Date: Mon, 21 Dec 2020 09:02:57 +0000 Subject: [PATCH 74/85] Update cli.py --- qlib/workflow/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/workflow/cli.py b/qlib/workflow/cli.py index 8270d2db7..f7455797b 100644 --- a/qlib/workflow/cli.py +++ b/qlib/workflow/cli.py @@ -44,7 +44,7 @@ def sys_config(config, config_path): # worflow handler function def workflow(config_path, experiment_name="workflow", uri_folder="mlruns"): with open(config_path) as fp: - config = yaml.load(fp, Loader=yaml.Loader) + config = yaml.load(fp, Loader=yaml.SafeLoader) # config the `sys` section sys_config(config, config_path) From 740c29761875202dc687e0835875ff9030bf40f8 Mon Sep 17 00:00:00 2001 From: Wendi Li Date: Sun, 17 Jan 2021 12:08:18 +0000 Subject: [PATCH 75/85] Update pytorch_alstm_ts.py --- qlib/contrib/model/pytorch_alstm_ts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 41be0824d..fabdec831 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -204,8 +204,8 @@ class ALSTM(Model): verbose=True, save_path=None, ): - dl_train = dataset.prepare("train", data_key=DataHandlerLP.DK_L) - dl_valid = dataset.prepare("valid", data_key=DataHandlerLP.DK_L) + dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader @@ -260,7 +260,7 @@ class ALSTM(Model): if not self._fitted: raise ValueError("model is not fitted yet!") - dl_test = dataset.prepare("test", data_key=DataHandlerLP.DK_I) + dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I) dl_test.config(fillna_type="ffill+bfill") test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs) self.ALSTM_model.eval() From fe60e409278a3350d4116a24c899f7a5bf1eae96 Mon Sep 17 00:00:00 2001 From: Wendi Li Date: Sun, 17 Jan 2021 12:09:48 +0000 Subject: [PATCH 76/85] Update pytorch_gats_ts.py --- qlib/contrib/model/pytorch_gats_ts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index 7b0669dba..c3b8a2f06 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -249,8 +249,8 @@ class GATs(Model): save_path=None, ): - dl_train = dataset.prepare("train", data_key=DataHandlerLP.DK_L) - dl_valid = dataset.prepare("valid", data_key=DataHandlerLP.DK_L) + dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader @@ -332,7 +332,7 @@ class GATs(Model): if not self._fitted: raise ValueError("model is not fitted yet!") - dl_test = dataset.prepare("test", data_key=DataHandlerLP.DK_I) + dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I) dl_test.config(fillna_type="ffill+bfill") sampler_test = DailyBatchSampler(dl_test) test_loader = DataLoader(dl_test, sampler=sampler_test, num_workers=self.n_jobs) From 9abc0b0d4f4de7bd65d0e6392ba75115089c2d24 Mon Sep 17 00:00:00 2001 From: Wendi Li Date: Sun, 17 Jan 2021 12:10:43 +0000 Subject: [PATCH 77/85] Update pytorch_gru_ts.py --- qlib/contrib/model/pytorch_gru_ts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index 149c9f8d0..144d97031 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -204,8 +204,8 @@ class GRU(Model): verbose=True, save_path=None, ): - dl_train = dataset.prepare("train", data_key=DataHandlerLP.DK_L) - dl_valid = dataset.prepare("valid", data_key=DataHandlerLP.DK_L) + dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader @@ -260,7 +260,7 @@ class GRU(Model): if not self._fitted: raise ValueError("model is not fitted yet!") - dl_test = dataset.prepare("test", data_key=DataHandlerLP.DK_I) + dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I) dl_test.config(fillna_type="ffill+bfill") test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs) self.GRU_model.eval() From 0524a47cf485fad7af118cde25d9981e51d029d2 Mon Sep 17 00:00:00 2001 From: Wendi Li Date: Sun, 17 Jan 2021 12:11:20 +0000 Subject: [PATCH 78/85] Update pytorch_lstm_ts.py --- qlib/contrib/model/pytorch_lstm_ts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index a7f8a2444..26409011f 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -204,8 +204,8 @@ class LSTM(Model): verbose=True, save_path=None, ): - dl_train = dataset.prepare("train", data_key=DataHandlerLP.DK_L) - dl_valid = dataset.prepare("valid", data_key=DataHandlerLP.DK_L) + dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader @@ -260,7 +260,7 @@ class LSTM(Model): if not self._fitted: raise ValueError("model is not fitted yet!") - dl_test = dataset.prepare("test", data_key=DataHandlerLP.DK_I) + dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I) dl_test.config(fillna_type="ffill+bfill") test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs) self.LSTM_model.eval() From 570bb272eb826cac17d460fa8046b19508105880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E9=9B=AA?= Date: Mon, 18 Jan 2021 16:11:15 +0800 Subject: [PATCH 79/85] fix setup error why required pymongo --- scripts/collect_info.py | 1 - setup.py | 1 - 2 files changed, 2 deletions(-) diff --git a/scripts/collect_info.py b/scripts/collect_info.py index 512a7a140..874686a3c 100644 --- a/scripts/collect_info.py +++ b/scripts/collect_info.py @@ -33,7 +33,6 @@ class InfoCollector: "scipy", "requests", "sacred", - "pymongo", "python-socketio", "redis", "python-redis-lock", diff --git a/setup.py b/setup.py index 109a6b032..7dcea5da7 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,6 @@ REQUIRED = [ "scipy>=1.0.0", "requests>=2.18.0", "sacred>=0.7.4", - "pymongo==3.7.2", "python-socketio==3.1.2", "redis>=3.0.1", "python-redis-lock>=3.3.1", From 6a9105e065801626b5c454a592e368c4e4781251 Mon Sep 17 00:00:00 2001 From: bxdd Date: Thu, 14 Jan 2021 14:22:24 +0000 Subject: [PATCH 80/85] add highfreq_backtest --- examples/workflow_with_highfreq_backtest.py | 174 ++++++++++++++++++++ qlib/contrib/backtest/backtest.py | 53 +++++- qlib/contrib/evaluate.py | 55 +++++-- 3 files changed, 261 insertions(+), 21 deletions(-) create mode 100644 examples/workflow_with_highfreq_backtest.py diff --git a/examples/workflow_with_highfreq_backtest.py b/examples/workflow_with_highfreq_backtest.py new file mode 100644 index 000000000..796dc21bb --- /dev/null +++ b/examples/workflow_with_highfreq_backtest.py @@ -0,0 +1,174 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import sys +from pathlib import Path + +import qlib +import pandas as pd +from qlib.config import REG_CN +from qlib.contrib.model.gbdt import LGBModel +from qlib.contrib.data.handler import Alpha158 +from qlib.contrib.strategy.strategy import TopkDropoutStrategy +from qlib.contrib.evaluate import ( + backtest as normal_backtest, + risk_analysis, +) +from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict +from qlib.workflow import R +from qlib.workflow.record_temp import SignalRecord, PortAnaRecord + + +if __name__ == "__main__": + + # use default data + provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir + if not exists_qlib_data(provider_uri): + print(f"Qlib data is not found in {provider_uri}") + sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts"))) + from get_data import GetData + + GetData().qlib_data(target_dir=provider_uri, region=REG_CN) + + qlib.init(provider_uri=provider_uri, region=REG_CN) + + market = "csi300" + benchmark = "SH000300" + + ################################### + # train model + ################################### + data_handler_config = { + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": market, + } + + task = { + "model": { + "class": "LGBModel", + "module_path": "qlib.contrib.model.gbdt", + "kwargs": { + "loss": "mse", + "colsample_bytree": 0.8879, + "learning_rate": 0.0421, + "subsample": 0.8789, + "lambda_l1": 205.6999, + "lambda_l2": 580.9768, + "max_depth": 8, + "num_leaves": 210, + "num_threads": 20, + }, + }, + "dataset": { + "class": "DatasetH", + "module_path": "qlib.data.dataset", + "kwargs": { + "handler": { + "class": "Alpha158", + "module_path": "qlib.contrib.data.handler", + "kwargs": data_handler_config, + }, + "segments": { + "train": ("2008-01-01", "2014-12-31"), + "valid": ("2015-01-01", "2016-12-31"), + "test": ("2017-01-01", "2020-08-01"), + }, + }, + }, + } + + highfreq_executor_config = { + "log_dir": '/shared_data/data/v-xiabi/highfreq-exe/log/', + "is_multi": True, + "resources": { + "num_cpus": 48, + "num_gpus": 2, + 'device': 'cpu', + }, + "paths": { + "raw_dir": "/shared_data/data/v-xiabi/highfreq-exe/data/backtest_test_multi/", + "feature_conf": "/shared_data/data/v-xiabi/highfreq-exe/code/rl4execution/config/test_feature_all1620.json", + }, + "env_conf": { + "name": "MARL_Accelerated", + "max_step_num": 237, + "limit": 10, + "time_interval": 30, + "interval_num": 8, + "features": "raw_30", + "max_agent_num": 49, + "log": True, + "obs": { + "name": "MultiTeacherObs", + "config": {} + }, + "action": { + "name": "Multi_Static", + "config": { + 'action_num':5, + 'action_map': [0, 0.25, 0.5, 0.75, 1], + } + }, + "reward": { + "name": "Multi_VP_Penalty_small", + "config": { + "action_penalty": 100, + "hit_penalty": 1., + } + }, + }, + "policy_conf": { + "name": "Multi_RL_backtest", + "config": { + "buy_policy": '/shared_data/data/v-xiabi/highfreq-exe/model/OPDS_buy/policy_best', + 'sell_policy': '/shared_data/data/v-xiabi/highfreq-exe/model/OPDS_sell/policy_best', + }, + }, + } + + port_analysis_config = { + "strategy": { + "class": "TopkDropoutStrategy", + "module_path": "qlib.contrib.strategy.strategy", + "kwargs": { + "topk": 50, + "n_drop": 5, + }, + }, + "backtest": { + "verbose": False, + "limit_threshold": 0.095, + "account": 100000000, + "benchmark": benchmark, + "deal_price": "close", + "open_cost": 0.0005, + "close_cost": 0.0015, + "min_cost": 5, + "highfreq_executor": { + "class": "Online_Executor", + "module_path": "/shared_data/data/v-xiabi/highfreq-exe/code/rl4execution/executor.py", + "kwargs": highfreq_executor_config, + } + }, + } + + # model initiaiton + model = init_instance_by_config(task["model"]) + dataset = init_instance_by_config(task["dataset"]) + + # start exp + with R.start(experiment_name="workflow"): + R.log_params(**flatten_dict(task)) + model.fit(dataset) + + # prediction + recorder = R.get_recorder() + sr = SignalRecord(model, dataset, recorder) + sr.generate() + + # backtest + par = PortAnaRecord(recorder, port_analysis_config) + par.generate() diff --git a/qlib/contrib/backtest/backtest.py b/qlib/contrib/backtest/backtest.py index 2e785357c..20512ece4 100644 --- a/qlib/contrib/backtest/backtest.py +++ b/qlib/contrib/backtest/backtest.py @@ -15,7 +15,7 @@ from ...data.dataset.utils import get_level_index LOG = get_module_logger("backtest") -def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark): +def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark, return_order): """Parameters ---------- pred : pandas.DataFrame @@ -71,7 +71,7 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark) trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], left_shift=1, right_shift=shift)) executor = SimulatorExecutor(trade_exchange, verbose=verbose) - + order_set = [] # trading apart for pred_date, trade_date in zip(predict_dates, trade_dates): # for loop predict date and trading date @@ -103,6 +103,8 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark) ) else: order_list = [] + + order_set.append((trade_account, order_list, trade_date)) # 4. Get result after executing order list # NOTE: The following operation will modify order.amount. # NOTE: If it is buy and the cash is insufficient, the tradable amount will be recalculated @@ -111,12 +113,49 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark) # 5. Update account information according to transaction update_account(trade_account, trade_info, trade_exchange, trade_date) - # generate backtest report - report_df = trade_account.report.generate_report_dataframe() - report_df["bench"] = bench - positions = trade_account.get_positions() - return report_df, positions + if return_order: + return order_set + else: + # generate backtest report + report_df = trade_account.report.generate_report_dataframe() + report_df["bench"] = bench + positions = trade_account.get_positions() + return report_df, positions +def backtest_highfreq(pred, executor, trade_exchange, shift, order_set, verbose, account, benchmark): + if get_level_index(pred, level="datetime") == 1: + pred = pred.swaplevel().sort_index() + + trade_account_highfreq = Account(init_cash=account) + _pred_dates = pred.index.get_level_values(level="datetime") + predict_dates = D.calendar(start_time=_pred_dates.min(), end_time=_pred_dates.max()) + + if isinstance(benchmark, pd.Series): + bench = benchmark + else: + _codes = benchmark if isinstance(benchmark, list) else [benchmark] + _temp_result = D.features( + _codes, + ["$close/Ref($close,1)-1"], + predict_dates[0], + get_date_by_shift(predict_dates[-1], shift=shift), + disk_cache=1, + ) + if len(_temp_result) == 0: + raise ValueError(f"The benchmark {_codes} does not exist. Please provide the right benchmark") + bench = _temp_result.groupby(level="datetime")[_temp_result.columns.tolist()[0]].mean() + + for trade_account, order_list, trade_date in order_set: + if verbose: + LOG.info("[I {:%Y-%m-%d}]: highfreq trade begin.".format(trade_date)) + ## TODO: kanren group need to merge code here + trade_info = executor.execute(trade_account, order_list, trade_date) + update_account(trade_account_highfreq, trade_info, trade_exchange, trade_date) + + report_df = trade_account_highfreq.report.generate_report_dataframe() + report_df["bench"] = bench + positions = trade_account_highfreq.get_positions() + return report_df, positions def update_account(trade_account, trade_info, trade_exchange, trade_date): """Update the account and strategy diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 07a63b8fb..b63ff3746 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -11,7 +11,7 @@ from ..log import get_module_logger from . import strategy as strategy_pool from .strategy.strategy import BaseStrategy from .backtest.exchange import Exchange -from .backtest.backtest import backtest as backtest_func, get_date_range +from .backtest.backtest import backtest as backtest_func, get_date_range, backtest_highfreq as backtest_highfreq_func from ..data import D from ..config import C @@ -272,19 +272,46 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k ex_args = {k: v for k, v in kwargs.items() if k in spec.args} trade_exchange = get_exchange(pred, **ex_args) - # run backtest - report_df, positions = backtest_func( - pred=pred, - strategy=strategy, - trade_exchange=trade_exchange, - shift=shift, - verbose=verbose, - account=account, - benchmark=benchmark, - ) - # for compatibility of the old API. return the dict positions - positions = {k: p.position for k, p in positions.items()} - return report_df, positions + + if kwargs.get('highfreq_executor', False): + order_set = backtest_func( + pred=pred, + strategy=strategy, + trade_exchange=trade_exchange, + shift=shift, + verbose=verbose, + account=account, + benchmark=benchmark, + return_order=True, + ) + executor = init_instance_by_config(kwargs.get('highfreq_executor')) + report_df, positions = backtest_highfreq_func( + pred=pred, + executor=executor, + trade_exchange=trade_exchange, + shift=shift, + order_set=order_set, + verbose=verbose, + account=account, + benchmark=benchmark + ) + positions = {k: p.position for k, p in positions.items()} + return report_df, positions + else: + # run backtest + report_df, positions = backtest_func( + pred=pred, + strategy=strategy, + trade_exchange=trade_exchange, + shift=shift, + verbose=verbose, + account=account, + benchmark=benchmark, + return_order=False, + ) + # for compatibility of the old API. return the dict positions + positions = {k: p.position for k, p in positions.items()} + return report_df, positions def long_short_backtest( From 917261dbf609c075f71e4b6476fc315b7391d98a Mon Sep 17 00:00:00 2001 From: bxdd Date: Sun, 17 Jan 2021 22:47:58 +0900 Subject: [PATCH 81/85] update backtest --- examples/workflow_with_highfreq_backtest.py | 16 ++++++++-------- qlib/contrib/backtest/backtest.py | 14 +++++++++----- qlib/contrib/evaluate.py | 2 -- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/examples/workflow_with_highfreq_backtest.py b/examples/workflow_with_highfreq_backtest.py index 796dc21bb..682ec7a7f 100644 --- a/examples/workflow_with_highfreq_backtest.py +++ b/examples/workflow_with_highfreq_backtest.py @@ -39,10 +39,10 @@ if __name__ == "__main__": # train model ################################### data_handler_config = { - "start_time": "2008-01-01", - "end_time": "2020-08-01", - "fit_start_time": "2008-01-01", - "fit_end_time": "2014-12-31", + "start_time": "2012-01-01", + "end_time": "2019-06-01", + "fit_start_time": "2012-01-01", + "fit_end_time": "2017-04-30", "instruments": market, } @@ -72,9 +72,9 @@ if __name__ == "__main__": "kwargs": data_handler_config, }, "segments": { - "train": ("2008-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2016-12-31"), - "test": ("2017-01-01", "2020-08-01"), + "train": ("2012-01-01", "2017-04-30"), + "valid": ("2017-05-01", "2019-04-30"), + "test": ("2019-05-01", "2019-06-01"), }, }, }, @@ -89,7 +89,7 @@ if __name__ == "__main__": 'device': 'cpu', }, "paths": { - "raw_dir": "/shared_data/data/v-xiabi/highfreq-exe/data/backtest_test_multi/", + "raw_dir": "/shared_data/data/v-xiabi/highfreq-exe/data/backtest_test_multi", "feature_conf": "/shared_data/data/v-xiabi/highfreq-exe/code/rl4execution/config/test_feature_all1620.json", }, "env_conf": { diff --git a/qlib/contrib/backtest/backtest.py b/qlib/contrib/backtest/backtest.py index 20512ece4..c14699bac 100644 --- a/qlib/contrib/backtest/backtest.py +++ b/qlib/contrib/backtest/backtest.py @@ -123,9 +123,6 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark, return report_df, positions def backtest_highfreq(pred, executor, trade_exchange, shift, order_set, verbose, account, benchmark): - if get_level_index(pred, level="datetime") == 1: - pred = pred.swaplevel().sort_index() - trade_account_highfreq = Account(init_cash=account) _pred_dates = pred.index.get_level_values(level="datetime") predict_dates = D.calendar(start_time=_pred_dates.min(), end_time=_pred_dates.max()) @@ -149,9 +146,16 @@ def backtest_highfreq(pred, executor, trade_exchange, shift, order_set, verbose, if verbose: LOG.info("[I {:%Y-%m-%d}]: highfreq trade begin.".format(trade_date)) ## TODO: kanren group need to merge code here - trade_info = executor.execute(trade_account, order_list, trade_date) - update_account(trade_account_highfreq, trade_info, trade_exchange, trade_date) + print(trade_account, order_list, trade_date) + executor.execute(trade_account, order_list, trade_date) + for trade_account, order_list, trade_date in order_set: + trade_info = executor.get_res() + print(trade_info) + update_account(trade_account_highfreq, trade_info, trade_exchange, trade_date) + if verbose: + LOG.info("[I {:%Y-%m-%d}]: highfreq trade end.".format(trade_date)) + executor.close() report_df = trade_account_highfreq.report.generate_report_dataframe() report_df["bench"] = bench positions = trade_account_highfreq.get_positions() diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index b63ff3746..7232c3854 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -271,8 +271,6 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k spec = inspect.getfullargspec(get_exchange) ex_args = {k: v for k, v in kwargs.items() if k in spec.args} trade_exchange = get_exchange(pred, **ex_args) - - if kwargs.get('highfreq_executor', False): order_set = backtest_func( pred=pred, From 0e0970f06eccba86483f137a7c48f5478b1a4949 Mon Sep 17 00:00:00 2001 From: bxdd Date: Mon, 18 Jan 2021 21:25:04 +0900 Subject: [PATCH 82/85] update backtest --- examples/workflow_by_code.py | 1 + examples/workflow_with_highfreq_backtest.py | 174 -------------------- qlib/contrib/backtest/backtest.py | 66 ++------ qlib/contrib/evaluate.py | 118 ++++++++----- qlib/workflow/record_temp.py | 7 +- 5 files changed, 97 insertions(+), 269 deletions(-) delete mode 100644 examples/workflow_with_highfreq_backtest.py diff --git a/examples/workflow_by_code.py b/examples/workflow_by_code.py index 8fdb4332f..b8cf3f935 100644 --- a/examples/workflow_by_code.py +++ b/examples/workflow_by_code.py @@ -98,6 +98,7 @@ if __name__ == "__main__": "open_cost": 0.0005, "close_cost": 0.0015, "min_cost": 5, + "return_order": True, }, } diff --git a/examples/workflow_with_highfreq_backtest.py b/examples/workflow_with_highfreq_backtest.py deleted file mode 100644 index 682ec7a7f..000000000 --- a/examples/workflow_with_highfreq_backtest.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import sys -from pathlib import Path - -import qlib -import pandas as pd -from qlib.config import REG_CN -from qlib.contrib.model.gbdt import LGBModel -from qlib.contrib.data.handler import Alpha158 -from qlib.contrib.strategy.strategy import TopkDropoutStrategy -from qlib.contrib.evaluate import ( - backtest as normal_backtest, - risk_analysis, -) -from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict -from qlib.workflow import R -from qlib.workflow.record_temp import SignalRecord, PortAnaRecord - - -if __name__ == "__main__": - - # use default data - provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir - if not exists_qlib_data(provider_uri): - print(f"Qlib data is not found in {provider_uri}") - sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts"))) - from get_data import GetData - - GetData().qlib_data(target_dir=provider_uri, region=REG_CN) - - qlib.init(provider_uri=provider_uri, region=REG_CN) - - market = "csi300" - benchmark = "SH000300" - - ################################### - # train model - ################################### - data_handler_config = { - "start_time": "2012-01-01", - "end_time": "2019-06-01", - "fit_start_time": "2012-01-01", - "fit_end_time": "2017-04-30", - "instruments": market, - } - - task = { - "model": { - "class": "LGBModel", - "module_path": "qlib.contrib.model.gbdt", - "kwargs": { - "loss": "mse", - "colsample_bytree": 0.8879, - "learning_rate": 0.0421, - "subsample": 0.8789, - "lambda_l1": 205.6999, - "lambda_l2": 580.9768, - "max_depth": 8, - "num_leaves": 210, - "num_threads": 20, - }, - }, - "dataset": { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, - "segments": { - "train": ("2012-01-01", "2017-04-30"), - "valid": ("2017-05-01", "2019-04-30"), - "test": ("2019-05-01", "2019-06-01"), - }, - }, - }, - } - - highfreq_executor_config = { - "log_dir": '/shared_data/data/v-xiabi/highfreq-exe/log/', - "is_multi": True, - "resources": { - "num_cpus": 48, - "num_gpus": 2, - 'device': 'cpu', - }, - "paths": { - "raw_dir": "/shared_data/data/v-xiabi/highfreq-exe/data/backtest_test_multi", - "feature_conf": "/shared_data/data/v-xiabi/highfreq-exe/code/rl4execution/config/test_feature_all1620.json", - }, - "env_conf": { - "name": "MARL_Accelerated", - "max_step_num": 237, - "limit": 10, - "time_interval": 30, - "interval_num": 8, - "features": "raw_30", - "max_agent_num": 49, - "log": True, - "obs": { - "name": "MultiTeacherObs", - "config": {} - }, - "action": { - "name": "Multi_Static", - "config": { - 'action_num':5, - 'action_map': [0, 0.25, 0.5, 0.75, 1], - } - }, - "reward": { - "name": "Multi_VP_Penalty_small", - "config": { - "action_penalty": 100, - "hit_penalty": 1., - } - }, - }, - "policy_conf": { - "name": "Multi_RL_backtest", - "config": { - "buy_policy": '/shared_data/data/v-xiabi/highfreq-exe/model/OPDS_buy/policy_best', - 'sell_policy': '/shared_data/data/v-xiabi/highfreq-exe/model/OPDS_sell/policy_best', - }, - }, - } - - port_analysis_config = { - "strategy": { - "class": "TopkDropoutStrategy", - "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, - }, - "backtest": { - "verbose": False, - "limit_threshold": 0.095, - "account": 100000000, - "benchmark": benchmark, - "deal_price": "close", - "open_cost": 0.0005, - "close_cost": 0.0015, - "min_cost": 5, - "highfreq_executor": { - "class": "Online_Executor", - "module_path": "/shared_data/data/v-xiabi/highfreq-exe/code/rl4execution/executor.py", - "kwargs": highfreq_executor_config, - } - }, - } - - # model initiaiton - model = init_instance_by_config(task["model"]) - dataset = init_instance_by_config(task["dataset"]) - - # start exp - with R.start(experiment_name="workflow"): - R.log_params(**flatten_dict(task)) - model.fit(dataset) - - # prediction - recorder = R.get_recorder() - sr = SignalRecord(model, dataset, recorder) - sr.generate() - - # backtest - par = PortAnaRecord(recorder, port_analysis_config) - par.generate() diff --git a/qlib/contrib/backtest/backtest.py b/qlib/contrib/backtest/backtest.py index c14699bac..b87d6afe3 100644 --- a/qlib/contrib/backtest/backtest.py +++ b/qlib/contrib/backtest/backtest.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd from ...utils import get_date_by_shift, get_date_range -from ..online.executor import SimulatorExecutor from ...data import D from .account import Account from ...config import C @@ -15,7 +14,7 @@ from ...data.dataset.utils import get_level_index LOG = get_module_logger("backtest") -def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark, return_order): +def backtest(pred, strategy, executor, trade_exchange, shift, verbose, account, benchmark, return_order): """Parameters ---------- pred : pandas.DataFrame @@ -70,8 +69,8 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark, bench = _temp_result.groupby(level="datetime")[_temp_result.columns.tolist()[0]].mean() trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], left_shift=1, right_shift=shift)) - executor = SimulatorExecutor(trade_exchange, verbose=verbose) - order_set = [] + if return_order: + multi_order_list = [] # trading apart for pred_date, trade_date in zip(predict_dates, trade_dates): # for loop predict date and trading date @@ -103,8 +102,8 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark, ) else: order_list = [] - - order_set.append((trade_account, order_list, trade_date)) + if return_order: + multi_order_list.append((trade_account, order_list, trade_date)) # 4. Get result after executing order list # NOTE: The following operation will modify order.amount. # NOTE: If it is buy and the cash is insufficient, the tradable amount will be recalculated @@ -113,53 +112,16 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark, # 5. Update account information according to transaction update_account(trade_account, trade_info, trade_exchange, trade_date) - if return_order: - return order_set - else: - # generate backtest report - report_df = trade_account.report.generate_report_dataframe() - report_df["bench"] = bench - positions = trade_account.get_positions() - return report_df, positions - -def backtest_highfreq(pred, executor, trade_exchange, shift, order_set, verbose, account, benchmark): - trade_account_highfreq = Account(init_cash=account) - _pred_dates = pred.index.get_level_values(level="datetime") - predict_dates = D.calendar(start_time=_pred_dates.min(), end_time=_pred_dates.max()) - - if isinstance(benchmark, pd.Series): - bench = benchmark - else: - _codes = benchmark if isinstance(benchmark, list) else [benchmark] - _temp_result = D.features( - _codes, - ["$close/Ref($close,1)-1"], - predict_dates[0], - get_date_by_shift(predict_dates[-1], shift=shift), - disk_cache=1, - ) - if len(_temp_result) == 0: - raise ValueError(f"The benchmark {_codes} does not exist. Please provide the right benchmark") - bench = _temp_result.groupby(level="datetime")[_temp_result.columns.tolist()[0]].mean() - - for trade_account, order_list, trade_date in order_set: - if verbose: - LOG.info("[I {:%Y-%m-%d}]: highfreq trade begin.".format(trade_date)) - ## TODO: kanren group need to merge code here - print(trade_account, order_list, trade_date) - executor.execute(trade_account, order_list, trade_date) - - for trade_account, order_list, trade_date in order_set: - trade_info = executor.get_res() - print(trade_info) - update_account(trade_account_highfreq, trade_info, trade_exchange, trade_date) - if verbose: - LOG.info("[I {:%Y-%m-%d}]: highfreq trade end.".format(trade_date)) - executor.close() - report_df = trade_account_highfreq.report.generate_report_dataframe() + # generate backtest report + report_df = trade_account.report.generate_report_dataframe() report_df["bench"] = bench - positions = trade_account_highfreq.get_positions() - return report_df, positions + positions = trade_account.get_positions() + + report_dict = {"report_df": report_df, "positions": positions} + if return_order: + report_dict.update({"order_list": multi_order_list}) + return report_dict + def update_account(trade_account, trade_info, trade_exchange, trade_date): """Update the account and strategy diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 7232c3854..44627eef1 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -11,7 +11,8 @@ from ..log import get_module_logger from . import strategy as strategy_pool from .strategy.strategy import BaseStrategy from .backtest.exchange import Exchange -from .backtest.backtest import backtest as backtest_func, get_date_range, backtest_highfreq as backtest_highfreq_func +from .backtest.backtest import backtest as backtest_func, get_date_range +from .online.executor import BaseExecutor, SimulatorExecutor from ..data import D from ..config import C @@ -100,7 +101,7 @@ def get_strategy( "weight": "TopkWeightStrategy", "dropout": "TopkDropoutStrategy", } - logger.info("Create new streategy ") + logger.info("Create new strategy ") str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( topk=topk, @@ -111,6 +112,7 @@ def get_strategy( ) elif isinstance(strategy, (dict, str)): # 2) create strategy with init_instance_by_config + logger.info("Create new strategy ") strategy = init_instance_by_config(strategy) # else: nothing happens. 3) Use the strategy directly @@ -196,8 +198,48 @@ def get_exchange( return exchange +def get_executor( + executor=None, + trade_exchange=None, + verbose=True, +): + """get_executor + + There will be 3 ways to return a executor. Please follow the code. + + Parameters + ---------- + + executor : BaseExecutor + executor used in backtest. + trade_exchange : Exchange + exchange used in executor + verbose : bool + whether to print log. + + Returns + ------- + :class: BaseExecutor + an initialized BaseExecutor object + """ + # There will be 3 ways to return a executor. + if executor is None: + # 1) create executor with param `executor` + logger.info("Create new executor ") + executor = SimulatorExecutor(trade_exchange=trade_exchange, verbose=verbose) + elif isinstance(executor, (dict, str)): + # 2) create executor with config + logger.info("Create new executor ") + executor = init_instance_by_config(executor) + + # 3) Use the executor directly + if not isinstance(executor, BaseExecutor): + raise TypeError("Executor not supported") + return executor + + # This is the API for compatibility for legacy code -def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **kwargs): +def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, return_order=False, **kwargs): """This function will help you set a reasonable Exchange and provide default value for strategy Parameters ---------- @@ -214,6 +256,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k benchmark code, default is SH000905 CSI 500. verbose : bool whether to print log. + return_order : bool + whther to return order list - **strategy related arguments** @@ -261,6 +305,14 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k will we pass the codes extracted from the pred to the exchange. .. note:: This will be faster with offline qlib. + + - **executor related arguments** + + executor : BaseExecutor() + executor used in backtest. + verbose : bool + whether to print log. + """ # check strategy: spec = inspect.getfullargspec(get_strategy) @@ -271,45 +323,27 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k spec = inspect.getfullargspec(get_exchange) ex_args = {k: v for k, v in kwargs.items() if k in spec.args} trade_exchange = get_exchange(pred, **ex_args) - if kwargs.get('highfreq_executor', False): - order_set = backtest_func( - pred=pred, - strategy=strategy, - trade_exchange=trade_exchange, - shift=shift, - verbose=verbose, - account=account, - benchmark=benchmark, - return_order=True, - ) - executor = init_instance_by_config(kwargs.get('highfreq_executor')) - report_df, positions = backtest_highfreq_func( - pred=pred, - executor=executor, - trade_exchange=trade_exchange, - shift=shift, - order_set=order_set, - verbose=verbose, - account=account, - benchmark=benchmark - ) - positions = {k: p.position for k, p in positions.items()} - return report_df, positions - else: - # run backtest - report_df, positions = backtest_func( - pred=pred, - strategy=strategy, - trade_exchange=trade_exchange, - shift=shift, - verbose=verbose, - account=account, - benchmark=benchmark, - return_order=False, - ) - # for compatibility of the old API. return the dict positions - positions = {k: p.position for k, p in positions.items()} - return report_df, positions + + # init executor: + executor = get_executor(executor=kwargs.get("executor"), trade_exchange=trade_exchange, verbose=verbose) + + # run backtest + report_dict = backtest_func( + pred=pred, + strategy=strategy, + executor=executor, + trade_exchange=trade_exchange, + shift=shift, + verbose=verbose, + account=account, + benchmark=benchmark, + return_order=return_order, + ) + # for compatibility of the old API. return the dict positions + + positions = report_dict.get("positions") + report_dict.update({"positions": {k: p.position for k, p in positions.items()}}) + return report_dict def long_short_backtest( diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index bcbcd3cb4..188857e86 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -241,9 +241,14 @@ class PortAnaRecord(SignalRecord): # custom strategy and get backtest pred_score = super().load() - report_normal, positions_normal = normal_backtest(pred_score, strategy=self.strategy, **self.backtest_config) + report_dict = normal_backtest(pred_score, strategy=self.strategy, **self.backtest_config) + report_normal = report_dict.get("report_df") + positions_normal = report_dict.get("positions") self.recorder.save_objects(**{"report_normal.pkl": report_normal}, artifact_path=PortAnaRecord.get_path()) self.recorder.save_objects(**{"positions_normal.pkl": positions_normal}, artifact_path=PortAnaRecord.get_path()) + order_normal = report_dict.get("order_list") + if order_normal: + self.recorder.save_objects(**{"order_normal.pkl": order_normal}, artifact_path=PortAnaRecord.get_path()) # analysis analysis = dict() From 65d8af41e70b52b249683356b49dabc791387349 Mon Sep 17 00:00:00 2001 From: bxdd Date: Mon, 18 Jan 2021 23:36:05 +0900 Subject: [PATCH 83/85] restructure backtest --- qlib/contrib/backtest/__init__.py | 311 ++++++++++++++++++++++++++++++ qlib/contrib/evaluate.py | 237 +---------------------- qlib/workflow/record_temp.py | 7 +- 3 files changed, 322 insertions(+), 233 deletions(-) diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index 6fcdf2142..483b15d39 100644 --- a/qlib/contrib/backtest/__init__.py +++ b/qlib/contrib/backtest/__init__.py @@ -6,3 +6,314 @@ from .account import Account from .position import Position from .exchange import Exchange from .report import Report +from .backtest import backtest as backtest_func, get_date_range + +import numpy as np +import inspect +from ...utils import init_instance_by_config +from ...log import get_module_logger +from ...config import C + +logger = get_module_logger("backtest caller") + +def get_strategy( + strategy=None, + topk=50, + margin=0.5, + n_drop=5, + risk_degree=0.95, + str_type="dropout", + adjust_dates=None, +): + """get_strategy + + There will be 3 ways to return a stratgy. Please follow the code. + + + Parameters + ---------- + + strategy : Strategy() + strategy used in backtest. + topk : int (Default value: 50) + top-N stocks to buy. + margin : int or float(Default value: 0.5) + - if isinstance(margin, int): + + sell_limit = margin + + - else: + + sell_limit = pred_in_a_day.count() * margin + + buffer margin, in single score_mode, continue holding stock if it is in nlargest(sell_limit). + sell_limit should be no less than topk. + n_drop : int + number of stocks to be replaced in each trading date. + risk_degree: float + 0-1, 0.95 for example, use 95% money to trade. + str_type: 'amount', 'weight' or 'dropout' + strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy. + + Returns + ------- + :class: Strategy + an initialized strategy object + """ + + + # There will be 3 ways to return a strategy. + if strategy is None: + # 1) create strategy with param `strategy` + str_cls_dict = { + "amount": "TopkAmountStrategy", + "weight": "TopkWeightStrategy", + "dropout": "TopkDropoutStrategy", + } + logger.info("Create new strategy ") + from .. import strategy as strategy_pool + str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) + strategy = str_cls( + topk=topk, + buffer_margin=margin, + n_drop=n_drop, + risk_degree=risk_degree, + adjust_dates=adjust_dates, + ) + elif isinstance(strategy, (dict, str)): + # 2) create strategy with init_instance_by_config + logger.info("Create new strategy ") + strategy = init_instance_by_config(strategy) + + from ..strategy.strategy import BaseStrategy + # else: nothing happens. 3) Use the strategy directly + if not isinstance(strategy, BaseStrategy): + raise TypeError("Strategy not supported") + return strategy + + +def get_exchange( + pred, + exchange=None, + subscribe_fields=[], + open_cost=0.0015, + close_cost=0.0025, + min_cost=5.0, + trade_unit=None, + limit_threshold=None, + deal_price=None, + extract_codes=False, + shift=1, +): + """get_exchange + + Parameters + ---------- + + # exchange related arguments + exchange: Exchange(). + subscribe_fields: list + subscribe fields. + open_cost : float + open transaction cost. + close_cost : float + close transaction cost. + min_cost : float + min transaction cost. + trade_unit : int + 100 for China A. + deal_price: str + dealing price type: 'close', 'open', 'vwap'. + limit_threshold : float + limit move 0.1 (10%) for example, long and short with same limit. + extract_codes: bool + will we pass the codes extracted from the pred to the exchange. + NOTE: This will be faster with offline qlib. + + Returns + ------- + :class: Exchange + an initialized Exchange object + """ + + if trade_unit is None: + trade_unit = C.trade_unit + if limit_threshold is None: + limit_threshold = C.limit_threshold + if deal_price is None: + deal_price = C.deal_price + if exchange is None: + logger.info("Create new exchange") + # handle exception for deal_price + if deal_price[0] != "$": + deal_price = "$" + deal_price + if extract_codes: + codes = sorted(pred.index.get_level_values("instrument").unique()) + else: + codes = "all" # TODO: We must ensure that 'all.txt' includes all the stocks + + dates = sorted(pred.index.get_level_values("datetime").unique()) + dates = np.append(dates, get_date_range(dates[-1], left_shift=1, right_shift=shift)) + + exchange = Exchange( + trade_dates=dates, + codes=codes, + deal_price=deal_price, + subscribe_fields=subscribe_fields, + limit_threshold=limit_threshold, + open_cost=open_cost, + close_cost=close_cost, + min_cost=min_cost, + trade_unit=trade_unit, + ) + return exchange + + +def get_executor( + executor=None, + trade_exchange=None, + verbose=True, +): + """get_executor + + There will be 3 ways to return a executor. Please follow the code. + + Parameters + ---------- + + executor : BaseExecutor + executor used in backtest. + trade_exchange : Exchange + exchange used in executor + verbose : bool + whether to print log. + + Returns + ------- + :class: BaseExecutor + an initialized BaseExecutor object + """ + + # There will be 3 ways to return a executor. + if executor is None: + # 1) create executor with param `executor` + logger.info("Create new executor ") + from ..online.executor import SimulatorExecutor + executor = SimulatorExecutor(trade_exchange=trade_exchange, verbose=verbose) + elif isinstance(executor, (dict, str)): + # 2) create executor with config + logger.info("Create new executor ") + executor = init_instance_by_config(executor) + + from ..online.executor import BaseExecutor + # 3) Use the executor directly + if not isinstance(executor, BaseExecutor): + raise TypeError("Executor not supported") + return executor + +# This is the API for compatibility for legacy code +def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, return_order=False, **kwargs): + """This function will help you set a reasonable Exchange and provide default value for strategy + Parameters + ---------- + + - **backtest workflow related or commmon arguments** + + pred : pandas.DataFrame + predict should has index and one `score` column. + account : float + init account value. + shift : int + whether to shift prediction by one day. + benchmark : str + benchmark code, default is SH000905 CSI 500. + verbose : bool + whether to print log. + return_order : bool + whether to return order list + + - **strategy related arguments** + + strategy : Strategy() + strategy used in backtest. + topk : int (Default value: 50) + top-N stocks to buy. + margin : int or float(Default value: 0.5) + - if isinstance(margin, int): + + sell_limit = margin + + - else: + + sell_limit = pred_in_a_day.count() * margin + + buffer margin, in single score_mode, continue holding stock if it is in nlargest(sell_limit). + sell_limit should be no less than topk. + n_drop : int + number of stocks to be replaced in each trading date. + risk_degree: float + 0-1, 0.95 for example, use 95% money to trade. + str_type: 'amount', 'weight' or 'dropout' + strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy. + + - **exchange related arguments** + + exchange: Exchange() + pass the exchange for speeding up. + subscribe_fields: list + subscribe fields. + open_cost : float + open transaction cost. The default value is 0.002(0.2%). + close_cost : float + close transaction cost. The default value is 0.002(0.2%). + min_cost : float + min transaction cost. + trade_unit : int + 100 for China A. + deal_price: str + dealing price type: 'close', 'open', 'vwap'. + limit_threshold : float + limit move 0.1 (10%) for example, long and short with same limit. + extract_codes: bool + will we pass the codes extracted from the pred to the exchange. + + .. note:: This will be faster with offline qlib. + + - **executor related arguments** + + executor : BaseExecutor() + executor used in backtest. + verbose : bool + whether to print log. + + """ + # check strategy: + spec = inspect.getfullargspec(get_strategy) + str_args = {k: v for k, v in kwargs.items() if k in spec.args} + strategy = get_strategy(**str_args) + + # init exchange: + spec = inspect.getfullargspec(get_exchange) + ex_args = {k: v for k, v in kwargs.items() if k in spec.args} + trade_exchange = get_exchange(pred, **ex_args) + + # init executor: + executor = get_executor(executor=kwargs.get("executor"), trade_exchange=trade_exchange, verbose=verbose) + + # run backtest + report_dict = backtest_func( + pred=pred, + strategy=strategy, + executor=executor, + trade_exchange=trade_exchange, + shift=shift, + verbose=verbose, + account=account, + benchmark=benchmark, + return_order=return_order, + ) + # for compatibility of the old API. return the dict positions + + positions = report_dict.get("positions") + report_dict.update({"positions": {k: p.position for k, p in positions.items()}}) + return report_dict \ No newline at end of file diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 44627eef1..6ac7511a7 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -6,18 +6,15 @@ from __future__ import print_function import numpy as np import pandas as pd -import inspect +import warnings from ..log import get_module_logger -from . import strategy as strategy_pool -from .strategy.strategy import BaseStrategy -from .backtest.exchange import Exchange -from .backtest.backtest import backtest as backtest_func, get_date_range -from .online.executor import BaseExecutor, SimulatorExecutor +from .backtest import get_exchange, backtest as backtest_func +from .backtest.backtest import get_date_range from ..data import D from ..config import C from ..data.dataset.utils import get_level_index -from ..utils import init_instance_by_config + logger = get_module_logger("Evaluate") @@ -48,198 +45,9 @@ def risk_analysis(r, N=252): return res -def get_strategy( - strategy=None, - topk=50, - margin=0.5, - n_drop=5, - risk_degree=0.95, - str_type="dropout", - adjust_dates=None, -): - """get_strategy - - There will be 3 ways to return a stratgy. Please follow the code. - - - Parameters - ---------- - - strategy : Strategy() - strategy used in backtest. - topk : int (Default value: 50) - top-N stocks to buy. - margin : int or float(Default value: 0.5) - - if isinstance(margin, int): - - sell_limit = margin - - - else: - - sell_limit = pred_in_a_day.count() * margin - - buffer margin, in single score_mode, continue holding stock if it is in nlargest(sell_limit). - sell_limit should be no less than topk. - n_drop : int - number of stocks to be replaced in each trading date. - risk_degree: float - 0-1, 0.95 for example, use 95% money to trade. - str_type: 'amount', 'weight' or 'dropout' - strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy. - - Returns - ------- - :class: Strategy - an initialized strategy object - """ - - # There will be 3 ways to return a strategy. - if strategy is None: - # 1) create strategy with param `strategy` - str_cls_dict = { - "amount": "TopkAmountStrategy", - "weight": "TopkWeightStrategy", - "dropout": "TopkDropoutStrategy", - } - logger.info("Create new strategy ") - str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) - strategy = str_cls( - topk=topk, - buffer_margin=margin, - n_drop=n_drop, - risk_degree=risk_degree, - adjust_dates=adjust_dates, - ) - elif isinstance(strategy, (dict, str)): - # 2) create strategy with init_instance_by_config - logger.info("Create new strategy ") - strategy = init_instance_by_config(strategy) - - # else: nothing happens. 3) Use the strategy directly - if not isinstance(strategy, BaseStrategy): - raise TypeError("Strategy not supported") - return strategy - - -def get_exchange( - pred, - exchange=None, - subscribe_fields=[], - open_cost=0.0015, - close_cost=0.0025, - min_cost=5.0, - trade_unit=None, - limit_threshold=None, - deal_price=None, - extract_codes=False, - shift=1, -): - """get_exchange - - Parameters - ---------- - - # exchange related arguments - exchange: Exchange(). - subscribe_fields: list - subscribe fields. - open_cost : float - open transaction cost. - close_cost : float - close transaction cost. - min_cost : float - min transaction cost. - trade_unit : int - 100 for China A. - deal_price: str - dealing price type: 'close', 'open', 'vwap'. - limit_threshold : float - limit move 0.1 (10%) for example, long and short with same limit. - extract_codes: bool - will we pass the codes extracted from the pred to the exchange. - NOTE: This will be faster with offline qlib. - - Returns - ------- - :class: Exchange - an initialized Exchange object - """ - - if trade_unit is None: - trade_unit = C.trade_unit - if limit_threshold is None: - limit_threshold = C.limit_threshold - if deal_price is None: - deal_price = C.deal_price - if exchange is None: - logger.info("Create new exchange") - # handle exception for deal_price - if deal_price[0] != "$": - deal_price = "$" + deal_price - if extract_codes: - codes = sorted(pred.index.get_level_values("instrument").unique()) - else: - codes = "all" # TODO: We must ensure that 'all.txt' includes all the stocks - - dates = sorted(pred.index.get_level_values("datetime").unique()) - dates = np.append(dates, get_date_range(dates[-1], left_shift=1, right_shift=shift)) - - exchange = Exchange( - trade_dates=dates, - codes=codes, - deal_price=deal_price, - subscribe_fields=subscribe_fields, - limit_threshold=limit_threshold, - open_cost=open_cost, - close_cost=close_cost, - min_cost=min_cost, - trade_unit=trade_unit, - ) - return exchange - - -def get_executor( - executor=None, - trade_exchange=None, - verbose=True, -): - """get_executor - - There will be 3 ways to return a executor. Please follow the code. - - Parameters - ---------- - - executor : BaseExecutor - executor used in backtest. - trade_exchange : Exchange - exchange used in executor - verbose : bool - whether to print log. - - Returns - ------- - :class: BaseExecutor - an initialized BaseExecutor object - """ - # There will be 3 ways to return a executor. - if executor is None: - # 1) create executor with param `executor` - logger.info("Create new executor ") - executor = SimulatorExecutor(trade_exchange=trade_exchange, verbose=verbose) - elif isinstance(executor, (dict, str)): - # 2) create executor with config - logger.info("Create new executor ") - executor = init_instance_by_config(executor) - - # 3) Use the executor directly - if not isinstance(executor, BaseExecutor): - raise TypeError("Executor not supported") - return executor - # This is the API for compatibility for legacy code -def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, return_order=False, **kwargs): +def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **kwargs): """This function will help you set a reasonable Exchange and provide default value for strategy Parameters ---------- @@ -256,8 +64,6 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, ret benchmark code, default is SH000905 CSI 500. verbose : bool whether to print log. - return_order : bool - whther to return order list - **strategy related arguments** @@ -314,36 +120,9 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, ret whether to print log. """ - # check strategy: - spec = inspect.getfullargspec(get_strategy) - str_args = {k: v for k, v in kwargs.items() if k in spec.args} - strategy = get_strategy(**str_args) - - # init exchange: - spec = inspect.getfullargspec(get_exchange) - ex_args = {k: v for k, v in kwargs.items() if k in spec.args} - trade_exchange = get_exchange(pred, **ex_args) - - # init executor: - executor = get_executor(executor=kwargs.get("executor"), trade_exchange=trade_exchange, verbose=verbose) - - # run backtest - report_dict = backtest_func( - pred=pred, - strategy=strategy, - executor=executor, - trade_exchange=trade_exchange, - shift=shift, - verbose=verbose, - account=account, - benchmark=benchmark, - return_order=return_order, - ) - # for compatibility of the old API. return the dict positions - - positions = report_dict.get("positions") - report_dict.update({"positions": {k: p.position for k, p in positions.items()}}) - return report_dict + warnings.warn("this function is deprecated, please use backtest function in qlib.contrib.backtest", DeprecationWarning) + report_dict = backtest_func(pred=pred, account=account, shift=shift, benchmark=benchmark, verbose=verbose, return_order=False, **kwargs) + return report_dict.get("report_df"), report_dict.get("positions") def long_short_backtest( diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index 188857e86..be458a24d 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -5,10 +5,9 @@ import re import pandas as pd from pathlib import Path from pprint import pprint -from ..contrib.evaluate import ( - backtest as normal_backtest, - risk_analysis, -) +from ..contrib.evaluate import risk_analysis +from ..contrib.backtest import backtest as normal_backtest + from ..data.dataset import DatasetH from ..data.dataset.handler import DataHandlerLP from ..utils import init_instance_by_config, get_module_by_module_path From cc214a346203829aba2f8d6ba061a5c9c14b2706 Mon Sep 17 00:00:00 2001 From: bxdd Date: Tue, 19 Jan 2021 01:06:17 +0900 Subject: [PATCH 84/85] black format --- qlib/contrib/backtest/__init__.py | 11 ++++++++--- qlib/contrib/evaluate.py | 9 ++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index 483b15d39..aa24ffb0c 100644 --- a/qlib/contrib/backtest/__init__.py +++ b/qlib/contrib/backtest/__init__.py @@ -16,6 +16,7 @@ from ...config import C logger = get_module_logger("backtest caller") + def get_strategy( strategy=None, topk=50, @@ -61,7 +62,6 @@ def get_strategy( an initialized strategy object """ - # There will be 3 ways to return a strategy. if strategy is None: # 1) create strategy with param `strategy` @@ -72,6 +72,7 @@ def get_strategy( } logger.info("Create new strategy ") from .. import strategy as strategy_pool + str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( topk=topk, @@ -86,6 +87,7 @@ def get_strategy( strategy = init_instance_by_config(strategy) from ..strategy.strategy import BaseStrategy + # else: nothing happens. 3) Use the strategy directly if not isinstance(strategy, BaseStrategy): raise TypeError("Strategy not supported") @@ -193,12 +195,13 @@ def get_executor( :class: BaseExecutor an initialized BaseExecutor object """ - + # There will be 3 ways to return a executor. if executor is None: # 1) create executor with param `executor` logger.info("Create new executor ") from ..online.executor import SimulatorExecutor + executor = SimulatorExecutor(trade_exchange=trade_exchange, verbose=verbose) elif isinstance(executor, (dict, str)): # 2) create executor with config @@ -206,11 +209,13 @@ def get_executor( executor = init_instance_by_config(executor) from ..online.executor import BaseExecutor + # 3) Use the executor directly if not isinstance(executor, BaseExecutor): raise TypeError("Executor not supported") return executor + # This is the API for compatibility for legacy code def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, return_order=False, **kwargs): """This function will help you set a reasonable Exchange and provide default value for strategy @@ -316,4 +321,4 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, ret positions = report_dict.get("positions") report_dict.update({"positions": {k: p.position for k, p in positions.items()}}) - return report_dict \ No newline at end of file + return report_dict diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 6ac7511a7..4aa5b5515 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -45,7 +45,6 @@ def risk_analysis(r, N=252): return res - # This is the API for compatibility for legacy code def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **kwargs): """This function will help you set a reasonable Exchange and provide default value for strategy @@ -120,8 +119,12 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k whether to print log. """ - warnings.warn("this function is deprecated, please use backtest function in qlib.contrib.backtest", DeprecationWarning) - report_dict = backtest_func(pred=pred, account=account, shift=shift, benchmark=benchmark, verbose=verbose, return_order=False, **kwargs) + warnings.warn( + "this function is deprecated, please use backtest function in qlib.contrib.backtest", DeprecationWarning + ) + report_dict = backtest_func( + pred=pred, account=account, shift=shift, benchmark=benchmark, verbose=verbose, return_order=False, **kwargs + ) return report_dict.get("report_df"), report_dict.get("positions") From 3403c00b6b4faaeb6056d706bc9814bd7c4b502b Mon Sep 17 00:00:00 2001 From: bxdd <45119470+bxdd@users.noreply.github.com> Date: Tue, 19 Jan 2021 20:35:11 +0800 Subject: [PATCH 85/85] Update requirements.txt fix readthedocs cant find cmake error --- docs/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 08e80edd6..5f27c38e7 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,5 @@ Cython +cmake numpy scipy -scikit-learn \ No newline at end of file +scikit-learn