1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-19 20:21:44 +08:00

Compare commits

...

24 Commits

Author SHA1 Message Date
Linlang Lv (iSoftStone)
ec3a54f91a test_ci 2023-07-18 16:41:56 +08:00
Linlang Lv (iSoftStone)
11a0f260ba test_ci 2023-07-18 13:15:06 +08:00
Linlang Lv (iSoftStone)
0f954e3580 test_ci 2023-07-18 12:36:40 +08:00
Linlang Lv (iSoftStone)
858d0a4d3d test_ci 2023-07-18 12:29:46 +08:00
Linlang Lv (iSoftStone)
c861bf82cd test_ci 2023-07-18 12:27:50 +08:00
Linlang Lv (iSoftStone)
49bb942336 test_ci 2023-07-18 12:24:58 +08:00
Linlang Lv (iSoftStone)
c2d9c4a30b test_ci 2023-07-18 11:53:54 +08:00
Linlang Lv (iSoftStone)
719af93a87 test_ci 2023-07-18 11:49:59 +08:00
Linlang Lv (iSoftStone)
79815f1b7b test_ci 2023-07-18 11:48:59 +08:00
Linlang Lv (iSoftStone)
891bccb096 test_ci 2023-07-18 11:38:09 +08:00
Linlang Lv (iSoftStone)
159addf023 test_ci 2023-07-18 11:35:23 +08:00
Linlang Lv (iSoftStone)
080b42a19e test_ci 2023-07-18 11:33:26 +08:00
Linlang Lv (iSoftStone)
d5ee587a96 test_ci 2023-07-18 11:31:21 +08:00
Linlang Lv (iSoftStone)
9e0039226b test_ci 2023-07-18 11:29:08 +08:00
Linlang Lv (iSoftStone)
4f09d87a74 test_ci 2023-07-18 11:27:09 +08:00
Linlang Lv (iSoftStone)
71a5fc3753 test_ci 2023-07-18 10:39:07 +08:00
Linlang Lv (iSoftStone)
5b3c86bd63 test_ci 2023-07-18 10:32:07 +08:00
you-n-g
be4646b4b7 Adjust rolling api (#1594)
* Intermediate version

* Fix yaml template & Successfully run rolling

* Be compatible with benchmark

* Get same results with previous linear model

* Black formatting

* Update black

* Update the placeholder mechanism

* Update CI

* Update CI

* Upgrade Black

* Fix CI and simplify code

* Fix CI

* Move the data processing caching mechanism into utils.

* Adjusting DDG-DA

* Organize import
2023-07-14 12:16:12 +08:00
you-n-g
8d3adf34ac Postpone PR stale. (#1591) 2023-07-12 09:59:09 +08:00
Lewen Wang
b1dfc77ad7 Update qlibrl docs. (#1588)
* Update qlibrl docs.

* Update docs/component/rl/guidance.rst

* Update docs/component/rl/guidance.rst

* Update docs/component/rl/guidance.rst

---------

Co-authored-by: Litzy <litzy0619owned@gmail.com>
Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
2023-07-07 15:40:03 +08:00
Yang
3e074c8435 fix download token (#1577) 2023-07-06 12:38:52 +08:00
Linlang
b7e5f63a07 fix_pip_ci (#1584)
* fix_pip_ci

* fix_ci_get_data_error

---------

Co-authored-by: Linlang <v-linlanglv@microsoft.com>
2023-07-05 21:23:15 +08:00
you-n-g
4db30b1225 Update README.md for RL (#1573)
* Update README.md

* Update README.md
2023-06-28 10:53:58 +08:00
you-n-g
b1e7b19a3d Update __init__.py 2023-06-27 11:55:40 +08:00
155 changed files with 1138 additions and 1314 deletions

View File

@@ -38,7 +38,7 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
twine upload dist/*
deploy_with_manylinux:
runs-on: ubuntu-latest
steps:

View File

@@ -18,7 +18,8 @@ jobs:
stale-issue-label: 'stale'
stale-pr-label: 'stale'
days-before-stale: 90
days-before-pr-stale: 365
days-before-close: 5
operations-per-run: 100
exempt-issue-labels: 'bug,enhancement'
remove-stale-when-updated: true
remove-stale-when-updated: true

View File

@@ -1,57 +0,0 @@
name: Test qlib from pip
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
build:
timeout-minutes: 120
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]
steps:
- name: Test qlib from pip
uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Update pip to the latest version
run: |
python -m pip install --upgrade pip
- name: Qlib installation test
run: |
python -m pip install pyqlib
# Specify the numpy version because the numpy upgrade caused the CI test to fail,
# and this line of code will be removed when the next version of qlib is released.
python -m pip install "numpy<1.23"
- name: Install Lightgbm for MacOS
if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
run: |
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
# FIX MacOS error: Segmentation fault
# reference: https://github.com/microsoft/LightGBM/issues/4229
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/fb8323f2b170bd4ae97e1bac9bf3e2983af3fdb0/Formula/libomp.rb
brew unlink libomp
brew install libomp.rb
- name: Downloads dependencies data
run: |
python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
- name: Test workflow by config
run: |
qrun examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml

View File

@@ -14,160 +14,42 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
os: [windows-latest, macos-11]
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]
steps:
- name: Test qlib from source
uses: actions/checkout@v3
uses: actions/checkout@v2
# Since version 3.7 of python for MacOS is installed in CI, version 3.7.17, this version causes "_bz not found error".
# So we make the version number of python 3.7 for MacOS more specific.
# refs: https://github.com/actions/setup-python/issues/682
- name: Set up Python ${{ matrix.python-version }}
if: (matrix.os == 'macos-latest' && matrix.python-version == '3.7') || (matrix.os == 'macos-11' && matrix.python-version == '3.7')
uses: actions/setup-python@v4
if: matrix.os == 'macos-11' && matrix.python-version == '3.7'
uses: actions/setup-python@v2
with:
python-version: "3.7.16"
- name: Set up Python ${{ matrix.python-version }}
if: (matrix.os != 'macos-latest' || matrix.python-version != '3.7') && (matrix.os != 'macos-11' || matrix.python-version != '3.7')
uses: actions/setup-python@v4
if: matrix.os == 'macos-11' && matrix.python-version == '3.8'
uses: actions/setup-python@v2
with:
python-version: "3.8.16"
- name: Set up Python ${{ matrix.python-version }}
if: matrix.os != 'macos-11'
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Update pip to the latest version
# pip release version 23.1 on Apr.15 2023, CI failed to run, Please refer to #1495 ofr detailed logs.
# The pip version has been temporarily fixed to 23.0
- name: Install dependencies
run: |
python -m pip install pip==23.0
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Installing pytorch for macos
if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
- name: Build wheel on ${{ matrix.os }}
run: |
python -m pip install torch torchvision torchaudio
- name: Installing pytorch for ubuntu
if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'ubuntu-22.04' }}
run: |
python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
- name: Installing pytorch for windows
if: ${{ matrix.os == 'windows-latest' }}
run: |
python -m pip install torch torchvision torchaudio
- name: Set up Python tools
run: |
python -m pip install --upgrade cython
python -m pip install -e .[dev]
- name: Lint with Black
run: |
black . -l 120 --check --diff
- name: Make html with sphinx
run: |
cd docs
sphinx-build -W --keep-going -b html . _build
cd ..
# Check Qlib with pylint
# TODO: These problems we will solve in the future. Important among them are: W0221, W0223, W0237, E1102
# C0103: invalid-name
# C0209: consider-using-f-string
# R0402: consider-using-from-import
# R1705: no-else-return
# R1710: inconsistent-return-statements
# R1725: super-with-arguments
# R1735: use-dict-literal
# W0102: dangerous-default-value
# W0212: protected-access
# W0221: arguments-differ
# W0223: abstract-method
# W0231: super-init-not-called
# W0237: arguments-renamed
# W0612: unused-variable
# W0621: redefined-outer-name
# W0622: redefined-builtin
# FIXME: specify exception type
# W0703: broad-except
# W1309: f-string-without-interpolation
# E1102: not-callable
# E1136: unsubscriptable-object
# References for parameters: https://github.com/PyCQA/pylint/issues/4577#issuecomment-1000245962
# We use sys.setrecursionlimit(2000) to make the recursion depth larger to ensure that pylint works properly (the default recursion depth is 1000).
- name: Check Qlib with pylint
run: |
pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}$' qlib --init-hook "import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"
# The following flake8 error codes were ignored:
# E501 line too long
# Description: We have used black to limit the length of each line to 120.
# F541 f-string is missing placeholders
# Description: The same thing is done when using pylint for detection.
# E266 too many leading '#' for block comment
# Description: To make the code more readable, a lot of "#" is used.
# This error code appears centrally in:
# qlib/backtest/executor.py
# qlib/data/ops.py
# qlib/utils/__init__.py
# E402 module level import not at top of file
# Description: There are times when module level import is not available at the top of the file.
# W503 line break before binary operator
# Description: Since black formats the length of each line of code, it has to perform a line break when a line of arithmetic is too long.
# E731 do not assign a lambda expression, use a def
# Description: Restricts the use of lambda expressions, but at some point lambda expressions are required.
# E203 whitespace before ':'
# Description: If there is whitespace before ":", it cannot pass the black check.
- name: Check Qlib with flake8
run: |
flake8 --ignore=E501,F541,E266,E402,W503,E731,E203 --per-file-ignores="__init__.py:F401,F403" qlib
# https://github.com/python/mypy/issues/10600
- name: Check Qlib with mypy
run: |
mypy qlib --install-types --non-interactive || true
mypy qlib --verbose
- name: Check Qlib ipynb with nbqa
run: |
nbqa black . -l 120 --check --diff
nbqa pylint . --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136,W0719,W0104,W0404,C0412,W0611,C0410 --const-rgx='[a-z_][a-z0-9_]{2,30}$'
- name: Test data downloads
run: |
python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
python scripts/get_data.py download_data --file_name rl_data.zip --target_dir tests/.data/rl
- name: Install Lightgbm for MacOS
if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
run: |
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
# FIX MacOS error: Segmentation fault
# reference: https://github.com/microsoft/LightGBM/issues/4229
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/fb8323f2b170bd4ae97e1bac9bf3e2983af3fdb0/Formula/libomp.rb
brew unlink libomp
brew install libomp.rb
# Run after data downloads
- name: Check Qlib ipynb with nbconvert
run: |
# add more ipynb files in future
jupyter nbconvert --to notebook --execute examples/workflow_by_code.ipynb
- name: Test workflow by config (install from source)
run: |
python -m pip install numba
python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
- name: Unit tests with Pytest
uses: nick-fields/retry@v2
with:
timeout_minutes: 60
max_attempts: 3
command: |
cd tests
python -m pytest . -m "not slow" --durations=0
python -m pip install "numpy<1.24.0"
python -m pip install cython==0.29.36
python -m pip install setuptools wheel
python setup.py bdist_wheel
python -c "from pathlib import Path; current_directory = Path.cwd(); dist_directory = current_directory / 'dist'; files = dist_directory.glob('*'); [print(file.name) for file in files]"
python -c "import sysconfig; print(sysconfig.get_platform())"

View File

@@ -1,70 +0,0 @@
name: Test qlib from source slow
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
build:
timeout-minutes: 720
# we may retry for 3 times for `Unit tests with Pytest`
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]
steps:
- name: Test qlib from source slow
uses: actions/checkout@v3
# Since version 3.7 of python for MacOS is installed in CI, version 3.7.17, this version causes "_bz not found error".
# So we make the version number of python 3.7 for MacOS more specific.
# refs: https://github.com/actions/setup-python/issues/682
- name: Set up Python ${{ matrix.python-version }}
if: (matrix.os == 'macos-latest' && matrix.python-version == '3.7') || (matrix.os == 'macos-11' && matrix.python-version == '3.7')
uses: actions/setup-python@v4
with:
python-version: "3.7.16"
- name: Set up Python ${{ matrix.python-version }}
if: (matrix.os != 'macos-latest' || matrix.python-version != '3.7') && (matrix.os != 'macos-11' || matrix.python-version != '3.7')
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Set up Python tools
# pip release version 23.1 on Apr.15 2023, CI failed to run, Please refer to #1495 ofr detailed logs.
# The pip version has been temporarily fixed to 23.0
run: |
python -m pip install pip==23.0
pip install --upgrade cython numpy
pip install -e .[dev]
- name: Downloads dependencies data
run: |
python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
- name: Install Lightgbm for MacOS
if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
run: |
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
# FIX MacOS error: Segmentation fault
# reference: https://github.com/microsoft/LightGBM/issues/4229
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/fb8323f2b170bd4ae97e1bac9bf3e2983af3fdb0/Formula/libomp.rb
brew unlink libomp
brew install libomp.rb
- name: Unit tests with Pytest
uses: nick-fields/retry@v2
with:
timeout_minutes: 240
max_attempts: 3
command: |
cd tests
python -m pytest . -m "slow" --durations=0

View File

@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/psf/black
rev: 22.6.0
rev: 23.7.0
hooks:
- id: black
args: ["qlib", "-l 120"]
@@ -9,4 +9,4 @@ repos:
rev: 4.0.1
hooks:
- id: flake8
args: ["--ignore=E501,F541,E266,E402,W503,E731,E203"]
args: ["--ignore=E501,F541,E266,E402,W503,E731,E203"]

View File

@@ -91,6 +91,7 @@ For more details, please refer to our paper ["Qlib: An AI-oriented Quantitative
</ul>
</li>
<li type="circle"><a href="#adapting-to-market-dynamics">Adapting to Market Dynamics</a></li>
<li type="circle"><a href="#reinforcement-learning-modeling-continuous-decisions">Reinforcement Learning: modeling continuous decisions</a></li>
</ul>
</li>
</td>
@@ -392,6 +393,17 @@ Here is a list of solutions built on `Qlib`.
- [Rolling Retraining](examples/benchmarks_dynamic/baseline/)
- [DDG-DA on pytorch (Wendi, et al. AAAI 2022)](examples/benchmarks_dynamic/DDG-DA/)
## Reinforcement Learning: modeling continuous decisions
Qlib now supports reinforcement learning, a feature designed to model continuous investment decisions. This functionality assists investors in optimizing their trading strategies by learning from interactions with the environment to maximize some notion of cumulative reward.
Here is a list of solutions built on `Qlib` categorized by scenarios.
### [RL for order execution](examples/rl_order_execution)
[Here](https://qlib.readthedocs.io/en/latest/component/rl/overall.html#order-execution) is the introduction of this scenario. All the methods below are compared [here](examples/rl_order_execution).
- [TWAP](examples/rl_order_execution/exp_configs/backtest_twap.yml)
- [PPO: "An End-to-End Optimal Trade Execution Framework based on Proximal Policy Optimization", IJCAL 2020](examples/rl_order_execution/exp_configs/backtest_ppo.yml)
- [OPDS: "Universal Trading for Order Execution with Oracle Policy Distillation", AAAI 2021](examples/rl_order_execution/exp_configs/backtest_opds.yml)
# Quant Dataset Zoo
Dataset plays a very important role in Quant. Here is a list of the datasets built on `Qlib`:

View File

@@ -0,0 +1,32 @@
========
Guidance
========
.. currentmodule:: qlib
QlibRL can help users quickly get started and conveniently implement quantitative strategies based on reinforcement learning(RL) algorithms. For different user groups, we recommend the following guidance to use QlibRL.
Beginners to Reinforcement Learning Algorithms
==============================================
Whether you are a quantitative researcher who wants to understand what RL can do in trading or a learner who wants to get started with RL algorithms in trading scenarios, if you have limited knowledge of RL and want to shield various detailed settings to quickly get started with RL algorithms, we recommend the following sequence to learn qlibrl:
- Learn the fundamentals of RL in `part1 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#reinforcement-learning>`_.
- Understand the trading scenarios where RL methods can be applied in `part2 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#potential-application-scenarios-in-quantitative-trading>`_.
- Run the examples in `part3 <https://qlib.readthedocs.io/en/latest/component/rl/quickstart.html>`_ to solve trading problems using RL.
- If you want to further explore QlibRL and make some customizations, you need to first understand the framework of QlibRL in `part4 <https://qlib.readthedocs.io/en/latest/component/rl/framework.html>`_ and rewrite specific components according to your needs.
Reinforcement Learning Algorithm Researcher
==============================================
If you are already familiar with existing RL algorithms and dedicated to researching RL algorithms but lack domain knowledge in the financial field, and you want to validate the effectiveness of your algorithms in financial trading scenarios, we recommend the following steps to get started with QlibRL:
- Understand the trading scenarios where RL methods can be applied in `part2 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#potential-application-scenarios-in-quantitative-trading>`_.
- Choose an RL application scenario (currently, QlibRL has implemented two scenario examples: order execution and algorithmic trading). Run the example in `part3 <https://qlib.readthedocs.io/en/latest/component/rl/quickstart.html>`_ to get it working.
- Modify the `policy <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/policy.py>`_ part to incorporate your own RL algorithm.
Quantitative Researcher
=======================
If you have a certain level of financial domain knowledge and coding skills, and you want to explore the application of RL algorithms in the investment field, we recommend the following steps to explore QlibRL:
- Learn the fundamentals of RL in `part1 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#reinforcement-learning>`_.
- Understand the trading scenarios where RL methods can be applied in `part2 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#potential-application-scenarios-in-quantitative-trading>`_.
- Run the examples in `part3 <https://qlib.readthedocs.io/en/latest/component/rl/quickstart.html>`_ to solve trading problems using RL.
- Understand the framework of QlibRL in `part4 <https://qlib.readthedocs.io/en/latest/component/rl/framework.html>`_.
- Choose a suitable RL algorithm based on the characteristics of the problem you want to solve (currently, QlibRL supports PPO and DQN algorithms based on tianshou).
- Design the MDP (Markov Decision Process) process based on market trading rules and the problem you want to solve. Refer to the example in order execution and make corresponding modifications to the following modules: `State <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/state.py#L70>`_, `Metrics <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/state.py#L18>`_, `ActionInterpreter <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/interpreter.py#L199>`_, `StateInterpreter <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/interpreter.py#L68>`_, `Reward <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/reward.py>`_, `Observation <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/interpreter.py#L44>`_, `Simulator <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/simulator_simple.py>`_.

View File

@@ -4,7 +4,7 @@ Reinforcement Learning in Quantitative Trading
Reinforcement Learning
======================
Different from supervised learning tasks such as classification tasks and regression tasks. Another important paradigm in machine learning is Reinforcement Learning,
Different from supervised learning tasks such as classification tasks and regression tasks. Another important paradigm in machine learning is Reinforcement Learning(RL),
which attempts to optimize an accumulative numerical reward signal by directly interacting with the environment under a few assumptions such as Markov Decision Process(MDP).
As demonstrated in the following figure, an RL system consists of four elements, 1)the agent 2) the environment the agent interacts with 3) the policy that the agent follows to take actions on the environment and 4)the reward signal from the environment to the agent.
@@ -25,26 +25,46 @@ The Qlib Reinforcement Learning toolkit (QlibRL) is an RL platform for quantitat
Potential Application Scenarios in Quantitative Trading
=======================================================
RL methods have already achieved outstanding achievement in many applications, such as game playing, resource allocating, recommendation, marketing and advertising, etc.
Investment is always a continuous process, taking the stock market as an example, investors need to control their positions and stock holdings by one or more buying and selling behaviors, to maximize the investment returns.
Besides, each buy and sell decision is made by investors after fully considering the overall market information and stock information.
From the view of an investor, the process could be described as a continuous decision-making process generated according to interaction with the market, such problems could be solved by the RL algorithms.
Following are some scenarios where RL can potentially be used in quantitative investment.
Portfolio Construction
----------------------
Portfolio construction is a process of selecting securities optimally by taking a minimum risk to achieve maximum returns. With an RL-based solution, an agent allocates stocks at every time step by obtaining information for each stock and the market. The key is to develop of policy for building a portfolio and make the policy able to pick the optimal portfolio.
RL methods have demonstrated remarkable achievements in various applications, including game playing, resource allocation, recommendation systems, marketing, and advertising.
In the context of investment, which involves continuous decision-making, let's consider the example of the stock market. Investors strive to optimize their investment returns by effectively managing their positions and stock holdings through various buying and selling behaviors.
Furthermore, investors carefully evaluate market conditions and stock-specific information before making each buying or selling decision. From an investor's perspective, this process can be viewed as a continuous decision-making process driven by interactions with the market. RL algorithms offer a promising approach to tackle such challenges.
Here are several scenarios where RL holds potential for application in quantitative investment.
Order Execution
---------------
As a fundamental problem in algorithmic trading, order execution aims at fulfilling a specific trading order, either liquidation or acquirement, for a given instrument. Essentially, the goal of order execution is twofold: it not only requires to fulfill the whole order but also targets a more economical execution with maximizing profit gain (or minimizing capital loss). The order execution with only one order of liquidation or acquirement is called single-asset order execution.
The order execution task is to execute orders efficiently while considering multiple factors, including optimal prices, minimizing trading costs, reducing market impact, maximizing order fullfill rates, and achieving execution within a specified time frame. RL can be applied to such tasks by incorporating these objectives into the reward function and action selection process. Specifically, the RL agent interacts with the market environment, observes the state from market information, and makes decisions on next step execution. The RL algorithm learns an optimal execution strategy through trial and error, aiming to maximize the expected cumulative reward, which incorporates the desired objectives.
Considering stock investment always aim to pursue long-term maximized profits, it usually manifests as a sequential process of continuously adjusting the asset portfolios, execution for multiple orders, including order of liquidation and acquirement, brings more constraints and makes the sequence of execution for different orders should be considered, e.g. before executing an order to buy some stocks, we have to sell at least one stock. The order execution with multiple assets is called multi-asset order execution.
- General Setting
- Environment: The environment represents the financial market where order execution takes place. It encompasses variables such as the order book dynamics, liquidity, price movements, and market conditions.
According to the order executions trait of sequential decision-making, an RL-based solution could be applied to solve the order execution. With an RL-based solution, an agent optimizes execution strategy by interacting with the market environment.
- State: The state refers to the information available to the RL agent at a given time step. It typically includes features such as the current order book state (bid-ask spread, order depth), historical price data, historical trading volume, market volatility, and any other relevant information that can aid in decision-making.
With QlibRL, the RL algorithm in the above scenarios can be easily implemented.
- Action: The action is the decision made by the RL agent based on the observed state. In order execution, actions can include selecting the order size, price, and timing of execution.
Nested Portfolio Construction and Order Executor
------------------------------------------------
QlibRL makes it possible to jointly optimize different levels of strategies/models/agents. Take `Nested Decision Execution Framework <https://github.com/microsoft/qlib/blob/main/examples/nested_decision_execution>`_ as an example, the optimization of order execution strategy and portfolio management strategies can interact with each other to maximize returns.
- Reward: The reward is a scalar signal that indicates the performance of the RL agent's action in the environment. The reward function is designed to encourage actions that lead to efficient and cost-effective order execution. It typically considers multiple objectives, such as maximizing price advantages, minimizing trading costs (including transaction fees and slippage), reducing market impact (the effect of the order on the market price) and maximizing order fullfill rates.
- Scenarios
- Single-asset order execution: Single-asset order execution focuses on the task of executing a single order for a specific asset, such as a stock or a cryptocurrency. The primary objective is to execute the order efficiently while considering factors such as maximizing price advantages, minimizing trading costs, reducing market impact, and achieving a high fullfill rate. The RL agent interacts with the market environment and makes decisions on order size, price, and timing of execution for that particular asset. The goal is to learn an optimal execution strategy for the single asset, maximizing the expected cumulative reward while considering the specific dynamics and characteristics of that asset.
- Multi-asset order execution: Multi-asset order execution expands the order execution task to involve multiple assets or securities. It typically involves executing a portfolio of orders across different assets simultaneously or sequentially. Unlike single-asset order execution, the focus is not only on the execution of individual orders but also on managing the interactions and dependencies between different assets within the portfolio. The RL agent needs to make decisions on the order sizes, prices, and timings for each asset in the portfolio, considering their interdependencies, cash constraints, market conditions, and transaction costs. The goal is to learn an optimal execution strategy that balances the execution efficiency for each asset while considering the overall performance and objectives of the portfolio as a whole.
The choice of settings and RL algorithm depends on the specific requirements of the task, available data, and desired performance objectives.
Portfolio Construction
----------------------
Portfolio construction is a process of selecting and allocating assets in an investment portfolio. RL provides a framework to optimize portfolio management decisions by learning from interactions with the market environment and maximizing long-term returns while considering risk management.
- General Setting
- State: The state represents the current information about the market and the portfolio. It typically includes historical prices and volumes, technical indicators, and other relevant data.
- Action: The action corresponds to the decision of allocating capital to different assets in the portfolio. It determines the weights or proportions of investments in each asset.
- Reward: The reward is a metric that evaluates the performance of the portfolio. It can be defined in various ways, such as total return, risk-adjusted return, or other objectives like maximizing Sharpe ratio or minimizing drawdown.
- Scenarios
- Stock market: RL can be used to construct portfolios of stocks, where the agent learns to allocate capital among different stocks.
- Cryptocurrency market: RL can be applied to construct portfolios of cryptocurrencies, where the agent learns to make allocation decisions.
- Foreign exchange (Forex) market: RL can be used to construct portfolios of currency pairs, where the agent learns to allocate capital across different currencies based on exchange rate data, economic indicators, and other factors.
Similarly, the choice of basic setting and algorithm depends on the specific requirements of the problem and the characteristics of the market.

View File

@@ -5,6 +5,7 @@ Reinforcement Learning in Quantitative Trading
========================================================================
.. toctree::
Guidance <guidance>
Overall <overall>
Quick Start <quickstart>
Framework <framework>

View File

@@ -53,9 +53,7 @@ Below is a typical config file of ``qrun``.
kwargs:
topk: 50
n_drop: 5
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
backtest:
limit_threshold: 0.095
account: 100000000
@@ -281,9 +279,7 @@ The following script is the configuration of `backtest` and the `strategy` used
kwargs:
topk: 50
n_drop: 5
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
backtest:
limit_threshold: 0.095
account: 100000000

View File

@@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -35,9 +35,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:
@@ -89,4 +87,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -48,7 +48,6 @@ class Avg15minHandler(DataHandlerLP):
)
def loader_config(self):
# Results for dataset: df: pd.DataFrame
# len(df.columns) == 6 + 6 * 16, len(df.index.get_level_values(level="datetime").unique()) == T
# df.columns: close0, close1, ..., close16, open0, ..., open16, ..., vwap16

View File

@@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -33,9 +33,7 @@ port_analysis_config: &port_analysis_config
kwargs:
topk: 50
n_drop: 5
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
backtest:
verbose: False
limit_threshold: 0.095

View File

@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -31,9 +31,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -41,9 +41,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -41,9 +41,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -36,8 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -30,9 +30,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:
@@ -95,4 +93,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -139,7 +139,6 @@ class GenericDataFormatter(abc.ABC):
# Sanity checks first.
# Ensure only one ID and time column exist
def _check_single_column(input_type):
length = len([tup for tup in column_definition if tup[2] == input_type])
if length != 1:

View File

@@ -78,7 +78,6 @@ class ExperimentConfig:
@property
def hyperparam_iterations(self):
return 240 if self.experiment == "volatility" else 60
def make_data_formatter(self):

View File

@@ -88,7 +88,6 @@ class HyperparamOptManager:
params_file = os.path.join(self.hyperparam_folder, "params.csv")
if os.path.exists(results_file) and os.path.exists(params_file):
self.results = pd.read_csv(results_file, index_col=0)
self.saved_params = pd.read_csv(params_file, index_col=0)
@@ -178,7 +177,6 @@ class HyperparamOptManager:
return parameters
for _ in range(self._max_tries):
parameters = _get_next()
name = self._get_name(parameters)

View File

@@ -475,7 +475,6 @@ class TemporalFusionTransformer:
embeddings = []
for i in range(num_categorical_variables):
embedding = tf.keras.Sequential(
[
tf.keras.layers.InputLayer([time_steps]),
@@ -680,7 +679,6 @@ class TemporalFusionTransformer:
data_map = {}
for _, sliced in data.groupby(id_col):
col_mappings = {"identifier": [id_col], "time": [time_col], "outputs": [target_col], "inputs": input_cols}
for k in col_mappings:
@@ -954,7 +952,6 @@ class TemporalFusionTransformer:
"""
with tf.variable_scope(self.name):
transformer_layer, all_inputs, attention_components = self._build_base_graph()
outputs = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(self.output_size * len(self.quantiles)))(

View File

@@ -16,9 +16,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -6,7 +6,6 @@ from qlib.utils import init_instance_by_config
def main(seed, config_file="configs/config_alstm.yaml"):
# set random seed
with open(config_file) as f:
config = yaml.safe_load(f)
@@ -30,7 +29,6 @@ def main(seed, config_file="configs/config_alstm.yaml"):
if __name__ == "__main__":
# set params from cmd
parser = argparse.ArgumentParser(allow_abbrev=False)
parser.add_argument("--seed", type=int, default=1000, help="random seed")

View File

@@ -96,7 +96,6 @@ class MTSDatasetH(DatasetH):
drop_last=False,
**kwargs,
):
assert horizon > 0, "please specify `horizon` to avoid data leakage"
self.seq_len = seq_len
@@ -111,7 +110,6 @@ class MTSDatasetH(DatasetH):
super().__init__(handler, segments, **kwargs)
def setup_data(self, handler_kwargs: dict = None, **kwargs):
super().setup_data()
# change index to <code, date>

View File

@@ -45,7 +45,6 @@ class TRAModel(Model):
avg_params=True,
**kwargs,
):
np.random.seed(seed)
torch.manual_seed(seed)
@@ -93,7 +92,6 @@ class TRAModel(Model):
self.global_step = -1
def train_epoch(self, data_set):
self.model.train()
self.tra.train()
@@ -146,7 +144,6 @@ class TRAModel(Model):
return total_loss
def test_epoch(self, data_set, return_pred=False):
self.model.eval()
self.tra.eval()
data_set.eval()
@@ -204,7 +201,6 @@ class TRAModel(Model):
return metrics, preds
def fit(self, dataset, evals_result=dict()):
train_set, valid_set, test_set = dataset.prepare(["train", "valid", "test"])
best_score = -1
@@ -380,7 +376,6 @@ class LSTM(nn.Module):
self.output_size = hidden_size
def forward(self, x):
x = self.input_drop(x)
if self.training and self.noise_level > 0:
@@ -464,7 +459,6 @@ class Transformer(nn.Module):
self.output_size = hidden_size
def forward(self, x):
x = self.input_drop(x)
if self.training and self.noise_level > 0:
@@ -514,7 +508,6 @@ class TRA(nn.Module):
self.predictors = nn.Linear(input_size, num_states)
def forward(self, hidden, hist_loss):
preds = self.predictors(hidden)
if self.num_states == 1:

View File

@@ -57,9 +57,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -51,9 +51,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -51,9 +51,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -16,12 +16,12 @@ Though the dataset is different, the conclusion remains the same. By applying `D
# Run the Code
Users can try `DDG-DA` by running the following command:
```bash
python workflow.py run_all
python workflow.py run
```
The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `forecast_model` parameter when `DDG-DA` initializes. For example, users can try `LightGBM` forecasting models by running the following command:
```bash
python workflow.py --forecast_model="gbdt" run_all
python workflow.py --conf_path=../workflow_config_lightgbm_Alpha158.yaml run
```
# Results

View File

@@ -1,305 +1,40 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from pathlib import Path
from qlib.model.meta.task import MetaTask
from qlib.contrib.meta.data_selection.model import MetaModelDS
from qlib.contrib.meta.data_selection.dataset import InternalData, MetaDatasetDS
from qlib.data.dataset.handler import DataHandlerLP
from typing import Union
import pandas as pd
import fire
import sys
import pickle
from typing import Optional
from qlib import auto_init
from qlib.model.trainer import TrainerR
from qlib.typehint import Literal
from qlib.utils import init_instance_by_config
from qlib.workflow import R
from qlib.contrib.rolling.ddgda import DDGDA
from qlib.tests.data import GetData
DIRNAME = Path(__file__).absolute().resolve().parent
sys.path.append(str(DIRNAME.parent / "baseline"))
from rolling_benchmark import RollingBenchmark # NOTE: sys.path is changed for import RollingBenchmark
BENCH_DIR = DIRNAME.parent / "baseline"
class DDGDA:
"""
please run `python workflow.py run_all` to run the full workflow of the experiment
class DDGDABench(DDGDA):
# The config in the README.md
CONF_LIST = [
BENCH_DIR / "workflow_config_linear_Alpha158.yaml",
BENCH_DIR / "workflow_config_lightgbm_Alpha158.yaml",
]
**NOTE**
before running the example, please clean your previous results with following command
- `rm -r mlruns`
"""
DEFAULT_CONF = CONF_LIST[0] # Linear by default due to efficiency
def __init__(
self,
sim_task_model: Literal["linear", "gbdt"] = "gbdt",
forecast_model: Literal["linear", "gbdt"] = "linear",
h_path: Optional[str] = None,
test_end: Optional[str] = None,
train_start: Optional[str] = None,
meta_1st_train_end: Optional[str] = None,
task_ext_conf: Optional[dict] = None,
alpha: float = 0.01,
proxy_hd: str = "handler_proxy.pkl",
):
"""
def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
# This code is for being compatible with the previous old code
conf_path = Path(conf_path)
super().__init__(conf_path=conf_path, horizon=horizon, working_dir=DIRNAME, **kwargs)
Parameters
----------
train_start: Optional[str]
the start datetime for data. It is used in training start time (for both tasks & meta learing)
test_end: Optional[str]
the end datetime for data. It is used in test end time
meta_1st_train_end: Optional[str]
the datetime of training end of the first meta_task
alpha: float
Setting the L2 regularization for ridge
The `alpha` is only passed to MetaModelDS (it is not passed to sim_task_model currently..)
"""
self.step = 20
# NOTE:
# the horizon must match the meaning in the base task template
self.horizon = 20
self.meta_exp_name = "DDG-DA"
self.sim_task_model = sim_task_model # The model to capture the distribution of data.
self.forecast_model = forecast_model # downstream forecasting models' type
self.rb_kwargs = {
"h_path": h_path,
"test_end": test_end,
"train_start": train_start,
"task_ext_conf": task_ext_conf,
}
self.alpha = alpha
self.meta_1st_train_end = meta_1st_train_end
self.proxy_hd = proxy_hd
def get_feature_importance(self):
# this must be lightGBM, because it needs to get the feature importance
rb = RollingBenchmark(model_type="gbdt", **self.rb_kwargs)
task = rb.basic_task()
with R.start(experiment_name="feature_importance"):
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
model.fit(dataset)
fi = model.get_feature_importance()
# Because the model use numpy instead of dataframe for training lightgbm
# So the we must use following extra steps to get the right feature importance
df = dataset.prepare(segments=slice(None), col_set="feature", data_key=DataHandlerLP.DK_R)
cols = df.columns
fi_named = {cols[int(k.split("_")[1])]: imp for k, imp in fi.to_dict().items()}
return pd.Series(fi_named)
def dump_data_for_proxy_model(self):
"""
Dump data for training meta model.
The meta model will be trained upon the proxy forecasting model.
This dataset is for the proxy forecasting model.
"""
topk = 30
fi = self.get_feature_importance()
col_selected = fi.nlargest(topk)
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
task = rb.basic_task()
dataset = init_instance_by_config(task["dataset"])
prep_ds = dataset.prepare(slice(None), col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
feature_df = prep_ds["feature"]
label_df = prep_ds["label"]
feature_selected = feature_df.loc[:, col_selected.index]
feature_selected = feature_selected.groupby("datetime", group_keys=False).apply(
lambda df: (df - df.mean()).div(df.std())
)
feature_selected = feature_selected.fillna(0.0)
df_all = {
"label": label_df.reindex(feature_selected.index),
"feature": feature_selected,
}
df_all = pd.concat(df_all, axis=1)
df_all.to_pickle(DIRNAME / "fea_label_df.pkl")
# dump data in handler format for aligning the interface
handler = DataHandlerLP(
data_loader={
"class": "qlib.data.dataset.loader.StaticDataLoader",
"kwargs": {"config": DIRNAME / "fea_label_df.pkl"},
}
)
handler.to_pickle(DIRNAME / self.proxy_hd, dump_all=True)
@property
def _internal_data_path(self):
return DIRNAME / f"internal_data_s{self.step}.pkl"
def dump_meta_ipt(self):
"""
Dump data for training meta model.
This function will dump the input data for meta model
"""
# According to the experiments, the choice of the model type is very important for achieving good results
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
sim_task = rb.basic_task()
if self.sim_task_model == "gbdt":
sim_task["model"].setdefault("kwargs", {}).update({"early_stopping_rounds": None, "num_boost_round": 150})
exp_name_sim = f"data_sim_s{self.step}"
internal_data = InternalData(sim_task, self.step, exp_name=exp_name_sim)
internal_data.setup(trainer=TrainerR)
with self._internal_data_path.open("wb") as f:
pickle.dump(internal_data, f)
def train_meta_model(self, fill_method="max"):
"""
training a meta model based on a simplified linear proxy model;
"""
# 1) leverage the simplified proxy forecasting model to train meta model.
# - Only the dataset part is important, in current version of meta model will integrate the
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
sim_task = rb.basic_task()
# the train_start for training meta model does not necessarily align with final rolling
train_start = "2008-01-01" if self.rb_kwargs.get("train_start") is None else self.rb_kwargs.get("train_start")
train_end = "2010-12-31" if self.meta_1st_train_end is None else self.meta_1st_train_end
test_start = (pd.Timestamp(train_end) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
proxy_forecast_model_task = {
# "model": "qlib.contrib.model.linear.LinearModel",
"dataset": {
"class": "qlib.data.dataset.DatasetH",
"kwargs": {
"handler": f"file://{(DIRNAME / self.proxy_hd).absolute()}",
"segments": {
"train": (train_start, train_end),
"test": (test_start, sim_task["dataset"]["kwargs"]["segments"]["test"][1]),
},
},
},
# "record": ["qlib.workflow.record_temp.SignalRecord"]
}
# the proxy_forecast_model_task will be used to create meta tasks.
# The test date of first task will be 2011-01-01. Each test segment will be about 20days
# The tasks include all training tasks and test tasks.
# 2) preparing meta dataset
kwargs = dict(
task_tpl=proxy_forecast_model_task,
step=self.step,
segments=0.62, # keep test period consistent with the dataset yaml
trunc_days=1 + self.horizon,
hist_step_n=30,
fill_method=fill_method,
rolling_ext_days=0,
)
# NOTE:
# the input of meta model (internal data) are shared between proxy model and final forecasting model
# but their task test segment are not aligned! It worked in my previous experiment.
# So the misalignment will not affect the effectiveness of the method.
with self._internal_data_path.open("rb") as f:
internal_data = pickle.load(f)
md = MetaDatasetDS(exp_name=internal_data, **kwargs)
# 3) train and logging meta model
with R.start(experiment_name=self.meta_exp_name):
R.log_params(**kwargs)
mm = MetaModelDS(
step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=30, seed=43, alpha=self.alpha
)
mm.fit(md)
R.save_objects(model=mm)
@property
def _task_path(self):
return DIRNAME / f"tasks_s{self.step}.pkl"
def meta_inference(self):
"""
Leverage meta-model for inference:
- Given
- baseline tasks
- input for meta model(internal data)
- meta model (its learnt knowledge on proxy forecasting model is expected to transfer to normal forecasting model)
"""
# 1) get meta model
exp = R.get_exp(experiment_name=self.meta_exp_name)
rec = exp.list_recorders(rtype=exp.RT_L)[0]
meta_model: MetaModelDS = rec.load_object("model")
# 2)
# we are transfer to knowledge of meta model to final forecasting tasks.
# Create MetaTaskDataset for the final forecasting tasks
# Aligning the setting of it to the MetaTaskDataset when training Meta model is necessary
# 2.1) get previous config
param = rec.list_params()
trunc_days = int(param["trunc_days"])
step = int(param["step"])
hist_step_n = int(param["hist_step_n"])
fill_method = param.get("fill_method", "max")
rb = RollingBenchmark(model_type=self.forecast_model, **self.rb_kwargs)
task_l = rb.create_rolling_tasks()
# 2.2) create meta dataset for final dataset
kwargs = dict(
task_tpl=task_l,
step=step,
segments=0.0, # all the tasks are for testing
trunc_days=trunc_days,
hist_step_n=hist_step_n,
fill_method=fill_method,
task_mode=MetaTask.PROC_MODE_TRANSFER,
)
with self._internal_data_path.open("rb") as f:
internal_data = pickle.load(f)
mds = MetaDatasetDS(exp_name=internal_data, **kwargs)
# 3) meta model make inference and get new qlib task
new_tasks = meta_model.inference(mds)
with self._task_path.open("wb") as f:
pickle.dump(new_tasks, f)
def train_and_eval_tasks(self):
"""
Training the tasks generated by meta model
Then evaluate it
"""
with self._task_path.open("rb") as f:
tasks = pickle.load(f)
rb = RollingBenchmark(rolling_exp="rolling_ds", model_type=self.forecast_model, **self.rb_kwargs)
rb.train_rolling_tasks(tasks)
rb.ens_rolling()
rb.update_rolling_rec()
def run_all(self):
# 1) file: handler_proxy.pkl (self.proxy_hd)
self.dump_data_for_proxy_model()
# 2)
# file: internal_data_s20.pkl
# mlflow: data_sim_s20, models for calculating meta_ipt
self.dump_meta_ipt()
# 3) meta model will be stored in `DDG-DA`
self.train_meta_model()
# 4) new_tasks are saved in "tasks_s20.pkl" (reweighter is added)
self.meta_inference()
# 5) load the saved tasks and train model
self.train_and_eval_tasks()
for f in self.CONF_LIST:
if conf_path.samefile(f):
break
else:
self.logger.warning("Model type is not in the benchmark!")
if __name__ == "__main__":
GetData().qlib_data(exists_skip=True)
auto_init()
fire.Fire(DDGDA)
fire.Fire(DDGDABench)

View File

@@ -5,11 +5,12 @@ This is the framework of periodically Rolling Retrain (RR) forecasting models. R
## Run the Code
Users can try RR by running the following command:
```bash
python rolling_benchmark.py run_all
python rolling_benchmark.py run
```
The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `model_type` parameter.
For example, users can try `LightGBM` forecasting models by running the following command:
```bash
python rolling_benchmark.py --model_type="gbdt" run_all
```
python rolling_benchmark.py --conf_path=workflow_config_lightgbm_Alpha158.yaml run
```

View File

@@ -1,161 +1,33 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from typing import Optional
from qlib.model.ens.ensemble import RollingEnsemble
from qlib.utils import init_instance_by_config
import fire
import yaml
import pandas as pd
from qlib import auto_init
from pathlib import Path
from tqdm.auto import tqdm
from qlib.model.trainer import TrainerR
from qlib.log import get_module_logger
from qlib.utils.data import update_config
from qlib.workflow import R
from typing import Union
import fire
from qlib import auto_init
from qlib.contrib.rolling.base import Rolling
from qlib.tests.data import GetData
DIRNAME = Path(__file__).absolute().resolve().parent
from qlib.workflow.task.gen import task_generator, RollingGen
from qlib.workflow.task.collect import RecorderCollector
from qlib.workflow.record_temp import PortAnaRecord, SigAnaRecord
class RollingBenchmark:
"""
**NOTE**
before running the example, please clean your previous results with following command
- `rm -r mlruns`
class RollingBenchmark(Rolling):
# The config in the README.md
CONF_LIST = [DIRNAME / "workflow_config_linear_Alpha158.yaml", DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"]
"""
DEFAULT_CONF = CONF_LIST[0]
def __init__(
self,
rolling_exp: str = "rolling_models",
model_type: str = "linear",
h_path: Optional[str] = None,
train_start: Optional[str] = None,
test_end: Optional[str] = None,
task_ext_conf: Optional[dict] = None,
) -> None:
"""
Parameters
----------
rolling_exp : str
The name for the experiments for rolling
model_type : str
The model to be boosted.
h_path : Optional[str]
the dumped data handler;
test_end : Optional[str]
the test end for the data. It is typically used together with the handler
train_start : Optional[str]
the train start for the data. It is typically used together with the handler.
task_ext_conf : Optional[dict]
some option to update the
"""
self.step = 20
self.horizon = 20
self.rolling_exp = rolling_exp
self.model_type = model_type
self.h_path = h_path
self.train_start = train_start
self.test_end = test_end
self.logger = get_module_logger("RollingBenchmark")
self.task_ext_conf = task_ext_conf
def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
# This code is for being compatible with the previous old code
conf_path = Path(conf_path)
super().__init__(conf_path=conf_path, horizon=horizon, **kwargs)
def basic_task(self):
"""For fast training rolling"""
if self.model_type == "gbdt":
conf_path = DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"
# dump the processed data on to disk for later loading to speed up the processing
h_path = DIRNAME / "lightgbm_alpha158_handler_horizon{}.pkl".format(self.horizon)
elif self.model_type == "linear":
# We use ridge regression to stabilize the performance
conf_path = DIRNAME / "workflow_config_linear_Alpha158.yaml"
h_path = DIRNAME / "linear_alpha158_handler_horizon{}.pkl".format(self.horizon)
for f in self.CONF_LIST:
if conf_path.samefile(f):
break
else:
raise AssertionError("Model type is not supported!")
if self.h_path is not None:
h_path = Path(self.h_path)
with conf_path.open("r") as f:
conf = yaml.safe_load(f)
# modify dataset horizon
conf["task"]["dataset"]["kwargs"]["handler"]["kwargs"]["label"] = [
"Ref($close, -{}) / Ref($close, -1) - 1".format(self.horizon + 1)
]
task = conf["task"]
if self.task_ext_conf is not None:
task = update_config(task, self.task_ext_conf)
if not h_path.exists():
h_conf = task["dataset"]["kwargs"]["handler"]
h = init_instance_by_config(h_conf)
h.to_pickle(h_path, dump_all=True)
task["dataset"]["kwargs"]["handler"] = f"file://{h_path}"
task["record"] = ["qlib.workflow.record_temp.SignalRecord"]
if self.train_start is not None:
seg = task["dataset"]["kwargs"]["segments"]["train"]
task["dataset"]["kwargs"]["segments"]["train"] = pd.Timestamp(self.train_start), seg[1]
if self.test_end is not None:
seg = task["dataset"]["kwargs"]["segments"]["test"]
task["dataset"]["kwargs"]["segments"]["test"] = seg[0], pd.Timestamp(self.test_end)
self.logger.info(task)
return task
def create_rolling_tasks(self):
task = self.basic_task()
task_l = task_generator(
task, RollingGen(step=self.step, trunc_days=self.horizon + 1)
) # the last two days should be truncated to avoid information leakage
return task_l
def train_rolling_tasks(self, task_l=None):
if task_l is None:
task_l = self.create_rolling_tasks()
trainer = TrainerR(experiment_name=self.rolling_exp)
trainer(task_l)
COMB_EXP = "rolling"
def ens_rolling(self):
rc = RecorderCollector(
experiment=self.rolling_exp,
artifacts_key=["pred", "label"],
process_list=[RollingEnsemble()],
# rec_key_func=lambda rec: (self.COMB_EXP, rec.info["id"]),
artifacts_path={"pred": "pred.pkl", "label": "label.pkl"},
)
res = rc()
with R.start(experiment_name=self.COMB_EXP):
R.log_params(exp_name=self.rolling_exp)
R.save_objects(**{"pred.pkl": res["pred"], "label.pkl": res["label"]})
def update_rolling_rec(self):
"""
Evaluate the combined rolling results
"""
for _, rec in R.list_recorders(experiment_name=self.COMB_EXP).items():
for rt_cls in SigAnaRecord, PortAnaRecord:
rt = rt_cls(recorder=rec, skip_existing=True)
rt.generate()
print(f"Your evaluation results can be found in the experiment named `{self.COMB_EXP}`.")
def run_all(self):
# the results will be save in mlruns.
# 1) each rolling task is saved in rolling_models
self.train_rolling_tasks()
# 2) combined rolling tasks and evaluation results are saved in rolling
self.ens_rolling()
self.update_rolling_rec()
self.logger.warning("Model type is not in the benchmark!")
if __name__ == "__main__":

View File

@@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:

View File

@@ -14,7 +14,6 @@ class HighFreqHandler(DataHandlerLP):
fit_end_time=None,
drop_raw=True,
):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)

View File

@@ -18,7 +18,6 @@ from highfreq_ops import get_calendar_day, DayLast, FFillNan, BFillNan, Date, Se
class HighfreqWorkflow:
SPEC_CONF = {"custom_ops": [DayLast, FFillNan, BFillNan, Date, Select, IsNull, Cut], "expression_cache": None}
MARKET = "all"

View File

@@ -35,7 +35,6 @@ def objective(trial):
if __name__ == "__main__":
provider_uri = "~/.qlib/qlib_data/cn_data"
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
qlib.init(provider_uri=provider_uri, region="cn")

View File

@@ -38,7 +38,6 @@ def objective(trial):
if __name__ == "__main__":
provider_uri = "~/.qlib/qlib_data/cn_data"
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
qlib.init(provider_uri=provider_uri, region=REG_CN)

View File

@@ -11,7 +11,6 @@ from qlib.tests.config import CSI300_GBDT_TASK
if __name__ == "__main__":
# use default data
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)

View File

@@ -9,7 +9,6 @@ from qlib.model.riskmodel import StructuredCovEstimator
def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index()
price_all = (
@@ -20,7 +19,6 @@ def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
riskmodel = StructuredCovEstimator()
for i in range(T - 1, len(price_all)):
date = price_all.index[i]
ref_date = price_all.index[i - T + 1]
@@ -47,7 +45,6 @@ def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
if __name__ == "__main__":
import qlib
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")

View File

@@ -13,7 +13,6 @@ from qlib.tests.data import GetData
class RollingDataWorkflow:
MARKET = "csi300"
start_time = "2010-01-01"
end_time = "2019-12-31"
@@ -93,7 +92,6 @@ class RollingDataWorkflow:
dataset = init_instance_by_config(dataset_config)
for rolling_offset in range(self.rolling_cnt):
print(f"===========rolling{rolling_offset} start===========")
if rolling_offset:
dataset.config(

View File

@@ -17,7 +17,6 @@ from qlib.tests.config import CSI300_BENCH, CSI300_GBDT_TASK
if __name__ == "__main__":
# use default data
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)

View File

@@ -2,7 +2,7 @@
# Licensed under the MIT License.
from pathlib import Path
__version__ = "0.9.2"
__version__ = "0.9.2.99"
__version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version
import os
from typing import Union
@@ -77,7 +77,6 @@ def init(default_conf="client", **kwargs):
def _mount_nfs_uri(provider_uri, mount_path, auto_mount: bool = False):
LOG = get_module_logger("mount nfs", level=logging.INFO)
if mount_path is None:
raise ValueError(f"Invalid mount path: {mount_path}!")

View File

@@ -182,7 +182,6 @@ def get_strategy_executor(
exchange_kwargs: dict = {},
pos_type: str = "Position",
) -> Tuple[BaseStrategy, BaseExecutor]:
# NOTE:
# - for avoiding recursive import
# - typing annotations is not reliable

View File

@@ -638,7 +638,6 @@ class Exchange:
random.seed(0)
random.shuffle(sorted_ids)
for stock_id in sorted_ids:
# Do not generate order for the non-tradable stocks
if not self.is_stock_tradable(stock_id=stock_id, start_time=start_time, end_time=end_time):
continue

View File

@@ -293,7 +293,6 @@ class QlibConfig(Config):
"""
def __init__(self, provider_uri: Union[str, Path, dict], mount_path: Union[str, Path, dict]):
"""
The relation of `provider_uri` and `mount_path`
- `mount_path` is used only if provider_uri is an NFS path

View File

@@ -130,7 +130,6 @@ class MTSDatasetH(DatasetH):
input_size=None,
**kwargs,
):
assert num_states == 0 or horizon > 0, "please specify `horizon` to avoid data leakage"
assert memory_mode in ["sample", "daily"], "unsupported memory mode"
assert memory_mode == "sample" or batch_size < 0, "daily memory requires daily sampling (`batch_size < 0`)"
@@ -153,7 +152,6 @@ class MTSDatasetH(DatasetH):
super().__init__(handler, segments, **kwargs)
def setup_data(self, handler_kwargs: dict = None, **kwargs):
super().setup_data(**kwargs)
if handler_kwargs is not None:
@@ -288,7 +286,6 @@ class MTSDatasetH(DatasetH):
daily_count = [] # store number of samples for each day
for j in indices[i : i + batch_size]:
# normal sampling: self.batch_size > 0 => slices is a list => slices_subset is a slice
# daily sampling: self.batch_size < 0 => slices is a nested list => slices_subset is a list
slices_subset = slices[j]
@@ -297,7 +294,6 @@ class MTSDatasetH(DatasetH):
# each slices_subset contains a list of slices for multiple stocks
# NOTE: daily sampling is used in 1) eval mode, 2) train mode with self.batch_size < 0
if self.batch_size < 0:
# store daily index
idx = self._daily_index.index[j] # daily_index.index is the index of the original data
daily_index.append(idx)
@@ -320,7 +316,6 @@ class MTSDatasetH(DatasetH):
slices_subset = [slices_subset]
for slc in slices_subset:
# legacy support for Alpha360 data by `input_size`
if self.input_size:
data.append(self._data[slc.stop - 1].reshape(self.input_size, -1).T)

View File

@@ -17,7 +17,6 @@ class HighFreqHandler(DataHandlerLP):
fit_end_time=None,
drop_raw=True,
):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
@@ -318,7 +317,6 @@ class HighFreqOrderHandler(DataHandlerLP):
inst_processors=None,
drop_raw=True,
):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)

View File

@@ -29,7 +29,6 @@ class HighFreqNorm(Processor):
feature_save_dir: str,
norm_groups: Dict[str, int],
):
self.fit_start_time = fit_start_time
self.fit_end_time = fit_end_time
self.feature_save_dir = feature_save_dir

View File

@@ -49,6 +49,8 @@ class InternalData:
# 1) prepare the prediction of proxy models
perf_task_tpl = deepcopy(self.task_tpl) # this task is supposed to contains no complicated objects
# The only thing we want to save is the prediction
perf_task_tpl["record"] = ["qlib.workflow.record_temp.SignalRecord"]
trainer = auto_filter_kwargs(trainer)(experiment_name=self.exp_name, **trainer_kwargs)
# NOTE:

View File

@@ -246,7 +246,6 @@ class ADARNN(Model):
evals_result=dict(),
save_path=None,
):
df_train, df_valid = dataset.prepare(
["train", "valid"],
col_set=["feature", "label"],
@@ -318,7 +317,6 @@ class ADARNN(Model):
preds = []
for begin in range(sample_num)[:: self.batch_size]:
if sample_num - begin < self.batch_size:
end = sample_num
else:

View File

@@ -146,7 +146,6 @@ class ALSTM(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@@ -155,7 +154,6 @@ class ALSTM(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, x_train, y_train):
x_train_values = x_train.values
y_train_values = np.squeeze(y_train.values)
@@ -165,7 +163,6 @@ class ALSTM(Model):
np.random.shuffle(indices)
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@@ -181,7 +178,6 @@ class ALSTM(Model):
self.train_optimizer.step()
def test_epoch(self, data_x, data_y):
# prepare training data
x_values = data_x.values
y_values = np.squeeze(data_y.values)
@@ -194,7 +190,6 @@ class ALSTM(Model):
indices = np.arange(len(x_values))
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@@ -217,7 +212,6 @@ class ALSTM(Model):
evals_result=dict(),
save_path=None,
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"],
col_set=["feature", "label"],
@@ -282,7 +276,6 @@ class ALSTM(Model):
preds = []
for begin in range(sample_num)[:: self.batch_size]:
if sample_num - begin < self.batch_size:
end = sample_num
else:

View File

@@ -156,7 +156,6 @@ class ALSTM(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@@ -165,10 +164,9 @@ class ALSTM(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, data_loader):
self.ALSTM_model.train()
for (data, weight) in data_loader:
for data, weight in data_loader:
feature = data[:, :, 0:-1].to(self.device)
label = data[:, -1, -1].to(self.device)
@@ -181,14 +179,12 @@ class ALSTM(Model):
self.train_optimizer.step()
def test_epoch(self, data_loader):
self.ALSTM_model.eval()
scores = []
losses = []
for (data, weight) in data_loader:
for data, weight in data_loader:
feature = data[:, :, 0:-1].to(self.device)
# feature[torch.isnan(feature)] = 0
label = data[:, -1, -1].to(self.device)
@@ -295,7 +291,6 @@ class ALSTM(Model):
preds = []
for data in test_loader:
feature = data[:, :, 0:-1].to(self.device)
with torch.no_grad():

View File

@@ -154,7 +154,6 @@ class GATs(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@@ -175,7 +174,6 @@ class GATs(Model):
return daily_index, daily_count
def train_epoch(self, x_train, y_train):
x_train_values = x_train.values
y_train_values = np.squeeze(y_train.values)
self.GAT_model.train()
@@ -197,7 +195,6 @@ class GATs(Model):
self.train_optimizer.step()
def test_epoch(self, data_x, data_y):
# prepare training data
x_values = data_x.values
y_values = np.squeeze(data_y.values)
@@ -230,7 +227,6 @@ class GATs(Model):
evals_result=dict(),
save_path=None,
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"],
col_set=["feature", "label"],

View File

@@ -32,7 +32,6 @@ class DailyBatchSampler(Sampler):
self.daily_index[0] = 0
def __iter__(self):
for idx, count in zip(self.daily_index, self.daily_count):
yield np.arange(idx, idx + count)
@@ -173,7 +172,6 @@ class GATs(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@@ -194,11 +192,9 @@ class GATs(Model):
return daily_index, daily_count
def train_epoch(self, data_loader):
self.GAT_model.train()
for data in data_loader:
data = data.squeeze()
feature = data[:, :, 0:-1].to(self.device)
label = data[:, -1, -1].to(self.device)
@@ -212,14 +208,12 @@ class GATs(Model):
self.train_optimizer.step()
def test_epoch(self, data_loader):
self.GAT_model.eval()
scores = []
losses = []
for data in data_loader:
data = data.squeeze()
feature = data[:, :, 0:-1].to(self.device)
# feature[torch.isnan(feature)] = 0
@@ -240,7 +234,6 @@ class GATs(Model):
evals_result=dict(),
save_path=None,
):
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
if dl_train.empty or dl_valid.empty:
@@ -329,7 +322,6 @@ class GATs(Model):
preds = []
for data in test_loader:
data = data.squeeze()
feature = data[:, :, 0:-1].to(self.device)

View File

@@ -146,7 +146,6 @@ class GRU(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@@ -155,7 +154,6 @@ class GRU(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, x_train, y_train):
x_train_values = x_train.values
y_train_values = np.squeeze(y_train.values)
@@ -165,7 +163,6 @@ class GRU(Model):
np.random.shuffle(indices)
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@@ -181,7 +178,6 @@ class GRU(Model):
self.train_optimizer.step()
def test_epoch(self, data_x, data_y):
# prepare training data
x_values = data_x.values
y_values = np.squeeze(data_y.values)
@@ -194,7 +190,6 @@ class GRU(Model):
indices = np.arange(len(x_values))
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@@ -217,7 +212,6 @@ class GRU(Model):
evals_result=dict(),
save_path=None,
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"],
col_set=["feature", "label"],
@@ -282,7 +276,6 @@ class GRU(Model):
preds = []
for begin in range(sample_num)[:: self.batch_size]:
if sample_num - begin < self.batch_size:
end = sample_num
else:

View File

@@ -154,7 +154,6 @@ class GRU(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@@ -163,10 +162,9 @@ class GRU(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, data_loader):
self.GRU_model.train()
for (data, weight) in data_loader:
for data, weight in data_loader:
feature = data[:, :, 0:-1].to(self.device)
label = data[:, -1, -1].to(self.device)
@@ -179,14 +177,12 @@ class GRU(Model):
self.train_optimizer.step()
def test_epoch(self, data_loader):
self.GRU_model.eval()
scores = []
losses = []
for (data, weight) in data_loader:
for data, weight in data_loader:
feature = data[:, :, 0:-1].to(self.device)
# feature[torch.isnan(feature)] = 0
label = data[:, -1, -1].to(self.device)
@@ -293,7 +289,6 @@ class GRU(Model):
preds = []
for data in test_loader:
feature = data[:, :, 0:-1].to(self.device)
with torch.no_grad():

Some files were not shown because too many files have changed in this diff Show More