1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-30 01:21:18 +08:00

Compare commits

..

1 Commits

Author SHA1 Message Date
you-n-g
74cb26c38f Update docs of strategy 2022-07-17 23:04:14 +08:00
202 changed files with 1346 additions and 7118 deletions

6
.github/labeler.yml vendored
View File

@@ -1,6 +0,0 @@
documentation:
- 'docs/**/*'
- '**/*.md'
waiting for triage:
- any: ['**/*', '!docs/**/*', '!**/*.md']

View File

@@ -1,14 +0,0 @@
name: "Add label automatically"
on:
- pull_request_target
jobs:
triage:
permissions:
contents: read
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/labeler@v4
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"

View File

@@ -60,7 +60,7 @@ jobs:
- name: Make html with sphinx
run: |
cd docs
sphinx-build -W --keep-going -b html . _build
sphinx-build -b html . build
cd ..
# Check Qlib with pylint
@@ -87,10 +87,9 @@ jobs:
# E1102: not-callable
# E1136: unsubscriptable-object
# References for parameters: https://github.com/PyCQA/pylint/issues/4577#issuecomment-1000245962
# We use sys.setrecursionlimit(2000) to make the recursion depth larger to ensure that pylint works properly (the default recursion depth is 1000).
- name: Check Qlib with pylint
run: |
pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}$' qlib --init-hook "import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"
pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}$' qlib --init-hook "import astroid; astroid.context.InferenceContext.max_inferred = 500"
# The following flake8 error codes were ignored:
# E501 line too long
@@ -140,7 +139,10 @@ jobs:
- name: Test workflow by config (install from source)
run: |
python -m pip install numba
# Version 0.52.0 of numba must be installed manually in CI, otherwise it will cause incompatibility with the latest version of numpy.
python -m pip install numba==0.52.0
# You must update numpy manually, because when installing python tools, it will try to uninstall numpy and cause CI to fail.
python -m pip install --upgrade numpy
python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
- name: Unit tests with Pytest

View File

@@ -8,7 +8,7 @@ on:
jobs:
build:
timeout-minutes: 720
timeout-minutes: 360
# we may retry for 3 times for `Unit tests with Pytest`
runs-on: ${{ matrix.os }}
@@ -29,9 +29,7 @@ jobs:
- name: Set up Python tools
run: |
python -m pip install --upgrade pip
# python -m pip is necessary to upgrade pip.
pip install --upgrade cython numpy
pip install --upgrade cython numpy pip
pip install -e .[dev]
- name: Downloads dependencies data
@@ -52,7 +50,7 @@ jobs:
- name: Unit tests with Pytest
uses: nick-fields/retry@v2
with:
timeout_minutes: 240
timeout_minutes: 120
max_attempts: 3
command: |
cd tests

3
.gitignore vendored
View File

@@ -24,9 +24,6 @@ qlib/VERSION.txt
qlib/data/_libs/expanding.cpp
qlib/data/_libs/rolling.cpp
examples/estimator/estimator_example/
examples/rl/data/
examples/rl/checkpoints/
examples/rl/outputs/
*.egg-info/

View File

@@ -85,7 +85,7 @@ Version 0.4.0
-------------
- Add `data` package that holds all data-related codes
- Reform the data provider structure
- Create a server for data centralized management `qlib-server <https://amc-msra.visualstudio.com/trading-algo/_git/qlib-server>`_
- Create a server for data centralized management `qlib-server<https://amc-msra.visualstudio.com/trading-algo/_git/qlib-server>`_
- Add a `ClientProvider` to work with server
- Add a pluggable cache mechanism
- Add a recursive backtracking algorithm to inspect the furthest reference date for an expression
@@ -166,12 +166,12 @@ Version 0.8.0
- Nested decision execution framework is supported
- There are lots of changes for daily trading, it is hard to list all of them. But a few important changes could be noticed
- The trading limitation is more accurate;
- In `previous version <https://github.com/microsoft/qlib/blob/v0.7.2/qlib/contrib/backtest/exchange.py#L160>`__, longing and shorting actions share the same action.
- In `current version <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/backtest/exchange.py#L304>`__, the trading limitation is different between logging and shorting action.
- In `previous version <https://github.com/microsoft/qlib/blob/v0.7.2/qlib/contrib/backtest/exchange.py#L160>`_, longing and shorting actions share the same action.
- In `current version <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/backtest/exchange.py#L304>`_, the trading limitation is different between logging and shorting action.
- The constant is different when calculating annualized metrics.
- `Current version <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/contrib/evaluate.py#L42>`_ uses more accurate constant than `previous version <https://github.com/microsoft/qlib/blob/v0.7.2/qlib/contrib/evaluate.py#L22>`__
- `A new version <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/tests/data.py#L17>`__ of data is released. Due to the unstability of Yahoo data source, the data may be different after downloading data again.
- Users could check out the backtesting results between `Current version <https://github.com/microsoft/qlib/tree/7c31012b507a3823117bddcc693fc64899460b2a/examples/benchmarks>`__ and `previous version <https://github.com/microsoft/qlib/tree/v0.7.2/examples/benchmarks>`__
- `Current version <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/contrib/evaluate.py#L42>`_ uses more accurate constant than `previous version <https://github.com/microsoft/qlib/blob/v0.7.2/qlib/contrib/evaluate.py#L22>`_
- `A new version <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/tests/data.py#L17>`_ of data is released. Due to the unstability of Yahoo data source, the data may be different after downloading data again.
- Users could check out the backtesting results between `Current version <https://github.com/microsoft/qlib/tree/7c31012b507a3823117bddcc693fc64899460b2a/examples/benchmarks>`_ and `previous version <https://github.com/microsoft/qlib/tree/v0.7.2/examples/benchmarks>`_
Other Versions

View File

@@ -11,8 +11,6 @@
Recent released features
| Feature | Status |
| -- | ------ |
| Release Qlib v0.9.0 | :octocat: [Released](https://github.com/microsoft/qlib/releases/tag/v0.9.0) on Dec 9, 2022 |
| RL Learning Framework | :hammer: :chart_with_upwards_trend: Released on Nov 10, 2022. [#1332](https://github.com/microsoft/qlib/pull/1332), [#1322](https://github.com/microsoft/qlib/pull/1322), [#1316](https://github.com/microsoft/qlib/pull/1316),[#1299](https://github.com/microsoft/qlib/pull/1299),[#1263](https://github.com/microsoft/qlib/pull/1263), [#1244](https://github.com/microsoft/qlib/pull/1244), [#1169](https://github.com/microsoft/qlib/pull/1169), [#1125](https://github.com/microsoft/qlib/pull/1125), [#1076](https://github.com/microsoft/qlib/pull/1076)|
| HIST and IGMTF models | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/1040) on Apr 10, 2022 |
| Qlib [notebook tutorial](https://github.com/microsoft/qlib/tree/main/examples/tutorial) | 📖 [Released](https://github.com/microsoft/qlib/pull/1037) on Apr 7, 2022 |
| Ibovespa index data | :rice: [Released](https://github.com/microsoft/qlib/pull/990) on Apr 6, 2022 |
@@ -69,7 +67,6 @@ For more details, please refer to our paper ["Qlib: An AI-oriented Quantitative
<li type="circle"><a href="#auto-quant-research-workflow">Auto Quant Research Workflow</a></li>
<li type="circle"><a href="#building-customized-quant-research-workflow-by-code">Building Customized Quant Research Workflow by Code</a></li></ul>
<li><a href="#quant-dataset-zoo"><strong>Quant Dataset Zoo</strong></a></li>
<li><a href="#learning-framework">Learning Framework</a></li>
<li><a href="#more-about-qlib">More About Qlib</a></li>
<li><a href="#offline-mode-and-online-mode">Offline Mode and Online Mode</a>
<ul>
@@ -108,16 +105,21 @@ Your feedbacks about the features are very important.
# Framework of Qlib
<div style="align: center">
<img src="docs/_static/img/framework-abstract.jpg" />
<img src="docs/_static/img/framework.svg" />
</div>
The high-level framework of Qlib can be found above(users can find the [detailed framework](https://qlib.readthedocs.io/en/latest/introduction/introduction.html#framework) of Qlib's design when getting into nitty gritty).
The components are designed as loose-coupled modules, and each component could be used stand-alone.
At the module level, Qlib is a platform that consists of the above components. The components are designed as loose-coupled modules, and each component could be used stand-alone.
Qlib provides a strong infrastructure to support Quant research. [Data](https://qlib.readthedocs.io/en/latest/component/data.html) is always an important part.
A strong learning framework is designed to support diverse learning paradigms (e.g. [reinforcement learning](https://qlib.readthedocs.io/en/latest/component/rl.html), [supervised learning](https://qlib.readthedocs.io/en/latest/component/workflow.html#model-section)) and patterns at different levels(e.g. [market dynamic modeling](https://qlib.readthedocs.io/en/latest/component/meta.html)).
By modeling the market, [trading strategies](https://qlib.readthedocs.io/en/latest/component/strategy.html) will generate trade decisions that will be executed. Multiple trading strategies and executors in different levels or granularities can be [nested to be optimized and run together](https://qlib.readthedocs.io/en/latest/component/highfreq.html).
At last, a comprehensive [analysis](https://qlib.readthedocs.io/en/latest/component/report.html) will be provided and the model can be [served online](https://qlib.readthedocs.io/en/latest/component/online.html) in a low cost.
| Name | Description |
| ------ | ----- |
| `Infrastructure` layer | `Infrastructure` layer provides underlying support for Quant research. `DataServer` provides a high-performance infrastructure for users to manage and retrieve raw data. `Trainer` provides a flexible interface to control the training process of models, which enable algorithms to control the training process. |
| `Workflow` layer | `Workflow` layer covers the whole workflow of quantitative investment. `Information Extractor` extracts data for models. `Forecast Model` focuses on producing all kinds of forecast signals (e.g. _alpha_, risk) for other modules. With these signals `Decision Generator` will generate the target trading decisions(i.e. portfolio, orders) to be executed by `Execution Env` (i.e. the trading market). There may be multiple levels of `Trading Agent` and `Execution Env` (e.g. an _order executor trading agent and intraday order execution environment_ could behave like an interday trading environment and nested in _daily portfolio management trading agent and interday trading environment_ ) |
| `Interface` layer | `Interface` layer tries to present a user-friendly interface for the underlying system. `Analyser` module will provide users detailed analysis reports of forecasting signals, portfolios and execution results |
* The modules with hand-drawn style are under development and will be released in the future.
* The modules with dashed borders are highly user-customizable and extendible.
(p.s. framework image is created with https://draw.io/)
# Quick Start
@@ -168,25 +170,12 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor
git clone https://github.com/microsoft/qlib.git && cd qlib
pip install .
```
**Note**: You can install Qlib with `python setup.py install` as well. But it is not the recommended approach. It will skip `pip` and cause obscure problems. For example, **only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, while the command ``python setup.py install`` **can't**.
**Note**: You can install Qlib with `python setup.py install` as well. But it is not the recommanded approach. It will skip `pip` and cause obscure problems. For example, **only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, while the command ``python setup.py install`` **can't**.
**Tips**: If you fail to install `Qlib` or run the examples in your environment, comparing your steps and the [CI workflow](.github/workflows/test_qlib_from_source.yml) may help you find the problem.
## Data Preparation
Load and prepare data by running the following code:
### Get with module
```bash
# get 1d data
python -m qlib.run.get_data qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
# get 1min data
python -m qlib.run.get_data qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min
```
### Get from source
```bash
# get 1d data
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
@@ -402,17 +391,6 @@ Dataset plays a very important role in Quant. Here is a list of the datasets bui
[Here](https://qlib.readthedocs.io/en/latest/advanced/alpha.html) is a tutorial to build dataset with `Qlib`.
Your PR to build new Quant dataset is highly welcomed.
# Learning Framework
Qlib is high customizable and a lot of its components are learnable.
The learnable components are instances of `Forecast Model` and `Trading Agent`. They are learned based on the `Learning Framework` layer and then applied to multiple scenarios in `Workflow` layer.
The learning framework leverages the `Workflow` layer as well(e.g. sharing `Information Extractor`, creating environments based on `Execution Env`).
Based on learning paradigms, they can be categorized into reinforcement learning and supervised learning.
- For supervised learning, the detailed docs can be found [here](https://qlib.readthedocs.io/en/latest/component/model.html).
- For reinforcement learning, the detailed docs can be found [here](https://qlib.readthedocs.io/en/latest/component/rl.html). Qlib's RL learning framework leverages `Execution Env` in `Workflow` layer to create environments. It's worth noting that `NestedExecutor` is supported as well. This empowers users to optimize different level of strategies/models/agents together (e.g. optimizing an order execution strategy for a specific portfolio management strategy).
# More About Qlib
If you want to have a quick glance at the most frequently used components of qlib, you can try notebooks [here](examples/tutorial/).

View File

@@ -17,5 +17,4 @@ help:
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
pip install -r requirements.txt
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 144 KiB

After

Width:  |  Height:  |  Size: 98 KiB

View File

@@ -38,7 +38,7 @@ Example
DIF = \frac{EMA(CLOSE, 12) - EMA(CLOSE, 26)}{CLOSE}
`DEA` means a 9-period EMA of the DIF.
`DEA`means a 9-period EMA of the DIF.
.. math::

View File

@@ -18,7 +18,7 @@ With this module, users can run their ``task`` automatically at different period
This whole process can be used in `Online Serving <../component/online.html>`_.
An example of the entire process is shown `here <https://github.com/microsoft/qlib/tree/main/examples/model_rolling/task_manager_rolling.py>`__.
An example of the entire process is shown `here <https://github.com/microsoft/qlib/tree/main/examples/model_rolling/task_manager_rolling.py>`_.
Task Generating
===============
@@ -31,10 +31,9 @@ Here is the base class of ``TaskGen``:
.. autoclass:: qlib.workflow.task.gen.TaskGen
:members:
:noindex:
``Qlib`` provides a class `RollingGen <https://github.com/microsoft/qlib/tree/main/qlib/workflow/task/gen.py>`_ to generate a list of ``task`` of the dataset in different date segments.
This class allows users to verify the effect of data from different periods on the model in one experiment. More information is `here <../reference/api.html#TaskGen>`__.
This class allows users to verify the effect of data from different periods on the model in one experiment. More information is `here <../reference/api.html#TaskGen>`_.
Task Storing
============
@@ -54,9 +53,8 @@ Users need to provide the MongoDB URL and database name for using ``TaskManager`
.. autoclass:: qlib.workflow.task.manage.TaskManager
:members:
:noindex:
More information of ``Task Manager`` can be found in `here <../reference/api.html#TaskManager>`__.
More information of ``Task Manager`` can be found in `here <../reference/api.html#TaskManager>`_.
Task Training
=============
@@ -66,13 +64,11 @@ An easy way to get the ``task_func`` is using ``qlib.model.trainer.task_train``
It will run the whole workflow defined by ``task``, which includes *Model*, *Dataset*, *Record*.
.. autofunction:: qlib.workflow.task.manage.run_task
:noindex:
Meanwhile, ``Qlib`` provides a module called ``Trainer``.
.. autoclass:: qlib.model.trainer.Trainer
:members:
:noindex:
``Trainer`` will train a list of tasks and return a list of model recorders.
``Qlib`` offer two kinds of Trainer, TrainerR is the simplest way and TrainerRM is based on TaskManager to help manager tasks lifecycle automatically.

View File

@@ -24,8 +24,8 @@ The introduction of ``Data Layer`` includes the following parts.
Here is a typical example of Qlib data workflow
- Users download data and converting data into Qlib format(with filename suffix `.bin`). In this step, typically only some basic data are stored on disk(such as OHLCV).
- Creating some basic features based on Qlib's expression Engine(e.g. "Ref($close, 60) / $close", the return of last 60 trading days). Supported operators in the expression engine can be found `here <https://github.com/microsoft/qlib/blob/main/qlib/data/ops.py>`__. This step is typically implemented in Qlib's `Data Loader <https://qlib.readthedocs.io/en/latest/component/data.html#data-loader>`_ which is a component of `Data Handler <https://qlib.readthedocs.io/en/latest/component/data.html#data-handler>`_ .
- If users require more complicated data processing (e.g. data normalization), `Data Handler <https://qlib.readthedocs.io/en/latest/component/data.html#data-handler>`_ support user-customized processors to process data(some predefined processors can be found `here <https://github.com/microsoft/qlib/blob/main/qlib/data/dataset/processor.py>`__). The processors are different from operators in expression engine. It is designed for some complicated data processing methods which is hard to supported in operators in expression engine.
- Creating some basic features based on Qlib's expression Engine(e.g. "Ref($close, 60) / $close", the return of last 60 trading days). Supported operators in the expression engine can be found `here <https://github.com/microsoft/qlib/blob/main/qlib/data/ops.py>`_. This step is typically implemented in Qlib's `Data Loader <https://qlib.readthedocs.io/en/latest/component/data.html#data-loader>`_ which is a component of `Data Handler <https://qlib.readthedocs.io/en/latest/component/data.html#data-handler>`_ .
- If users require more complicated data processing (e.g. data normalization), `Data Handler <https://qlib.readthedocs.io/en/latest/component/data.html#data-handler>`_ support user-customized processors to process data(some predefined processors can be found `here <https://github.com/microsoft/qlib/blob/main/qlib/data/dataset/processor.py>`_). The processors are different from operators in expression engine. It is designed for some complicated data processing methods which is hard to supported in operators in expression engine.
- At last, `Dataset <https://qlib.readthedocs.io/en/latest/component/data.html#dataset>`_ is responsible to prepare model-specific dataset from the processed data of Data Handler
Data Preparation
@@ -37,7 +37,7 @@ Qlib Format Data
We've specially designed a data structure to manage financial data, please refer to the `File storage design section in Qlib paper <https://arxiv.org/abs/2009.11189>`_ for detailed information.
Such data will be stored with filename suffix `.bin` (We'll call them `.bin` file, `.bin` format, or qlib format). `.bin` file is designed for scientific computing on finance data.
``Qlib`` provides two different off-the-shelf datasets, which can be accessed through this `link <https://github.com/microsoft/qlib/blob/main/qlib/contrib/data/handler.py>`__:
``Qlib`` provides two different off-the-shelf datasets, which can be accessed through this `link <https://github.com/microsoft/qlib/blob/main/qlib/contrib/data/handler.py>`_:
======================== ================= ================
Dataset US Market China Market
@@ -47,11 +47,11 @@ Alpha360 √ √
Alpha158 √ √
======================== ================= ================
Also, ``Qlib`` provides a high-frequency dataset. Users can run a high-frequency dataset example through this `link <https://github.com/microsoft/qlib/tree/main/examples/highfreq>`__.
Also, ``Qlib`` provides a high-frequency dataset. Users can run a high-frequency dataset example through this `link <https://github.com/microsoft/qlib/tree/main/examples/highfreq>`_.
Qlib Format Dataset
-------------------
``Qlib`` has provided an off-the-shelf dataset in `.bin` format, users could use the script ``scripts/get_data.py`` to download the China-Stock dataset as follows. User can also use numpy to load `.bin` file to validate data.
``Qlib`` has provided an off-the-shelf dataset in `.bin` format, users could use the script ``scripts/get_data.py`` to download the China-Stock dataset as follows.
The price volume data look different from the actual dealling price because of they are **adjusted** (`adjusted price <https://www.investopedia.com/terms/a/adjusted_closing_price.asp>`_). And then you may find that the adjusted price may be different from different data sources. This is because different data sources may vary in the way of adjusting prices. Qlib normalize the price on first trading day of each stock to 1 when adjusting them.
Users can leverage `$factor` to get the original trading price (e.g. `$close / $factor` to get the original close price).
@@ -332,7 +332,6 @@ Here are some interfaces of the ``QlibDataLoader`` class:
.. autoclass:: qlib.data.dataset.loader.DataLoader
:members:
:noindex:
API
---
@@ -362,7 +361,6 @@ Here are some important interfaces that ``DataHandlerLP`` provides:
.. autoclass:: qlib.data.dataset.handler.DataHandlerLP
:members: __init__, fetch, get_cols
:noindex:
If users want to load features and labels by config, users can define a new handler and call the static method `parse_config_to_fields` of ``qlib.contrib.data.handler.Alpha158``.
@@ -453,7 +451,6 @@ The ``DatasetH`` class is the `dataset` with `Data Handler`. Here is the most im
.. autoclass:: qlib.data.dataset.__init__.DatasetH
:members:
:noindex:
API
---
@@ -473,11 +470,9 @@ Global Memory Cache
.. autoclass:: qlib.data.cache.MemCacheUnit
:members:
:noindex:
.. autoclass:: qlib.data.cache.MemCache
:members:
:noindex:
ExpressionCache
@@ -492,7 +487,6 @@ The following shows the details about the interfaces:
.. autoclass:: qlib.data.cache.ExpressionCache
:members:
:noindex:
``Qlib`` has currently provided implemented disk cache `DiskExpressionCache` which inherits from `ExpressionCache` . The expressions data will be stored in the disk.
@@ -508,7 +502,6 @@ The following shows the details about the interfaces:
.. autoclass:: qlib.data.cache.DatasetCache
:members:
:noindex:
``Qlib`` has currently provided implemented disk cache `DiskDatasetCache` which inherits from `DatasetCache` . The datasets' data will be stored in the disk.
@@ -519,7 +512,7 @@ Data and Cache File Structure
We've specially designed a file structure to manage data and cache, please refer to the `File storage design section in Qlib paper <https://arxiv.org/abs/2009.11189>`_ for detailed information. The file structure of data and cache is listed as follows.
.. code-block::
.. code-block:: json
- data/
[raw data] updated by data providers

View File

@@ -8,33 +8,31 @@ Design of Nested Decision Execution Framework for High-Frequency Trading
Introduction
============
Daily trading (e.g. portfolio management) and intraday trading (e.g. orders execution) are two hot topics in Quant investment and are usually studied separately.
Daily trading (e.g. portfolio management) and intraday trading (e.g. orders execution) are two hot topics in Quant investment and usually studied separately.
To get the join trading performance of daily and intraday trading, they must interact with each other and run backtest jointly.
In order to support the joint backtest strategies at multiple levels, a corresponding framework is required. None of the publicly available high-frequency trading frameworks considers multi-level joint trading, which makes the backtesting aforementioned inaccurate.
In order to support the joint backtest strategies in multiple levels, a corresponding framework is required. None of the publicly available high-frequency trading frameworks considers multi-level joint trading, which make the backtesting aforementioned inaccurate.
Besides backtesting, the optimization of strategies from different levels is not standalone and can be affected by each other.
For example, the best portfolio management strategy may change with the performance of order executions(e.g. a portfolio with higher turnover may become a better choice when we improve the order execution strategies).
To achieve overall good performance, it is necessary to consider the interaction of strategies at a different levels.
For example, the best portfolio management strategy may change with the performance of order executions(e.g. a portfolio with higher turnover may becomes a better choice when we improve the order execution strategies).
To achieve the overall good performance , it is necessary to consider the interaction of strategies in different level.
Therefore, building a new framework for trading on multiple levels becomes necessary to solve the various problems mentioned above, for which we designed a nested decision execution framework that considers the interaction of strategies.
Therefore, building a new framework for trading in multiple levels becomes necessary to solve the various problems mentioned above, for which we designed a nested decision execution framework that consider the interaction of strategies.
.. image:: ../_static/img/framework.svg
The design of the framework is shown in the yellow part in the middle of the figure above. Each level consists of ``Trading Agent`` and ``Execution Env``. ``Trading Agent`` has its own data processing module (``Information Extractor``), forecasting module (``Forecast Model``) and decision generator (``Decision Generator``). The trading algorithm generates the decisions by the ``Decision Generator`` based on the forecast signals output by the ``Forecast Module``, and the decisions generated by the trading algorithm are passed to the ``Execution Env``, which returns the execution results.
The frequency of the trading algorithm, decision content and execution environment can be customized by users (e.g. intraday trading, daily-frequency trading, weekly-frequency trading), and the execution environment can be nested with finer-grained trading algorithm and execution environment inside (i.e. sub-workflow in the figure, e.g. daily-frequency orders can be turned into finer-grained decisions by splitting orders within the day). The flexibility of the nested decision execution framework makes it easy for users to explore the effects of combining different levels of trading strategies and break down the optimization barriers between different levels of the trading algorithm.
The optimization for the nested decision execution framework can be implemented with the support of `QlibRL <https://qlib.readthedocs.io/en/latest/component/rl.html>`_. To know more about how to use the QlibRL, go to API Reference: `RL API <../reference/api.html#rl>`_.
The frequency of trading algorithm, decision content and execution environment can be customized by users (e.g. intraday trading, daily-frequency trading, weekly-frequency trading), and the execution environment can be nested with finer-grained trading algorithm and execution environment inside (i.e. sub-workflow in the figure, e.g. daily-frequency orders can be turned into finer-grained decisions by splitting orders within the day). The flexibility of nested decision execution framework makes it easy for users to explore the effects of combining different levels of trading strategies and break down the optimization barriers between different levels of trading algorithm.
Example
=======
An example of a nested decision execution framework for high-frequency can be found `here <https://github.com/microsoft/qlib/blob/main/examples/nested_decision_execution/workflow.py>`_.
An example of nested decision execution framework for high-frequency can be found `here <https://github.com/microsoft/qlib/blob/main/examples/nested_decision_execution/workflow.py>`_.
Besides, the above examples, here are some other related works about high-frequency trading in Qlib.
Besides, the above examples, here are some other related work about high-frequency trading in Qlib.
- `Prediction with high-frequency data <https://github.com/microsoft/qlib/tree/main/examples/highfreq#benchmarks-performance-predicting-the-price-trend-in-high-frequency-data>`_
- `Examples <https://github.com/microsoft/qlib/blob/main/examples/orderbook_data/>`_ to extract features from high-frequency data without fixed frequency.
- `Examples <https://github.com/microsoft/qlib/blob/main/examples/orderbook_data/>`_ to extract features form high-frequency data without fixed frequency.
- `A paper <https://github.com/microsoft/qlib/tree/high-freq-execution#high-frequency-execution>`_ for high-frequency trading.

View File

@@ -20,7 +20,6 @@ The base class provides the following interfaces:
.. autoclass:: qlib.model.base.Model
:members:
:noindex:
``Qlib`` also provides a base class `qlib.model.base.ModelFT <../reference/api.html#qlib.model.base.ModelFT>`_, which includes the method for finetuning the model.

View File

@@ -1,4 +1,4 @@
.. _online_serving:
.. _online:
==============
Online Serving
@@ -32,25 +32,21 @@ Online Manager
.. automodule:: qlib.workflow.online.manager
:members:
:noindex:
Online Strategy
===============
.. automodule:: qlib.workflow.online.strategy
:members:
:noindex:
Online Tool
===========
.. automodule:: qlib.workflow.online.utils
:members:
:noindex:
Updater
=======
.. automodule:: qlib.workflow.online.update
:members:
:noindex:

View File

@@ -61,7 +61,6 @@ The ``ExpManager`` module in ``Qlib`` is responsible for managing different expe
.. autoclass:: qlib.workflow.expm.ExpManager
:members: get_exp, list_experiments
:noindex:
For other interfaces such as `create_exp`, `delete_exp`, please refer to `Experiment Manager API <../reference/api.html#experiment-manager>`_.
@@ -72,7 +71,6 @@ The ``Experiment`` class is solely responsible for a single experiment, and it w
.. autoclass:: qlib.workflow.exp.Experiment
:members: get_recorder, list_recorders
:noindex:
For other interfaces such as `search_records`, `delete_recorder`, please refer to `Experiment API <../reference/api.html#experiment>`_.
@@ -87,7 +85,6 @@ Here are some important APIs that are not included in the ``QlibRecorder``:
.. autoclass:: qlib.workflow.recorder.Recorder
:members: list_artifacts, list_metrics, list_params, list_tags
:noindex:
For other interfaces such as `save_objects`, `load_object`, please refer to `Recorder API <../reference/api.html#recorder>`_.
@@ -110,7 +107,7 @@ Here is a simple example of what is done in ``SigAnaRecord``, which users can re
- ``PortAnaRecord``: This class generates the results of `backtest`. The detailed information about `backtest` as well as the available `strategy`, users can refer to `Strategy <../component/strategy.html>`_ and `Backtest <../component/backtest.html>`_.
Here is a simple example of what is done in ``PortAnaRecord``, which users can refer to if they want to do backtest based on their own prediction and label.
Here is a simple exampke of what is done in ``PortAnaRecord``, which users can refer to if they want to do backtest based on their own prediction and label.
.. code-block:: Python

View File

@@ -51,7 +51,6 @@ API
.. automodule:: qlib.contrib.report.analysis_position.report
:members:
:noindex:
Graphical Result
~~~~~~~~~~~~~~~~
@@ -94,7 +93,6 @@ API
.. automodule:: qlib.contrib.report.analysis_position.score_ic
:members:
:noindex:
Graphical Result
@@ -153,7 +151,6 @@ API
.. automodule:: qlib.contrib.report.analysis_position.risk_analysis
:members:
:noindex:
Graphical Result
@@ -177,7 +174,6 @@ Graphical Result
The `Information Ratio` without cost.
- `excess_return_with_cost`
The `Information Ratio` with cost.
To know more about `Information Ratio`, please refer to `Information Ratio IR <https://www.investopedia.com/terms/i/informationratio.asp>`_.
- `max_drawdown`
- `excess_return_without_cost`
@@ -273,7 +269,6 @@ API
.. automodule:: qlib.contrib.report.analysis_model.analysis_model_performance
:members:
:noindex:
Graphical Results

View File

@@ -1,49 +0,0 @@
The Framework of QlibRL
=======================
QlibRL contains a full set of components that cover the entire lifecycle of an RL pipeline, including building the simulator of the market, shaping states & actions, training policies (strategies), and backtesting strategies in the simulated environment.
QlibRL is basically implemented with the support of Tianshou and Gym frameworks. The high-level structure of QlibRL is demonstrated below:
.. image:: ../../_static/img/QlibRL_framework.png
:width: 600
:align: center
Here, we briefly introduce each component in the figure.
EnvWrapper
------------
EnvWrapper is the complete capsulation of the simulated environment. It receives actions from outside (policy/strategy/agent), simulates the changes in the market, and then replies rewards and updated states, thus forming an interaction loop.
In QlibRL, EnvWrapper is a subclass of gym.Env, so it implements all necessary interfaces of gym.Env. Any classes or pipelines that accept gym.Env should also accept EnvWrapper. Developers do not need to implement their own EnvWrapper to build their own environment. Instead, they only need to implement 4 components of the EnvWrapper:
- `Simulator`
The simulator is the core component responsible for the environment simulation. Developers could implement all the logic that is directly related to the environment simulation in the Simulator in any way they like. In QlibRL, there are already two implementations of Simulator for single asset trading: 1) ``SingleAssetOrderExecution``, which is built based on Qlib's backtest toolkits and hence considers a lot of practical trading details but is slow. 2) ``SimpleSingleAssetOrderExecution``, which is built based on a simplified trading simulator, which ignores a lot of details (e.g. trading limitations, rounding) but is quite fast.
- `State interpreter`
The state interpreter is responsible for "interpret" states in the original format (format provided by the simulator) into states in a format that the policy could understand. For example, transform unstructured raw features into numerical tensors.
- `Action interpreter`
The action interpreter is similar to the state interpreter. But instead of states, it interprets actions generated by the policy, from the format provided by the policy to the format that is acceptable to the simulator.
- `Reward function`
The reward function returns a numerical reward to the policy after each time the policy takes an action.
EnvWrapper will organically organize these components. Such decomposition allows for better flexibility in development. For example, if the developers want to train multiple types of policies in the same environment, they only need to design one simulator and design different state interpreters/action interpreters/reward functions for different types of policies.
QlibRL has well-defined base classes for all these 4 components. All the developers need to do is define their own components by inheriting the base classes and then implementing all interfaces required by the base classes. The API for the above base components can be found `here <../../reference/api.html#module-qlib.rl>`__.
Policy
------------
QlibRL directly uses Tianshou's policy. Developers could use policies provided by Tianshou off the shelf, or implement their own policies by inheriting Tianshou's policies.
Training Vessel & Trainer
-------------------------
As stated by their names, training vessels and trainers are helper classes used in training. A training vessel is a ship that contains a simulator/interpreters/reward function/policy, and it controls algorithm-related parts of training. Correspondingly, the trainer is responsible for controlling the runtime parts of training.
As you may have noticed, a training vessel itself holds all the required components to build an EnvWrapper rather than holding an instance of EnvWrapper directly. This allows the training vessel to create duplicates of EnvWrapper dynamically when necessary (for example, under parallel training).
With a training vessel, the trainer could finally launch the training pipeline by simple, Scikit-learn-like interfaces (i.e., ``trainer.fit()``).
The API for Trainer and TrainingVessel and can be found `here <../../reference/api.html#module-qlib.rl.trainer>`__.
The RL module is designed in a loosely-coupled way. Currently, RL examples are integrated with concrete business logic.
But the core part of RL is much simpler than what you see.
To demonstrate the simple core of RL, `a dedicated notebook <https://github.com/microsoft/qlib/tree/main/examples/rl/simple_example.ipynb>`__ for RL without business loss is created.

View File

@@ -1,50 +0,0 @@
=====================================================
Reinforcement Learning in Quantitative Trading
=====================================================
Reinforcement Learning
======================
Different from supervised learning tasks such as classification tasks and regression tasks. Another important paradigm in machine learning is Reinforcement Learning,
which attempts to optimize an accumulative numerical reward signal by directly interacting with the environment under a few assumptions such as Markov Decision Process(MDP).
As demonstrated in the following figure, an RL system consists of four elements, 1)the agent 2) the environment the agent interacts with 3) the policy that the agent follows to take actions on the environment and 4)the reward signal from the environment to the agent.
In general, the agent can perceive and interpret its environment, take actions and learn through reward, to seek long-term and maximum overall reward to achieve an optimal solution.
.. image:: ../../_static/img/RL_framework.png
:width: 300
:align: center
RL attempts to learn to produce actions by trial and error.
By sampling actions and then observing which one leads to our desired outcome, a policy is obtained to generate optimal actions.
In contrast to supervised learning, RL learns this not from a label but from a time-delayed label called a reward.
This scalar value lets us know whether the current outcome is good or bad.
In a word, the target of RL is to take actions to maximize reward.
The Qlib Reinforcement Learning toolkit (QlibRL) is an RL platform for quantitative investment, which provides support to implement the RL algorithms in Qlib.
Potential Application Scenarios in Quantitative Trading
=======================================================
RL methods have already achieved outstanding achievement in many applications, such as game playing, resource allocating, recommendation, marketing and advertising, etc.
Investment is always a continuous process, taking the stock market as an example, investors need to control their positions and stock holdings by one or more buying and selling behaviors, to maximize the investment returns.
Besides, each buy and sell decision is made by investors after fully considering the overall market information and stock information.
From the view of an investor, the process could be described as a continuous decision-making process generated according to interaction with the market, such problems could be solved by the RL algorithms.
Following are some scenarios where RL can potentially be used in quantitative investment.
Portfolio Construction
----------------------
Portfolio construction is a process of selecting securities optimally by taking a minimum risk to achieve maximum returns. With an RL-based solution, an agent allocates stocks at every time step by obtaining information for each stock and the market. The key is to develop of policy for building a portfolio and make the policy able to pick the optimal portfolio.
Order Execution
---------------
As a fundamental problem in algorithmic trading, order execution aims at fulfilling a specific trading order, either liquidation or acquirement, for a given instrument. Essentially, the goal of order execution is twofold: it not only requires to fulfill the whole order but also targets a more economical execution with maximizing profit gain (or minimizing capital loss). The order execution with only one order of liquidation or acquirement is called single-asset order execution.
Considering stock investment always aim to pursue long-term maximized profits, it usually manifests as a sequential process of continuously adjusting the asset portfolios, execution for multiple orders, including order of liquidation and acquirement, brings more constraints and makes the sequence of execution for different orders should be considered, e.g. before executing an order to buy some stocks, we have to sell at least one stock. The order execution with multiple assets is called multi-asset order execution.
According to the order executions trait of sequential decision-making, an RL-based solution could be applied to solve the order execution. With an RL-based solution, an agent optimizes execution strategy by interacting with the market environment.
With QlibRL, the RL algorithm in the above scenarios can be easily implemented.
Nested Portfolio Construction and Order Executor
------------------------------------------------
QlibRL makes it possible to jointly optimize different levels of strategies/models/agents. Take `Nested Decision Execution Framework <https://github.com/microsoft/qlib/blob/main/examples/nested_decision_execution>`_ as an example, the optimization of order execution strategy and portfolio management strategies can interact with each other to maximize returns.

View File

@@ -1,175 +0,0 @@
Quick Start
============
.. currentmodule:: qlib
QlibRL provides an example of an implementation of a single asset order execution task and the following is an example of the config file to train with QlibRL.
.. code-block:: yaml
simulator:
# Each step contains 30mins
time_per_step: 30
# Upper bound of volume, should be null or a float between 0 and 1, if it is a float, represent upper bound is calculated by the percentage of the market volume
vol_limit: null
env:
# Concurrent environment workers.
concurrency: 1
# dummy or subproc or shmem. Corresponding to `parallelism in tianshou <https://tianshou.readthedocs.io/en/master/api/tianshou.env.html#vectorenv>`_.
parallel_mode: dummy
action_interpreter:
class: CategoricalActionInterpreter
kwargs:
# Candidate actions, it can be a list with length L: [a_1, a_2,..., a_L] or an integer n, in which case the list of length n+1 is auto-generated, i.e., [0, 1/n, 2/n,..., n/n].
values: 14
# Total number of steps (an upper-bound estimation)
max_step: 8
module_path: qlib.rl.order_execution.interpreter
state_interpreter:
class: FullHistoryStateInterpreter
kwargs:
# Number of dimensions in data.
data_dim: 6
# Equal to the total number of records. For example, in SAOE per minute, data_ticks is the length of the day in minutes.
data_ticks: 240
# The total number of steps (an upper-bound estimation). For example, 390min / 30min-per-step = 13 steps.
max_step: 8
# Provider of the processed data.
processed_data_provider:
class: PickleProcessedDataProvider
module_path: qlib.rl.data.pickle_styled
kwargs:
data_dir: ./data/pickle_dataframe/feature
module_path: qlib.rl.order_execution.interpreter
reward:
class: PAPenaltyReward
kwargs:
# The penalty for a large volume in a short time.
penalty: 100.0
module_path: qlib.rl.order_execution.reward
data:
source:
order_dir: ./data/training_order_split
data_dir: ./data/pickle_dataframe/backtest
# number of time indexes
total_time: 240
# start time index
default_start_time: 0
# end time index
default_end_time: 240
proc_data_dim: 6
num_workers: 0
queue_size: 20
network:
class: Recurrent
module_path: qlib.rl.order_execution.network
policy:
class: PPO
kwargs:
lr: 0.0001
module_path: qlib.rl.order_execution.policy
runtime:
seed: 42
use_cuda: false
trainer:
max_epoch: 2
# Number of episodes collected in each training iteration
repeat_per_collect: 5
earlystop_patience: 2
# Episodes per collect at training.
episode_per_collect: 20
batch_size: 16
# Perform validation every n iterations
val_every_n_epoch: 1
checkpoint_path: ./checkpoints
checkpoint_every_n_iters: 1
And the config file for backtesting:
.. code-block:: yaml
order_file: ./data/backtest_orders.csv
start_time: "9:45"
end_time: "14:44"
qlib:
provider_uri_1min: ./data/bin
feature_root_dir: ./data/pickle
# feature generated by today's information
feature_columns_today: [
"$open", "$high", "$low", "$close", "$vwap", "$volume",
]
# feature generated by yesterday's information
feature_columns_yesterday: [
"$open_v1", "$high_v1", "$low_v1", "$close_v1", "$vwap_v1", "$volume_v1",
]
exchange:
# the expression for buying and selling stock limitation
limit_threshold: ['$close == 0', '$close == 0']
# deal price for buying and selling
deal_price: ["If($close == 0, $vwap, $close)", "If($close == 0, $vwap, $close)"]
volume_threshold:
# volume limits are both buying and selling, "cum" means that this is a cumulative value over time
all: ["cum", "0.2 * DayCumsum($volume, '9:45', '14:44')"]
# the volume limits of buying
buy: ["current", "$close"]
# the volume limits of selling, "current" means that this is a real-time value and will not accumulate over time
sell: ["current", "$close"]
strategies:
30min:
class: TWAPStrategy
module_path: qlib.contrib.strategy.rule_strategy
kwargs: {}
1day:
class: SAOEIntStrategy
module_path: qlib.rl.order_execution.strategy
kwargs:
state_interpreter:
class: FullHistoryStateInterpreter
module_path: qlib.rl.order_execution.interpreter
kwargs:
max_step: 8
data_ticks: 240
data_dim: 6
processed_data_provider:
class: PickleProcessedDataProvider
module_path: qlib.rl.data.pickle_styled
kwargs:
data_dir: ./data/pickle_dataframe/feature
action_interpreter:
class: CategoricalActionInterpreter
module_path: qlib.rl.order_execution.interpreter
kwargs:
values: 14
max_step: 8
network:
class: Recurrent
module_path: qlib.rl.order_execution.network
kwargs: {}
policy:
class: PPO
module_path: qlib.rl.order_execution.policy
kwargs:
lr: 1.0e-4
# Local path to the latest model. The model is generated during training, so please run training first if you want to run backtest with a trained policy. You could also remove this parameter file to run backtest with a randomly initialized policy.
weight_file: ./checkpoints/latest.pth
# Concurrent environment workers.
concurrency: 5
With the above config files, you can start training the agent by the following command:
.. code-block:: console
$ python -m qlib.rl.contrib.train_onpolicy.py --config_path train_config.yml
After the training, you can backtest with the following command:
.. code-block:: console
$ python -m qlib.rl.contrib.backtest.py --config_path backtest_config.yml
In that case, :class:`~qlib.rl.order_execution.simulator_qlib.SingleAssetOrderExecution` and :class:`~qlib.rl.order_execution.simulator_simple.SingleAssetOrderExecutionSimple` as examples for simulator, :class:`qlib.rl.order_execution.interpreter.FullHistoryStateInterpreter` and :class:`qlib.rl.order_execution.interpreter.CategoricalActionInterpreter` as examples for interpreter, :class:`qlib.rl.order_execution.policy.PPO` as an example for policy, and :class:`qlib.rl.order_execution.reward.PAPenaltyReward` as an example for reward.
For the single asset order execution task, if developers have already defined their simulator/interpreters/reward function/policy, they could launch the training and backtest pipeline by simply modifying the corresponding settings in the config files.
The details about the example can be found `here <https://github.com/microsoft/qlib/blob/main/examples/rl/README.md>`_.
In the future, we will provide more examples for different scenarios such as RL-based portfolio construction.

View File

@@ -1,10 +0,0 @@
.. _rl:
========================================================================
Reinforcement Learning in Quantitative Trading
========================================================================
.. toctree::
Overall <overall>
Quick Start <quickstart>
Framework <framework>

View File

@@ -80,7 +80,6 @@ TopkDropoutStrategy
In most cases, ``TopkDrop`` algorithm sells and buys `Drop` stocks every trading day, which yields a turnover rate of 2$\times$`Drop`/$K$.
The following images illustrate a typical scenario.
.. image:: ../_static/img/topk_drop.png
:alt: Topk-Drop

View File

@@ -77,7 +77,7 @@ language = "en_US"
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "hidden"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

View File

@@ -15,8 +15,7 @@ Continuous Integration (CI) tools help you stick to the quality standards by run
When you submit a PR request, you can check whether your code passes the CI tests in the "check" section at the bottom of the web page.
1. Qlib will check the code format with black. The PR will raise error if your code does not align to the standard of Qlib(e.g. a common error is the mixed use of space and tab).
You can fix the bug by inputting the following code in the command line.
You can fix the bug by inputing the following code in the command line.
.. code-block:: bash
@@ -33,8 +32,7 @@ When you submit a PR request, you can check whether your code passes the CI test
3. Qlib will check your code style flake8. The checking command is implemented in [github action workflow](https://github.com/microsoft/qlib/blob/0e8b94a552f1c457cfa6cd2c1bb3b87ebb3fb279/.github/workflows/test.yml#L73).
You can fix the bug by inputing the following code in the command line.
You can fix the bug by inputing the following code in the command line.
.. code-block:: bash
@@ -42,8 +40,7 @@ When you submit a PR request, you can check whether your code passes the CI test
4. Qlib has integrated pre-commit, which will make it easier for developers to format their code.
Just run the following two commands, and the code will be automatically formatted using black and flake8 when the git commit command is executed.
Just run the following two commands, and the code will be automatically formatted using black and flake8 when the git commit command is executed.
.. code-block:: bash

View File

@@ -81,7 +81,6 @@ If running on Windows, open **NFS** features and write correct **mount_path**, i
* Open ``Programs and Features``.
* Click ``Turn Windows features on or off``.
* Scroll down and check the option ``Services for NFS``, then click OK
Reference address: https://graspingtech.com/mount-nfs-share-windows-10/
2.config correct mount_path
* In windows, mount path must be not exist path and root path,
@@ -162,7 +161,7 @@ Limitations
API
***
The client is based on `python-socketio <https://python-socketio.readthedocs.io>`_ which is a framework that supports WebSocket client for Python language. The client can only propose requests and receive results, which do not include any calculating procedure.
The client is based on `python-socketio<https://python-socketio.readthedocs.io>`_ which is a framework that supports WebSocket client for Python language. The client can only propose requests and receive results, which do not include any calculating procedure.
Class
-----

View File

@@ -33,7 +33,7 @@ Document Structure
.. toctree::
:maxdepth: 3
:caption: MAIN COMPONENTS:
:caption: COMPONENTS:
Workflow: Workflow Management <component/workflow.rst>
Data Layer: Data Framework & Usage <component/data.rst>
@@ -44,11 +44,10 @@ Document Structure
Qlib Recorder: Experiment Management <component/recorder.rst>
Analysis: Evaluation & Results Analysis <component/report.rst>
Online Serving: Online Management & Strategy & Tool <component/online.rst>
Reinforcement Learning <component/rl/toctree>
.. toctree::
:maxdepth: 3
:caption: OTHER COMPONENTS/FEATURES/TOPICS:
:caption: ADVANCED TOPICS:
Building Formulaic Alphas <advanced/alpha.rst>
Online & Offline mode <advanced/server.rst>
@@ -56,12 +55,6 @@ Document Structure
Task Management <advanced/task_management.rst>
Point-In-Time database <advanced/PIT.rst>
.. toctree::
:maxdepth: 3
:caption: FOR DEVELOPERS:
Code Standard & Development Guidance <developer/code_standard_and_dev_guide.rst>
.. toctree::
:maxdepth: 3
:caption: REFERENCE:

View File

@@ -15,56 +15,38 @@ With ``Qlib``, users can easily try their ideas to create better Quant investmen
Framework
=========
.. image:: ../_static/img/framework.svg
:align: center
At the module level, Qlib is a platform that consists of above components. The components are designed as loose-coupled modules and each component could be used stand-alone.
This framework may be intimidating for new users to Qlib. It tries to accurately include a lot of details of Qlib's design.
For users new to Qlib, you can skip it first and read it later.
======================== ==============================================================================
Name Description
======================== ==============================================================================
`Infrastructure` layer `Infrastructure` layer provides underlying support for Quant research.
`DataServer` provides high-performance infrastructure for users to manage
and retrieve raw data. `Trainer` provides flexible interface to control
the training process of models which enable algorithms controlling the
training process.
=========================== ==============================================================================
Name Description
=========================== ==============================================================================
`Infrastructure` layer `Infrastructure` layer provides underlying support for Quant research.
`DataServer` provides high-performance infrastructure for users to manage
and retrieve raw data. `Trainer` provides flexible interface to control
the training process of models which enable algorithms controlling the
training process.
`Workflow` layer `Workflow` layer covers the whole workflow of quantitative investment.
`Information Extractor` extracts data for models. `Forecast Model` focuses
on producing all kinds of forecast signals (e.g. *alpha*, risk) for other
modules. With these signals `Decision Generator` will generate the target
trading decisions(i.e. portfolio, orders) to be executed by `Execution Env`
(i.e. the trading market). There may be multiple levels of `Trading Agent`
and `Execution Env` (e.g. an *order executor trading agent and intraday
order execution environment* could behave like an interday trading
environment and nested in *daily portfolio management trading agent and
interday trading environment* )
`Learning Framework` layer The `Forecast Model` and `Trading Agent` are learnable. They are learned
based on the `Learning Framework` layer and then applied to multiple scenarios
in `Workflow` layer. The supported learning paradigms can be categorized into
reinforcement learning and supervised learning. The learning framework
leverages the `Workflow` layer as well(e.g. sharing `Information Extractor`,
creating environments based on `Execution Env`).
`Workflow` layer `Workflow` layer covers the whole workflow of quantitative investment.
Both supervised-learning-based strategies and RL-based Strategies
are supported.
`Information Extractor` extracts data for models. `Forecast Model` focuses
on producing all kinds of forecast signals (e.g. *alpha*, risk) for other
modules. With these signals `Decision Generator` will generate the target
trading decisions(i.e. portfolio, orders)
If RL-based Strategies are adopted, the `Policy` is learned in a end-to-end way,
the trading deicsions are generated directly.
Decisions will be executed by `Execution Env`
(i.e. the trading market). There may be multiple levels of `Strategy`
and `Executor` (e.g. an *order executor trading strategy and intraday order executor*
could behave like an interday trading loop and be nested in
*daily portfolio management trading strategy and interday trading executor*
trading loop)
`Interface` layer `Interface` layer tries to present a user-friendly interface for the underlying
system. `Analyser` module will provide users detailed analysis reports of
forecasting signals, portfolios and execution results
=========================== ==============================================================================
`Interface` layer `Interface` layer tries to present a user-friendly interface for the underlying
system. `Analyser` module will provide users detailed analysis reports of
forecasting signals, portfolios and execution results
======================== ==============================================================================
- The modules with hand-drawn style are under development and will be released in the future.
- The modules with dashed borders are highly user-customizable and extendible.
(p.s. framework image is created with https://draw.io/)

View File

@@ -21,7 +21,6 @@ Users can easily intsall ``Qlib`` according to the following steps:
- Before installing ``Qlib`` from source, users need to install some dependencies:
.. code-block::
pip install numpy
pip install --upgrade cython

View File

@@ -1,5 +1,4 @@
.. _api:
=============
API Reference
=============
@@ -117,7 +116,7 @@ Model
Strategy
--------
.. automodule:: qlib.contrib.strategy
.. automodule:: qlib.contrib.strategy.strategy
:members:
Evaluate
@@ -255,38 +254,5 @@ Utils
Serializable
------------
.. automodule:: qlib.utils.serial
.. automodule:: qlib.utils.serial.Serializable
:members:
RL
==============
Base Component
--------------
.. automodule:: qlib.rl
:members:
:imported-members:
Strategy
--------
.. automodule:: qlib.rl.strategy
:members:
:imported-members:
Trainer
-------
.. automodule:: qlib.rl.trainer
:members:
:imported-members:
Order Execution
---------------
.. automodule:: qlib.rl.order_execution
:members:
:imported-members:
Utils
---------------
.. automodule:: qlib.rl.utils
:members:
:imported-members:

View File

@@ -4,4 +4,3 @@ numpy
scipy
scikit-learn
pandas
tianshou

View File

@@ -83,14 +83,15 @@ Load features of certain instruments in a given time range:
>> from qlib.data import D
>> instruments = ['SH600000']
>> fields = ['$close', '$volume', 'Ref($close, 1)', 'Mean($close, 3)', '$high-$low']
>> D.features(instruments, fields, start_time='2010-01-01', end_time='2017-12-31', freq='day').head().to_string()
' $close $volume Ref($close, 1) Mean($close, 3) $high-$low
... instrument datetime
... SH600000 2010-01-04 86.778313 16162960.0 88.825928 88.061483 2.907631
... 2010-01-05 87.433578 28117442.0 86.778313 87.679273 3.235252
... 2010-01-06 85.713585 23632884.0 87.433578 86.641825 1.720009
... 2010-01-07 83.788803 20813402.0 85.713585 85.645322 3.030487
... 2010-01-08 84.730675 16044853.0 83.788803 84.744354 2.047623'
>> D.features(instruments, fields, start_time='2010-01-01', end_time='2017-12-31', freq='day').head()
$close $volume Ref($close, 1) Mean($close, 3) $high-$low
instrument datetime
SH600000 2010-01-04 86.778313 16162960.0 88.825928 88.061483 2.907631
2010-01-05 87.433578 28117442.0 86.778313 87.679273 3.235252
2010-01-06 85.713585 23632884.0 87.433578 86.641825 1.720009
2010-01-07 83.788803 20813402.0 85.713585 85.645322 3.030487
2010-01-08 84.730675 16044853.0 83.788803 84.744354 2.047623
Load features of certain stock pool in a given time range:
@@ -104,14 +105,15 @@ Load features of certain stock pool in a given time range:
>> expressionDFilter = ExpressionDFilter(rule_expression='$close>Ref($close,1)')
>> instruments = D.instruments(market='csi300', filter_pipe=[nameDFilter, expressionDFilter])
>> fields = ['$close', '$volume', 'Ref($close, 1)', 'Mean($close, 3)', '$high-$low']
>> D.features(instruments, fields, start_time='2010-01-01', end_time='2017-12-31', freq='day').head().to_string()
' $close $volume Ref($close, 1) Mean($close, 3) $high-$low
... instrument datetime
... SH600655 2010-01-04 2699.567383 158193.328125 2619.070312 2626.097738 124.580566
... 2010-01-08 2612.359619 77501.406250 2584.567627 2623.220133 83.373047
... 2010-01-11 2712.982422 160852.390625 2612.359619 2636.636556 146.621582
... 2010-01-12 2788.688232 164587.937500 2712.982422 2704.676758 128.413818
... 2010-01-13 2790.604004 145460.453125 2788.688232 2764.091553 128.413818'
>> D.features(instruments, fields, start_time='2010-01-01', end_time='2017-12-31', freq='day').head()
$close $volume Ref($close, 1) Mean($close, 3) $high-$low
instrument datetime
SH600655 2010-01-04 2699.567383 158193.328125 2619.070312 2626.097738 124.580566
2010-01-08 2612.359619 77501.406250 2584.567627 2623.220133 83.373047
2010-01-11 2712.982422 160852.390625 2612.359619 2636.636556 146.621582
2010-01-12 2788.688232 164587.937500 2712.982422 2704.676758 128.413818
2010-01-13 2790.604004 145460.453125 2788.688232 2764.091553 128.413818
For more details about features, please refer `Feature API <../component/data.html>`_.

View File

@@ -21,88 +21,84 @@ The Custom models need to inherit `qlib.model.base.Model <../reference/api.html#
- ``Qlib`` passes the initialized parameters to the \_\_init\_\_ method.
- The hyperparameters of model in the configuration must be consistent with those defined in the `__init__` method.
- Code Example: In the following example, the hyperparameters of model in the configuration file should contain parameters such as `loss:mse`.
.. code-block:: Python
.. code-block:: Python
def __init__(self, loss='mse', **kwargs):
if loss not in {'mse', 'binary'}:
raise NotImplementedError
self._scorer = mean_squared_error if loss == 'mse' else roc_auc_score
self._params.update(objective=loss, **kwargs)
self._model = None
def __init__(self, loss='mse', **kwargs):
if loss not in {'mse', 'binary'}:
raise NotImplementedError
self._scorer = mean_squared_error if loss == 'mse' else roc_auc_score
self._params.update(objective=loss, **kwargs)
self._model = None
- Override the `fit` method
- ``Qlib`` calls the fit method to train the model.
- The parameters must include training feature `dataset`, which is designed in the interface.
- The parameters could include some `optional` parameters with default values, such as `num_boost_round = 1000` for `GBDT`.
- Code Example: In the following example, `num_boost_round = 1000` is an optional parameter.
.. code-block:: Python
.. code-block:: Python
def fit(self, dataset: DatasetH, num_boost_round = 1000, **kwargs):
def fit(self, dataset: DatasetH, num_boost_round = 1000, **kwargs):
# prepare dataset for lgb training and evaluation
df_train, df_valid = dataset.prepare(
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
)
x_train, y_train = df_train["feature"], df_train["label"]
x_valid, y_valid = df_valid["feature"], df_valid["label"]
# prepare dataset for lgb training and evaluation
df_train, df_valid = dataset.prepare(
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
)
x_train, y_train = df_train["feature"], df_train["label"]
x_valid, y_valid = df_valid["feature"], df_valid["label"]
# Lightgbm need 1D array as its label
if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
y_train, y_valid = np.squeeze(y_train.values), np.squeeze(y_valid.values)
else:
raise ValueError("LightGBM doesn't support multi-label training")
# Lightgbm need 1D array as its label
if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
y_train, y_valid = np.squeeze(y_train.values), np.squeeze(y_valid.values)
else:
raise ValueError("LightGBM doesn't support multi-label training")
dtrain = lgb.Dataset(x_train.values, label=y_train)
dvalid = lgb.Dataset(x_valid.values, label=y_valid)
dtrain = lgb.Dataset(x_train.values, label=y_train)
dvalid = lgb.Dataset(x_valid.values, label=y_valid)
# fit the model
self.model = lgb.train(
self.params,
dtrain,
num_boost_round=num_boost_round,
valid_sets=[dtrain, dvalid],
valid_names=["train", "valid"],
early_stopping_rounds=early_stopping_rounds,
verbose_eval=verbose_eval,
evals_result=evals_result,
**kwargs
)
# fit the model
self.model = lgb.train(
self.params,
dtrain,
num_boost_round=num_boost_round,
valid_sets=[dtrain, dvalid],
valid_names=["train", "valid"],
early_stopping_rounds=early_stopping_rounds,
verbose_eval=verbose_eval,
evals_result=evals_result,
**kwargs
)
- Override the `predict` method
- The parameters must include the parameter `dataset`, which will be userd to get the test dataset.
- Return the `prediction score`.
- Please refer to `Model API <../reference/api.html#module-qlib.model.base>`_ for the parameter types of the fit method.
- Code Example: In the following example, users need to use `LightGBM` to predict the label(such as `preds`) of test data `x_test` and return it.
.. code-block:: Python
.. code-block:: Python
def predict(self, dataset: DatasetH, **kwargs)-> pandas.Series:
if self.model is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
return pd.Series(self.model.predict(x_test.values), index=x_test.index)
def predict(self, dataset: DatasetH, **kwargs)-> pandas.Series:
if self.model is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
return pd.Series(self.model.predict(x_test.values), index=x_test.index)
- Override the `finetune` method (Optional)
- This method is optional to the users. When users want to use this method on their own models, they should inherit the ``ModelFT`` base class, which includes the interface of `finetune`.
- The parameters must include the parameter `dataset`.
- Code Example: In the following example, users will use `LightGBM` as the model and finetune it.
.. code-block:: Python
.. code-block:: Python
def finetune(self, dataset: DatasetH, num_boost_round=10, verbose_eval=20):
# Based on existing model and finetune by train more rounds
dtrain, _ = self._prepare_data(dataset)
self.model = lgb.train(
self.params,
dtrain,
num_boost_round=num_boost_round,
init_model=self.model,
valid_sets=[dtrain],
valid_names=["train"],
verbose_eval=verbose_eval,
)
def finetune(self, dataset: DatasetH, num_boost_round=10, verbose_eval=20):
# Based on existing model and finetune by train more rounds
dtrain, _ = self._prepare_data(dataset)
self.model = lgb.train(
self.params,
dtrain,
num_boost_round=num_boost_round,
init_model=self.model,
valid_sets=[dtrain],
valid_names=["train"],
verbose_eval=verbose_eval,
)
Configuration File
==================
@@ -111,21 +107,21 @@ The configuration file is described in detail in the `Workflow <../component/wor
- Example: The following example describes the `model` field of configuration file about the custom lightgbm model mentioned above, where `module_path` is the module path, `class` is the class name, and `args` is the hyperparameter passed into the __init__ method. All parameters in the field is passed to `self._params` by `\*\*kwargs` in `__init__` except `loss = mse`.
.. code-block:: YAML
.. code-block:: YAML
model:
class: LGBModel
module_path: qlib.contrib.model.gbdt
args:
loss: mse
colsample_bytree: 0.8879
learning_rate: 0.0421
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 210
num_threads: 20
model:
class: LGBModel
module_path: qlib.contrib.model.gbdt
args:
loss: mse
colsample_bytree: 0.8879
learning_rate: 0.0421
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 210
num_threads: 20
Users could find configuration file of the baselines of the ``Model`` in ``examples/benchmarks``. All the configurations of different models are listed under the corresponding model folder.

View File

@@ -1,72 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi500
benchmark: &benchmark SH000905
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: CatBoostModel
module_path: qlib.contrib.model.catboost_model
kwargs:
loss: RMSE
learning_rate: 0.0421
subsample: 0.8789
max_depth: 6
num_leaves: 100
thread_count: 20
grow_policy: Lossguide
bootstrap_type: Poisson
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -1,79 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi500
benchmark: &benchmark SH000905
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors: []
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: CatBoostModel
module_path: qlib.contrib.model.catboost_model
kwargs:
loss: RMSE
learning_rate: 0.0421
subsample: 0.8789
max_depth: 6
num_leaves: 100
thread_count: 20
grow_policy: Lossguide
bootstrap_type: Poisson
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -1,97 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi500
benchmark: &benchmark SH000905
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: DEnsembleModel
module_path: qlib.contrib.model.double_ensemble
kwargs:
base_model: "gbm"
loss: mse
num_models: 6
enable_sr: True
enable_fs: True
alpha1: 1
alpha2: 1
bins_sr: 10
bins_fs: 5
decay: 0.5
sample_ratios:
- 0.8
- 0.7
- 0.6
- 0.5
- 0.4
sub_weights:
- 1
- 0.2
- 0.2
- 0.2
- 0.2
- 0.2
epochs: 28
colsample_bytree: 0.8879
learning_rate: 0.2
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 210
num_threads: 20
verbosity: -1
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -1,104 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi500
benchmark: &benchmark SH000905
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors: []
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: DEnsembleModel
module_path: qlib.contrib.model.double_ensemble
kwargs:
base_model: "gbm"
loss: mse
num_models: 6
enable_sr: True
enable_fs: True
alpha1: 1
alpha2: 1
bins_sr: 10
bins_fs: 5
decay: 0.5
sample_ratios:
- 0.8
- 0.7
- 0.6
- 0.5
- 0.4
sub_weights:
- 1
- 0.2
- 0.2
- 0.2
- 0.2
- 0.2
epochs: 136
colsample_bytree: 0.8879
learning_rate: 0.0421
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 210
num_threads: 20
verbosity: -1
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -1,95 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: DEnsembleModel
module_path: qlib.contrib.model.double_ensemble
kwargs:
base_model: "gbm"
loss: mse
num_models: 3
enable_sr: True
enable_fs: True
alpha1: 1
alpha2: 1
bins_sr: 10
bins_fs: 5
decay: 0.5
sample_ratios:
- 0.8
- 0.7
- 0.6
- 0.5
- 0.4
sub_weights:
- 1
- 1
- 1
epochs: 1000
early_stopping_rounds: 50
colsample_bytree: 0.8879
learning_rate: 0.2
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 210
num_threads: 20
verbosity: -1
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -5,8 +5,6 @@ from qlib.data.inst_processor import InstProcessor
class Resample1minProcessor(InstProcessor):
"""This processor tries to resample the data. It will reasmple the data from 1min freq to day freq by selecting a specific miniute"""
def __init__(self, hour: int, minute: int, **kwargs):
self.hour = hour
self.minute = minute

View File

@@ -35,13 +35,13 @@ task:
module_path: qlib.contrib.model.gbdt
kwargs:
loss: mse
colsample_bytree: 0.9
learning_rate: 0.1
subsample: 0.9
colsample_bytree: 0.8879
learning_rate: 0.2
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 250
num_leaves: 210
num_threads: 20
dataset:
class: DatasetH

View File

@@ -1,78 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi500
benchmark: &benchmark SH000905
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LinearModel
module_path: qlib.contrib.model.linear
kwargs:
estimator: ols
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: True
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -1,102 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi500
benchmark: &benchmark SH000905
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors: [
{
"class" : "DropCol",
"kwargs":{"col_list": ["VWAP0"]}
},
{
"class" : "CSZFillna",
"kwargs":{"fields_group": "feature"}
}
]
learn_processors: [
{
"class" : "DropCol",
"kwargs":{"col_list": ["VWAP0"]}
},
{
"class" : "DropnaProcessor",
"kwargs":{"fields_group": "feature"}
},
"DropnaLabel",
{
"class": "CSZScoreNorm",
"kwargs": {"fields_group": "label"}
}
]
process_type: "independent"
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: DNNModelPytorch
module_path: qlib.contrib.model.pytorch_nn
kwargs:
loss: mse
lr: 0.002
lr_decay: 0.96
lr_decay_steps: 100
optimizer: adam
max_steps: 8000
batch_size: 8192
GPU: 0
weight_decay: 0.0002
pt_model_kwargs:
input_dim: 157
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -1,89 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi500
benchmark: &benchmark SH000905
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: DNNModelPytorch
module_path: qlib.contrib.model.pytorch_nn
kwargs:
loss: mse
lr: 0.002
lr_decay: 0.96
lr_decay_steps: 100
optimizer: adam
max_steps: 8000
batch_size: 4096
GPU: 0
pt_model_kwargs:
input_dim: 360
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -74,15 +74,10 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
- The base model of DoubleEnsemble is LGBM.
- The base model of TCTS is GRU.
- About the datasets
- Alpha158 is a tabular dataset. There are less spatial relationships between different features. Each feature are carefully designed by human (a.k.a feature engineering)
- Alpha158 is a tabular dataset. There are less spatial relationships between different features. Each feature are carefully desgined by human (a.k.a feature engineering)
- Alpha360 contains raw price and volue data without much feature engineering. There are strong strong spatial relationships between the features in the time dimension.
- The metrics can be categorized into two
- Signal-based evaluation: IC, ICIR, Rank IC, Rank ICIR
- ![equation](https://latex.codecogs.com/gif.latex?%5Ctext%7Bcorr%7D%28%5Ctextbf%7Bx%7D%2C%5Ctextbf%7By%7D%29%3D%5Cfrac%7B%5Csum_i%20%28x_i-%5Cbar%7Bx%7D%29%28y_i-%5Cbar%7By%7D%29%7D%7B%5Csqrt%7B%5Csum_i%28x_i-%5Cbar%7Bx%7D%29%5E2%5Csum_i%28y_i-%5Cbar%7By%7D%29%5E2%7D%7D)
- ![equation](https://latex.codecogs.com/gif.latex?%5Ctext%7BIC%7D%5E%7B%28t%29%7D%20%3D%20%5Ctext%7Bcorr%7D%28%5Chat%7B%5Ctextbf%7By%7D%7D%5E%7B%28t%29%7D%2C%20%5Ctextbf%7Bret%7D%5E%7B%28t%29%7D%29)
- ![equation](https://latex.codecogs.com/gif.latex?%5Ctext%7BICIR%7D%20%3D%20%5Cfrac%20%7B%5Ctext%7Bmean%7D%28%5Ctextbf%7BIC%7D%29%7D%20%7B%5Ctext%7Bstd%7D%28%5Ctextbf%7BIC%7D%29%7D)
- ![equation](https://latex.codecogs.com/gif.latex?%5Ctext%7BRank%20IC%7D%5E%7B%28t%29%7D%20%3D%20%5Ctext%7Bcorr%7D%28%5Ctext%7Brank%7D%28%5Chat%7B%5Ctextbf%7By%7D%7D%5E%7B%28t%29%7D%29%2C%20%5Ctext%7Brank%7D%28%5Ctextbf%7Bret%7D%5E%7B%28t%29%7D%29%29)
- ![equation](https://latex.codecogs.com/gif.latex?%5Ctext%7BRank%20ICIR%7D%20%3D%20%5Cfrac%20%7B%5Ctext%7Bmean%7D%28%5Ctextbf%7BRank%20IC%7D%29%7D%20%7B%5Ctext%7Bstd%7D%28%5Ctextbf%7BRankIC%7D%29%7D)
- Portfolio-based metrics: Annualized Return, Information Ratio, Max Drawdown
## Results on CSI500
@@ -107,21 +102,16 @@ python run_all_model.py run 3 lightgbm Alpha158 csi500 # for models with random
```
### Alpha158 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|------------|----------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------|
| Linear | Alpha158 | 0.0332±0.00 | 0.3044±0.00 | 0.0462±0.00 | 0.4326±0.00 | 0.0382±0.00 | 0.1723±0.00 | -0.4876±0.00 |
| MLP | Alpha158 | 0.0229±0.01 | 0.2181±0.05 | 0.0360±0.00 | 0.3409±0.02 | 0.0043±0.02 | 0.0602±0.27 | -0.2184±0.04 |
| LightGBM | Alpha158 | 0.0399±0.00 | 0.4065±0.00 | 0.0482±0.00 | 0.5101±0.00 | 0.1284±0.00 | 1.5650±0.00 | -0.0635±0.00 |
| CatBoost | Alpha158 | 0.0345±0.00 | 0.2855±0.00 | 0.0417±0.00 | 0.3740±0.00 | 0.0496±0.00 | 0.5977±0.00 | -0.1496±0.00 |
| DoubleEnsemble | Alpha158 | 0.0380±0.00 | 0.3659±0.00 | 0.0442±0.00 | 0.4324±0.00 | 0.0382±0.00 | 0.1723±0.00 | -0.4876±0.00 |
| LightGBM | Alpha158 | 0.0377±0.00 | 0.3860±0.00 | 0.0448±0.00 | 0.4675±0.00 | 0.1151±0.00 | 1.3884±0.00 | -0.0898±0.00 |
### Alpha360 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|------------|----------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------|
| MLP | Alpha360 | 0.0258±0.00 | 0.2021±0.02 | 0.0426±0.00 | 0.3840±0.02 | 0.0022±0.02 | 0.0301±0.26 | -0.2064±0.02 |
| LightGBM | Alpha360 | 0.0400±0.00 | 0.3605±0.00 | 0.0536±0.00 | 0.5431±0.00 | 0.0505±0.00 | 0.7658±0.02 | -0.1880±0.00 |
| CatBoost | Alpha360 | 0.0382±0.00 | 0.3229±0.00 | 0.0489±0.00 | 0.4649±0.00 | 0.0297±0.00 | 0.4227±0.02 | -0.1499±0.01 |
| DoubleEnsemble | Alpha360 | 0.0361±0.00 | 0.3092±0.00 | 0.0499±0.00 | 0.4793±0.00 | 0.0382±0.00 | 0.1723±0.02 | -0.4876±0.00 |
# Contributing

View File

@@ -170,7 +170,7 @@ class DDGDA:
# 3) train and logging meta model
with R.start(experiment_name=self.meta_exp_name):
R.log_params(**kwargs)
mm = MetaModelDS(step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=100, seed=43)
mm = MetaModelDS(step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=200, seed=43)
mm.fit(md)
R.save_objects(model=mm)

View File

@@ -4,21 +4,15 @@ So adapting the forecasting models/strategies to market dynamics is very importa
The table below shows the performances of different solutions on different forecasting models.
## Alpha158 Dataset
Here is the [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases
```bash
wget https://github.com/chenditc/investment_data/releases/download/20220720/qlib_bin.tar.gz
tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2
```
## Alpha158 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|------------------|---------|----|------|---------|-----------|-------------------|-------------------|--------------|
| RR[Linear] |Alpha158 |0.089|0.577|0.102 |0.627 |0.093 |1.458 |-0.073 |
| DDG-DA[Linear] |Alpha158 |0.096|0.636|0.107 |0.677 |0.067 |0.996 |-0.091 |
| RR[LightGBM] |Alpha158 |0.082|0.589|0.091 |0.626 |0.077 |1.320 |-0.091 |
| DDG-DA[LightGBM] |Alpha158 |0.085|0.658|0.094 |0.686 |0.115 |1.792 |-0.068 |
| RR[Linear] |Alpha158 |0.088|0.570|0.102 |0.622 |0.077 |1.175 |-0.086 |
| DDG-DA[Linear] |Alpha158 |0.093|0.622|0.106 |0.670 |0.085 |1.213 |-0.093 |
| RR[LightGBM] |Alpha158 |0.079|0.566|0.088 |0.592 |0.075 |1.226 |-0.096 |
| DDG-DA[LightGBM] |Alpha158 |0.084|0.639|0.093 |0.664 |0.099 |1.442 |-0.071 |
- The label horizon of the `Alpha158` dataset is set to 20.
- The rolling time intervals are set to 20 trading days.
- The test rolling periods are from January 2017 to August 2020.
- The results are based on the crowd-sourced version. The Yahoo version of qlib data does not contain `VWAP`, so all related factors are missing and filled with 0, which leads to a rank-deficient matrix (a matrix does not have full rank) and makes lower-level optimization of DDG-DA can not be solved.

View File

@@ -1,60 +0,0 @@
This folder contains a simple example of how to run Qlib RL. It contains:
```
.
├── experiment_config
│ ├── backtest # Backtest config
│ └── training # Training config
├── README.md # Readme (the current file)
└── scripts # Scripts for data pre-processing
```
## Data preparation
Use [AzCopy](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10) to download data:
```
azcopy copy https://qlibpublic.blob.core.windows.net/data/rl/qlib_rl_example_data ./ --recursive
mv qlib_rl_example_data data
```
The downloaded data will be placed at `./data`. The original data are in `data/csv`. To create all data needed by the case, run:
```
bash scripts/data_pipeline.sh
```
After the execution finishes, the `data/` directory should be like:
```
data
├── backtest_orders.csv
├── bin
├── csv
├── pickle
├── pickle_dataframe
└── training_order_split
```
## Run training
Run:
```
python -m qlib.rl.contrib.train_onpolicy --config_path ./experiment_config/training/config.yml
```
After training, checkpoints will be stored under `checkpoints/`.
## Run backtest
```
python -m qlib.rl.contrib.backtest --config_path ./experiment_config/backtest/config.yml
```
The backtest workflow will use the trained model in `checkpoints/`. The backtest summary can be found in `outputs/`.
## Others
The RL module is designed in a loosely-coupled way. Currently, RL examples are integrated with concrete business logic.
But the core part of RL is much simpler than what you see.
To demonstrate the simple core of RL, [a dedicated notebook](./simple_example.ipynb) for RL without business loss is created.

View File

@@ -1,57 +0,0 @@
order_file: ./data/backtest_orders.csv
start_time: "9:45"
end_time: "14:44"
qlib:
provider_uri_1min: ./data/bin
feature_root_dir: ./data/pickle
feature_columns_today: [
"$open", "$high", "$low", "$close", "$vwap", "$volume",
]
feature_columns_yesterday: [
"$open_v1", "$high_v1", "$low_v1", "$close_v1", "$vwap_v1", "$volume_v1",
]
exchange:
limit_threshold: ['$close == 0', '$close == 0']
deal_price: ["If($close == 0, $vwap, $close)", "If($close == 0, $vwap, $close)"]
volume_threshold:
all: ["cum", "0.2 * DayCumsum($volume, '9:45', '14:44')"]
buy: ["current", "$close"]
sell: ["current", "$close"]
strategies:
30min:
class: TWAPStrategy
module_path: qlib.contrib.strategy.rule_strategy
kwargs: {}
1day:
class: SAOEIntStrategy
module_path: qlib.rl.order_execution.strategy
kwargs:
state_interpreter:
class: FullHistoryStateInterpreter
module_path: qlib.rl.order_execution.interpreter
kwargs:
max_step: 8
data_ticks: 240
data_dim: 6
processed_data_provider:
class: PickleProcessedDataProvider
module_path: qlib.rl.data.pickle_styled
kwargs:
data_dir: ./data/pickle_dataframe/feature
action_interpreter:
class: CategoricalActionInterpreter
module_path: qlib.rl.order_execution.interpreter
kwargs:
values: 14
max_step: 8
network:
class: Recurrent
module_path: qlib.rl.order_execution.network
kwargs: {}
policy:
class: PPO
module_path: qlib.rl.order_execution.policy
kwargs:
lr: 1.0e-4
weight_file: ./checkpoints/latest.pth
concurrency: 5

View File

@@ -1,59 +0,0 @@
simulator:
time_per_step: 30
vol_limit: null
env:
concurrency: 1
parallel_mode: dummy
action_interpreter:
class: CategoricalActionInterpreter
kwargs:
values: 14
max_step: 8
module_path: qlib.rl.order_execution.interpreter
state_interpreter:
class: FullHistoryStateInterpreter
kwargs:
data_dim: 6
data_ticks: 240
max_step: 8
processed_data_provider:
class: PickleProcessedDataProvider
module_path: qlib.rl.data.pickle_styled
kwargs:
data_dir: ./data/pickle_dataframe/feature
module_path: qlib.rl.order_execution.interpreter
reward:
class: PAPenaltyReward
kwargs:
penalty: 100.0
module_path: qlib.rl.order_execution.reward
data:
source:
order_dir: ./data/training_order_split
data_dir: ./data/pickle_dataframe/backtest
total_time: 240
default_start_time: 0
default_end_time: 240
proc_data_dim: 6
num_workers: 0
queue_size: 20
network:
class: Recurrent
module_path: qlib.rl.order_execution.network
policy:
class: PPO
kwargs:
lr: 0.0001
module_path: qlib.rl.order_execution.policy
runtime:
seed: 42
use_cuda: false
trainer:
max_epoch: 2
repeat_per_collect: 5
earlystop_patience: 2
episode_per_collect: 20
batch_size: 16
val_every_n_epoch: 1
checkpoint_path: ./checkpoints
checkpoint_every_n_iters: 1

View File

@@ -1,21 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import os
import pickle
import pandas as pd
from tqdm import tqdm
os.makedirs(os.path.join("data", "pickle_dataframe"), exist_ok=True)
for tag in ("backtest", "feature"):
df = pickle.load(open(os.path.join("data", "pickle", f"{tag}.pkl"), "rb"))
df = pd.concat(list(df.values())).reset_index()
df["date"] = df["datetime"].dt.date.astype("datetime64")
instruments = sorted(set(df["instrument"]))
os.makedirs(os.path.join("data", "pickle_dataframe", tag), exist_ok=True)
for instrument in tqdm(instruments):
cur = df[df["instrument"] == instrument].sort_values(by=["datetime"])
cur = cur.set_index(["instrument", "datetime", "date"])
pickle.dump(cur, open(os.path.join("data", "pickle_dataframe", tag, f"{instrument}.pkl"), "wb"))

View File

@@ -1,14 +0,0 @@
# Generate `bin` format data
set -e
python ../../scripts/dump_bin.py dump_all --csv_path ./data/csv --qlib_dir ./data/bin --include_fields open,close,high,low,vwap,volume --symbol_field_name symbol --date_field_name date --freq 1min
# Generate pickle format data
python scripts/gen_pickle_data.py -c scripts/pickle_data_config.yml
if [ -e stat/ ]; then
rm -r stat/
fi
python scripts/collect_pickle_dataframe.py
# Sample orders
python scripts/gen_training_orders.py
python scripts/gen_backtest_orders.py

View File

@@ -1,55 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import argparse
import os
import pandas as pd
import numpy as np
import pickle
parser = argparse.ArgumentParser()
parser.add_argument("--seed", type=int, default=20220926)
parser.add_argument("--num_order", type=int, default=10)
args = parser.parse_args()
np.random.seed(args.seed)
path = os.path.join("data", "pickle", "backtesttest.pkl")
df = pickle.load(open(path, "rb")).reset_index()
df["date"] = df["datetime"].dt.date.astype("datetime64")
instruments = sorted(set(df["instrument"]))
# TODO: The example is expected to be able to handle data containing missing values.
# TODO: Currently, we just simply skip dates that contain missing data. We will add
# TODO: this feature in the future.
skip_dates = {}
for instrument in instruments:
csv_df = pd.read_csv(os.path.join("data", "csv", f"{instrument}.csv"))
csv_df = csv_df[csv_df["close"].isna()]
dates = set([str(d).split(" ")[0] for d in csv_df["date"]])
skip_dates[instrument] = dates
df_list = []
for instrument in instruments:
print(instrument)
cur_df = df[df["instrument"] == instrument]
dates = sorted(set([str(d).split(" ")[0] for d in cur_df["date"]]))
dates = [date for date in dates if date not in skip_dates[instrument]]
n = args.num_order
df_list.append(
pd.DataFrame(
{
"date": sorted(np.random.choice(dates, size=n, replace=False)),
"instrument": [instrument] * n,
"amount": np.random.randint(low=3, high=11, size=n) * 100.0,
"order_type": np.random.randint(low=0, high=2, size=n),
}
).set_index(["date", "instrument"]),
)
total_df = pd.concat(df_list)
total_df.to_csv("data/backtest_orders.csv")

View File

@@ -1,43 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import yaml
import argparse
import os
from copy import deepcopy
from qlib.contrib.data.highfreq_provider import HighFreqProvider
loader = yaml.FullLoader
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", type=str, default="config.yml")
parser.add_argument("-d", "--dest", type=str, default=".")
parser.add_argument("-s", "--split", type=str, choices=["none", "date", "stock", "both"], default="stock")
args = parser.parse_args()
conf = yaml.load(open(args.config), Loader=loader)
for k, v in conf.items():
if isinstance(v, dict) and "path" in v:
v["path"] = os.path.join(args.dest, v["path"])
provider = HighFreqProvider(**conf)
# Gen dataframe
if "feature_conf" in conf:
feature = provider._gen_dataframe(deepcopy(provider.feature_conf))
if "backtest_conf" in conf:
backtest = provider._gen_dataframe(deepcopy(provider.backtest_conf))
provider.feature_conf["path"] = os.path.splitext(provider.feature_conf["path"])[0] + "/"
provider.backtest_conf["path"] = os.path.splitext(provider.backtest_conf["path"])[0] + "/"
# Split by date
if args.split == "date" or args.split == "both":
provider._gen_day_dataset(deepcopy(provider.feature_conf), "feature")
provider._gen_day_dataset(deepcopy(provider.backtest_conf), "backtest")
# Split by stock
if args.split == "stock" or args.split == "both":
provider._gen_stock_dataset(deepcopy(provider.feature_conf), "feature")
provider._gen_stock_dataset(deepcopy(provider.backtest_conf), "backtest")

View File

@@ -1,39 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import argparse
import os
import pandas as pd
import numpy as np
import pickle
parser = argparse.ArgumentParser()
parser.add_argument("--seed", type=int, default=20220926)
parser.add_argument("--stock", type=str, default="AAPL")
parser.add_argument("--train_size", type=int, default=10)
parser.add_argument("--valid_size", type=int, default=2)
parser.add_argument("--test_size", type=int, default=2)
args = parser.parse_args()
np.random.seed(args.seed)
os.makedirs(os.path.join("data", "training_order_split"), exist_ok=True)
for group, n in zip(("train", "valid", "test"), (args.train_size, args.valid_size, args.test_size)):
path = os.path.join("data", "pickle", f"backtest{group}.pkl")
df = pickle.load(open(path, "rb")).reset_index()
df["date"] = df["datetime"].dt.date.astype("datetime64")
dates = sorted(set([str(d).split(" ")[0] for d in df["date"]]))
data_df = pd.DataFrame(
{
"date": sorted(np.random.choice(dates, size=n, replace=False)),
"instrument": [args.stock] * n,
"amount": np.random.randint(low=3, high=11, size=n) * 100.0,
"order_type": [0] * n,
}
).set_index(["date", "instrument"])
os.makedirs(os.path.join("data", "training_order_split", group), exist_ok=True)
pickle.dump(data_df, open(os.path.join("data", "training_order_split", group, f"{args.stock}.pkl"), "wb"))

View File

@@ -1,57 +0,0 @@
# start & end time for training/validation/test datasets
start_time: !!str &start 2020-01-01
end_time: !!str &end 2020-07-31
train_end_time: !!str &tend 2020-03-31
valid_start_time: !!str &vstart 2020-04-01
valid_end_time: !!str &vend 2020-05-31
test_start_time: !!str &tstart 2020-06-01
# the instrument set
instruments: &ins all
# qlib related configuration
qlib_conf:
provider_uri: ./data/bin # path to generated qlib bin
redis_port: 233
feature_conf:
path: ./data/pickle/feature.pkl # output path of feature
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: HighFreqGeneralHandler
module_path: qlib.contrib.data.highfreq_handler
kwargs:
start_time: *start
end_time: *end
fit_start_time: *start
fit_end_time: *tend
instruments: *ins
day_length: 240 # how many minutes in one trading day
infer_processors:
- class: HighFreqNorm
module_path: qlib.contrib.data.highfreq_processor
kwargs:
feature_save_dir: ./stat/ # output path of statistics of features (for feature normalization)
norm_groups:
price: 10
volume: 2
segments:
train: !!python/tuple [*start, *tend]
valid: !!python/tuple [*vstart, *vend]
test: !!python/tuple [*tstart, *end]
backtest_conf:
path: ./data/pickle/backtest.pkl # output path of backtest
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: HighFreqGeneralBacktestHandler
module_path: qlib.contrib.data.highfreq_handler
kwargs:
start_time: *start
end_time: *end
instruments: *ins
day_length: 240
segments:
train: !!python/tuple [*start, *tend]
valid: !!python/tuple [*vstart, *vend]
test: !!python/tuple [*tstart, *end]

File diff suppressed because one or more lines are too long

View File

@@ -253,7 +253,7 @@ class ModelRunner:
default "" indicates that
qlib_uri : str
the uri to install qlib with pip
it could be URI on the remote or local path (NOTE: the local path must be an absolute path)
it could be url on the we or local path (NOTE: the local path must be a absolute path)
exp_folder_name: str
the name of the experiment folder
wait_before_rm_env : bool

View File

@@ -2,7 +2,7 @@
# Licensed under the MIT License.
from pathlib import Path
__version__ = "0.9.1"
__version__ = "0.8.6.99"
__version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version
import os
from typing import Union
@@ -34,7 +34,8 @@ def init(default_conf="client", **kwargs):
from .config import C # pylint: disable=C0415
from .data.cache import H # pylint: disable=C0415
logger = get_module_logger("Initialization")
# FIXME: this logger ignored the level in config
logger = get_module_logger("Initialization", level=logging.INFO)
skip_if_reg = kwargs.pop("skip_if_reg", False)
if skip_if_reg and C.registered:
@@ -47,7 +48,6 @@ def init(default_conf="client", **kwargs):
if clear_mem_cache:
H.clear()
C.set(default_conf, **kwargs)
get_module_logger.setLevel(C.logging_level)
# mount nfs
for _freq, provider_uri in C.provider_uri.items():

View File

@@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any, Generator, List, Optional, Tuple, Union
import pandas as pd
from .account import Account
from .report import Indicator, PortfolioMetrics
if TYPE_CHECKING:
from ..strategy.base import BaseStrategy
@@ -19,7 +20,7 @@ if TYPE_CHECKING:
from ..config import C
from ..log import get_module_logger
from ..utils import init_instance_by_config
from .backtest import INDICATOR_METRIC, PORT_METRIC, backtest_loop, collect_data_loop
from .backtest import backtest_loop, collect_data_loop
from .decision import Order
from .exchange import Exchange
from .utils import CommonInfrastructure
@@ -41,7 +42,7 @@ def get_exchange(
close_cost: float = 0.0025,
min_cost: float = 5.0,
limit_threshold: Union[Tuple[str, str], float, None] = None,
deal_price: Union[str, Tuple[str, str], List[str]] = None,
deal_price: Union[str, Tuple[str], List[str]] = None,
**kwargs: Any,
) -> Exchange:
"""get_exchange
@@ -69,10 +70,10 @@ def get_exchange(
min_cost : float
min transaction cost. It is an absolute amount of cost instead of a ratio of your order's deal amount.
e.g. You must pay at least 5 yuan of commission regardless of your order's deal amount.
deal_price: Union[str, Tuple[str, str], List[str]]
deal_price: Union[str, Tuple[str], List[str]]
The `deal_price` supports following two types of input
- <deal_price> : str
- (<buy_price>, <sell_price>): Tuple[str, str] or List[str]
- (<buy_price>, <sell_price>): Tuple[str] or List[str]
<deal_price>, <buy_price> or <sell_price> := <price>
<price> := str
@@ -113,7 +114,7 @@ def get_exchange(
def create_account_instance(
start_time: Union[pd.Timestamp, str],
end_time: Union[pd.Timestamp, str],
benchmark: Optional[str],
benchmark: str,
account: Union[float, int, dict],
pos_type: str = "Position",
) -> Account:
@@ -162,9 +163,7 @@ def create_account_instance(
init_cash=init_cash,
position_dict=position_dict,
pos_type=pos_type,
benchmark_config={}
if benchmark is None
else {
benchmark_config={
"benchmark": benchmark,
"start_time": start_time,
"end_time": end_time,
@@ -177,7 +176,7 @@ def get_strategy_executor(
end_time: Union[pd.Timestamp, str],
strategy: Union[str, dict, object, Path],
executor: Union[str, dict, object, Path],
benchmark: Optional[str] = "SH000300",
benchmark: str = "SH000300",
account: Union[float, int, dict] = 1e9,
exchange_kwargs: dict = {},
pos_type: str = "Position",
@@ -222,7 +221,7 @@ def backtest(
account: Union[float, int, dict] = 1e9,
exchange_kwargs: dict = {},
pos_type: str = "Position",
) -> Tuple[PORT_METRIC, INDICATOR_METRIC]:
) -> Tuple[PortfolioMetrics, Indicator]:
"""initialize the strategy and executor, then backtest function for the interaction of the outermost strategy and
executor in the nested decision execution
@@ -243,7 +242,7 @@ def backtest(
benchmark: str
the benchmark for reporting.
account : Union[float, int, Position]
information for describing how to create the account
information for describing how to creating the account
For `float` or `int`:
Using Account with only initial cash
For `Position`:
@@ -255,9 +254,9 @@ def backtest(
Returns
-------
portfolio_dict: PORT_METRIC
portfolio_metrics_dict: Dict[PortfolioMetrics]
it records the trading portfolio_metrics information
indicator_dict: INDICATOR_METRIC
indicator_dict: Dict[Indicator]
it computes the trading indicator
It is organized in a dict format
@@ -272,7 +271,8 @@ def backtest(
exchange_kwargs,
pos_type=pos_type,
)
return backtest_loop(start_time, end_time, trade_strategy, trade_executor)
portfolio_metrics, indicator = backtest_loop(start_time, end_time, trade_strategy, trade_executor)
return portfolio_metrics, indicator
def collect_data(
@@ -345,4 +345,4 @@ def format_decisions(
return res
__all__ = ["Order", "backtest", "get_strategy_executor"]
__all__ = ["Order", "backtest"]

View File

@@ -236,7 +236,7 @@ class Account:
if not self.current_position.skip_update():
stock_list = self.current_position.get_stock_list()
for code in stock_list:
# if suspended, no new price to be updated, profit is 0
# if suspend, no new price to be updated, profit is 0
if trade_exchange.check_stock_suspended(code, trade_start_time, trade_end_time):
continue
bar_close = cast(float, trade_exchange.get_close(code, trade_start_time, trade_end_time))

View File

@@ -3,12 +3,12 @@
from __future__ import annotations
from typing import Dict, TYPE_CHECKING, Generator, Optional, Tuple, Union, cast
from typing import TYPE_CHECKING, Generator, Optional, Tuple, Union, cast
import pandas as pd
from qlib.backtest.decision import BaseTradeDecision
from qlib.backtest.report import Indicator
from qlib.backtest.report import Indicator, PortfolioMetrics
if TYPE_CHECKING:
from qlib.strategy.base import BaseStrategy
@@ -19,35 +19,30 @@ from tqdm.auto import tqdm
from ..utils.time import Freq
PORT_METRIC = Dict[str, Tuple[pd.DataFrame, dict]]
INDICATOR_METRIC = Dict[str, Tuple[pd.DataFrame, Indicator]]
def backtest_loop(
start_time: Union[pd.Timestamp, str],
end_time: Union[pd.Timestamp, str],
trade_strategy: BaseStrategy,
trade_executor: BaseExecutor,
) -> Tuple[PORT_METRIC, INDICATOR_METRIC]:
) -> Tuple[PortfolioMetrics, Indicator]:
"""backtest function for the interaction of the outermost strategy and executor in the nested decision execution
please refer to the docs of `collect_data_loop`
Returns
-------
portfolio_dict: PORT_METRIC
portfolio_metrics: PortfolioMetrics
it records the trading portfolio_metrics information
indicator_dict: INDICATOR_METRIC
indicator: Indicator
it computes the trading indicator
"""
return_value: dict = {}
for _decision in collect_data_loop(start_time, end_time, trade_strategy, trade_executor, return_value):
pass
portfolio_dict = cast(PORT_METRIC, return_value.get("portfolio_dict"))
indicator_dict = cast(INDICATOR_METRIC, return_value.get("indicator_dict"))
return portfolio_dict, indicator_dict
portfolio_metrics = cast(PortfolioMetrics, return_value.get("portfolio_metrics"))
indicator = cast(Indicator, return_value.get("indicator"))
return portfolio_metrics, indicator
def collect_data_loop(
@@ -88,23 +83,18 @@ def collect_data_loop(
while not trade_executor.finished():
_trade_decision: BaseTradeDecision = trade_strategy.generate_trade_decision(_execute_result)
_execute_result = yield from trade_executor.collect_data(_trade_decision, level=0)
trade_strategy.post_exe_step(_execute_result)
bar.update(1)
trade_strategy.post_upper_level_exe_step()
if return_value is not None:
all_executors = trade_executor.get_all_executors()
portfolio_dict: PORT_METRIC = {}
indicator_dict: INDICATOR_METRIC = {}
for executor in all_executors:
key = "{}{}".format(*Freq.parse(executor.time_per_step))
if executor.trade_account.is_port_metr_enabled():
portfolio_dict[key] = executor.trade_account.get_portfolio_metrics()
indicator_df = executor.trade_account.get_trade_indicator().generate_trade_indicators_dataframe()
indicator_obj = executor.trade_account.get_trade_indicator()
indicator_dict[key] = (indicator_df, indicator_obj)
return_value.update({"portfolio_dict": portfolio_dict, "indicator_dict": indicator_dict})
all_portfolio_metrics = {
"{}{}".format(*Freq.parse(_executor.time_per_step)): _executor.trade_account.get_portfolio_metrics()
for _executor in all_executors
if _executor.trade_account.is_port_metr_enabled()
}
all_indicators = {}
for _executor in all_executors:
key = "{}{}".format(*Freq.parse(_executor.time_per_step))
all_indicators[key] = _executor.trade_account.get_trade_indicator().generate_trade_indicators_dataframe()
all_indicators[key + "_obj"] = _executor.trade_account.get_trade_indicator()
return_value.update({"portfolio_metrics": all_portfolio_metrics, "indicator": all_indicators})

View File

@@ -4,11 +4,10 @@
from __future__ import annotations
from abc import abstractmethod
from datetime import time
from enum import IntEnum
# try to fix circular imports when enabling type hints
from typing import TYPE_CHECKING, Any, ClassVar, Generic, List, Optional, Tuple, TypeVar, Union, cast
from typing import Generic, List, TYPE_CHECKING, Any, ClassVar, Optional, Tuple, TypeVar, Union, cast
from qlib.backtest.utils import TradeCalendarManager
from qlib.data.data import Cal
@@ -24,6 +23,7 @@ from dataclasses import dataclass
import numpy as np
import pandas as pd
DecisionType = TypeVar("DecisionType")
@@ -135,21 +135,6 @@ class Order:
else:
raise NotImplementedError(f"This type of input is not supported")
@property
def key_by_day(self) -> tuple:
"""A hashable & unique key to identify this order, under the granularity in day."""
return self.stock_id, self.date, self.direction
@property
def key(self) -> tuple:
"""A hashable & unique key to identify this order."""
return self.stock_id, self.start_time, self.end_time, self.direction
@property
def date(self) -> pd.Timestamp:
"""Date of the order."""
return pd.Timestamp(self.start_time.replace(hour=0, minute=0, second=0))
class OrderHelper:
"""
@@ -197,8 +182,8 @@ class OrderHelper:
return Order(
stock_id=code,
amount=amount,
start_time=None if start_time is None else pd.Timestamp(start_time),
end_time=None if end_time is None else pd.Timestamp(end_time),
start_time=start_time if start_time is not None else pd.Timestamp(start_time),
end_time=end_time if end_time is not None else pd.Timestamp(end_time),
direction=direction,
)
@@ -264,7 +249,7 @@ class IdxTradeRange(TradeRange):
class TradeRangeByTime(TradeRange):
"""This is a helper function for make decisions"""
def __init__(self, start_time: str | time, end_time: str | time) -> None:
def __init__(self, start_time: str, end_time: str) -> None:
"""
This is a callable class.
@@ -274,13 +259,13 @@ class TradeRangeByTime(TradeRange):
Parameters
----------
start_time : str | time
start_time : str
e.g. "9:30"
end_time : str | time
end_time : str
e.g. "14:30"
"""
self.start_time = pd.Timestamp(start_time).time() if isinstance(start_time, str) else start_time
self.end_time = pd.Timestamp(end_time).time() if isinstance(end_time, str) else end_time
self.start_time = pd.Timestamp(start_time).time()
self.end_time = pd.Timestamp(end_time).time()
assert self.start_time < self.end_time
def __call__(self, trade_calendar: TradeCalendarManager) -> Tuple[int, int]:
@@ -301,7 +286,7 @@ class TradeRangeByTime(TradeRange):
class BaseTradeDecision(Generic[DecisionType]):
"""
Trade decisions are made by strategy and executed by executor
Trade decisions ara made by strategy and executed by executor
Motivation:
Here are several typical scenarios for `BaseTradeDecision`
@@ -550,12 +535,7 @@ class TradeDecisionWO(BaseTradeDecision[Order]):
Besides, the time_range is also included.
"""
def __init__(
self,
order_list: List[Order],
strategy: BaseStrategy,
trade_range: Union[Tuple[int, int], TradeRange] = None,
) -> None:
def __init__(self, order_list: List[object], strategy: BaseStrategy, trade_range: Tuple[int, int] = None) -> None:
super().__init__(strategy, trade_range=trade_range)
self.order_list = cast(List[Order], order_list)
start, end = strategy.trade_calendar.get_step_time()
@@ -576,21 +556,3 @@ class TradeDecisionWO(BaseTradeDecision[Order]):
f"trade_range: {self.trade_range}; "
f"order_list[{len(self.order_list)}]"
)
class TradeDecisionWithDetails(TradeDecisionWO):
"""
Decision with detail information.
Detail information is used to generate execution reports.
"""
def __init__(
self,
order_list: List[Order],
strategy: BaseStrategy,
trade_range: Optional[Tuple[int, int]] = None,
details: Optional[Any] = None,
) -> None:
super().__init__(order_list, strategy, trade_range)
self.details = details

View File

@@ -18,7 +18,7 @@ import pandas as pd
from qlib.backtest.position import BasePosition
from ..config import C
from ..constant import REG_CN, REG_TW
from ..constant import REG_CN
from ..data.data import D
from ..log import get_module_logger
from .decision import Order, OrderDir, OrderHelper
@@ -26,22 +26,13 @@ from .high_performance_ds import BaseQuote, NumpyQuote
class Exchange:
# `quote_df` is a pd.DataFrame class that contains basic information for backtesting
# After some processing, the data will later be maintained by `quote_cls` object for faster data retrieving.
# Some conventions for `quote_df`
# - $close is for calculating the total value at end of each day.
# - if $close is None, the stock on that day is regarded as suspended.
# - $factor is for rounding to the trading unit;
# - if any $factor is missing when $close exists, trading unit rounding will be disabled
quote_df: pd.DataFrame
def __init__(
self,
freq: str = "day",
start_time: Union[pd.Timestamp, str] = None,
end_time: Union[pd.Timestamp, str] = None,
codes: Union[list, str] = "all",
deal_price: Union[str, Tuple[str, str], List[str]] = None,
deal_price: Union[str, Tuple[str], List[str]] = None,
subscribe_fields: list = [],
limit_threshold: Union[Tuple[str, str], float, None] = None,
volume_threshold: Union[tuple, dict] = None,
@@ -141,17 +132,17 @@ class Exchange:
if deal_price is None:
deal_price = C.deal_price
# we have some verbose information here. So logging is enabled
# we have some verbose information here. So logging is enable
self.logger = get_module_logger("online operator")
# TODO: the quote, trade_dates, codes are not necessary.
# It is just for performance consideration.
self.limit_type = self._get_limit_type(limit_threshold)
if limit_threshold is None:
if C.region in [REG_CN, REG_TW]:
if C.region == REG_CN:
self.logger.warning(f"limit_threshold not set. The stocks hit the limit may be bought/sold")
elif self.limit_type == self.LT_FLT and abs(cast(float, limit_threshold)) > 0.1:
if C.region in [REG_CN, REG_TW]:
if C.region == REG_CN:
self.logger.warning(f"limit_threshold may not be set to a reasonable value")
if isinstance(deal_price, str):
@@ -168,7 +159,6 @@ class Exchange:
self.codes = codes
# Necessary fields
# $close is for calculating the total value at end of each day.
# - if $close is None, the stock on that day is regarded as suspended.
# $factor is for rounding to the trading unit
# $change is for calculating the limit of the stock
@@ -209,7 +199,7 @@ class Exchange:
self.end_time,
freq=self.freq,
disk_cache=True,
)
).dropna(subset=["$close"])
self.quote_df.columns = self.all_fields
# check buy_price data and sell_price data
@@ -219,7 +209,7 @@ class Exchange:
self.logger.warning("{} field data contains nan.".format(pstr))
# update trade_w_adj_price
if (self.quote_df["$factor"].isna() & ~self.quote_df["$close"].isna()).any():
if self.quote_df["$factor"].isna().any():
# The 'factor.day.bin' file not exists, and `factor` field contains `nan`
# Use adjusted price
self.trade_w_adj_price = True
@@ -255,9 +245,9 @@ class Exchange:
assert set(self.extra_quote.columns) == set(self.quote_df.columns) - {"$change"}
self.quote_df = pd.concat([self.quote_df, self.extra_quote], sort=False, axis=0)
LT_TP_EXP = "(exp)" # Tuple[str, str]: the limitation is calculated by a Qlib expression.
LT_FLT = "float" # float: the trading limitation is based on `abs($change) < limit_threshold`
LT_NONE = "none" # none: there is no trading limitation
LT_TP_EXP = "(exp)" # Tuple[str, str]
LT_FLT = "float" # float
LT_NONE = "none" # none
def _get_limit_type(self, limit_threshold: Union[tuple, float, None]) -> str:
"""get limit type"""
@@ -271,25 +261,20 @@ class Exchange:
raise NotImplementedError(f"This type of `limit_threshold` is not supported")
def _update_limit(self, limit_threshold: Union[Tuple, float, None]) -> None:
# $close may contain NaN, the nan indicates that the stock is not tradable at that timestamp
suspended = self.quote_df["$close"].isna()
# check limit_threshold
limit_type = self._get_limit_type(limit_threshold)
if limit_type == self.LT_NONE:
self.quote_df["limit_buy"] = suspended
self.quote_df["limit_sell"] = suspended
self.quote_df["limit_buy"] = False
self.quote_df["limit_sell"] = False
elif limit_type == self.LT_TP_EXP:
# set limit
limit_threshold = cast(tuple, limit_threshold)
# astype bool is necessary, because quote_df is an expression and could be float
self.quote_df["limit_buy"] = self.quote_df[limit_threshold[0]].astype("bool") | suspended
self.quote_df["limit_sell"] = self.quote_df[limit_threshold[1]].astype("bool") | suspended
self.quote_df["limit_buy"] = self.quote_df[limit_threshold[0]]
self.quote_df["limit_sell"] = self.quote_df[limit_threshold[1]]
elif limit_type == self.LT_FLT:
limit_threshold = cast(float, limit_threshold)
self.quote_df["limit_buy"] = self.quote_df["$change"].ge(limit_threshold) | suspended
self.quote_df["limit_sell"] = (
self.quote_df["$change"].le(-limit_threshold) | suspended
) # pylint: disable=E1130
self.quote_df["limit_buy"] = self.quote_df["$change"].ge(limit_threshold)
self.quote_df["limit_sell"] = self.quote_df["$change"].le(-limit_threshold) # pylint: disable=E1130
@staticmethod
def _get_vol_limit(volume_threshold: Union[tuple, dict, None]) -> Tuple[Optional[list], Optional[list], set]:
@@ -353,18 +338,8 @@ class Exchange:
- if direction is None, check if tradable for buying and selling.
- if direction == Order.BUY, check the if tradable for buying
- if direction == Order.SELL, check the sell limit for selling.
Returns
-------
True: the trading of the stock is limited (maybe hit the highest/lowest price), hence the stock is not tradable
False: the trading of the stock is not limited, hence the stock may be tradable
"""
# NOTE:
# **all** is used when checking limitation.
# For example, the stock trading is limited in a day if every minute is limited in a day if every minute is limited.
if direction is None:
# The trading limitation is related to the trading direction
# if the direction is not provided, then any limitation from buy or sell will result in trading limitation
buy_limit = self.quote.get_data(stock_id, start_time, end_time, field="limit_buy", method="all")
sell_limit = self.quote.get_data(stock_id, start_time, end_time, field="limit_sell", method="all")
return bool(buy_limit or sell_limit)
@@ -381,24 +356,10 @@ class Exchange:
start_time: pd.Timestamp,
end_time: pd.Timestamp,
) -> bool:
"""if stock is suspended(hence not tradable), True will be returned"""
# is suspended
if stock_id in self.quote.get_all_stock():
# suspended stocks are represented by None $close stock
# The $close may contain NaN,
close = self.quote.get_data(stock_id, start_time, end_time, "$close")
if close is None:
# if no close record exists
return True
elif isinstance(close, IndexData):
# **any** non-NaN $close represents trading opportunity may exist
# if all returned is nan, then the stock is suspended
return cast(bool, cast(IndexData, close).isna().all())
else:
# it is single value, make sure is not None
return np.isnan(close)
return self.quote.get_data(stock_id, start_time, end_time, "$close") is None
else:
# if the stock is not in the stock list, then it is not tradable and regarded as suspended
return True
def is_stock_tradable(
@@ -487,9 +448,9 @@ class Exchange:
start_time: pd.Timestamp,
end_time: pd.Timestamp,
method: Optional[str] = "sum",
) -> Union[None, int, float, bool, IndexData]:
) -> float:
"""get the total deal volume of stock with `stock_id` between the time interval [start_time, end_time)"""
return self.quote.get_data(stock_id, start_time, end_time, field="$volume", method=method)
return cast(float, self.quote.get_data(stock_id, start_time, end_time, field="$volume", method=method))
def get_deal_price(
self,
@@ -498,7 +459,7 @@ class Exchange:
end_time: pd.Timestamp,
direction: OrderDir,
method: Optional[str] = "ts_data_last",
) -> Union[None, int, float, bool, IndexData]:
) -> float:
if direction == OrderDir.SELL:
pstr = self.sell_price
elif direction == OrderDir.BUY:
@@ -511,7 +472,7 @@ class Exchange:
self.logger.warning(f"(stock_id:{stock_id}, trade_time:{(start_time, end_time)}, {pstr}): {deal_price}!!!")
self.logger.warning(f"setting deal_price to close price")
deal_price = self.get_close(stock_id, start_time, end_time, method)
return deal_price
return cast(float, deal_price)
def get_factor(
self,
@@ -540,8 +501,8 @@ class Exchange:
direction: OrderDir = OrderDir.BUY,
) -> dict:
"""
Generates the target position according to the weight and the cash.
NOTE: All the cash will be assigned to the tradable stock.
The generate the target position according to the weight and the cash.
NOTE: All the cash will assigned to the tradable stock.
Parameter:
weight_position : dict {stock_id : weight}; allocate cash by weight_position
among then, weight must be in this range: 0 < weight < 1
@@ -639,7 +600,7 @@ class Exchange:
random.shuffle(sorted_ids)
for stock_id in sorted_ids:
# Do not generate order for the non-tradable stocks
# Do not generate order for the nontradable stocks
if not self.is_stock_tradable(stock_id=stock_id, start_time=start_time, end_time=end_time):
continue
@@ -871,11 +832,8 @@ class Exchange:
:param dealt_order_amount: the dealt order amount dict with the format of {stock_id: float}
:return: trade_price, trade_val, trade_cost
"""
trade_price = cast(
float,
self.get_deal_price(order.stock_id, order.start_time, order.end_time, direction=order.direction),
)
total_trade_val = cast(float, self.get_volume(order.stock_id, order.start_time, order.end_time)) * trade_price
trade_price = self.get_deal_price(order.stock_id, order.start_time, order.end_time, direction=order.direction)
total_trade_val = self.get_volume(order.stock_id, order.start_time, order.end_time) * trade_price
order.factor = self.get_factor(order.stock_id, order.start_time, order.end_time)
order.deal_amount = order.amount # set to full amount and clip it step by step
# Clipping amount first

View File

@@ -114,7 +114,7 @@ class BaseExecutor:
self.track_data = track_data
self._trade_exchange = trade_exchange
self.level_infra = LevelInfrastructure()
self.level_infra.reset_infra(common_infra=common_infra, executor=self)
self.level_infra.reset_infra(common_infra=common_infra)
self._settle_type = settle_type
self.reset(start_time=start_time, end_time=end_time, common_infra=common_infra)
if common_infra is None:
@@ -134,8 +134,6 @@ class BaseExecutor:
else:
self.common_infra.update(common_infra)
self.level_infra.reset_infra(common_infra=self.common_infra)
if common_infra.has("trade_account"):
# NOTE: there is a trick in the code.
# shallow copy is used instead of deepcopy.
@@ -258,7 +256,6 @@ class BaseExecutor:
object
trade decision
"""
if self.track_data:
yield trade_decision
@@ -299,7 +296,6 @@ class BaseExecutor:
if return_value is not None:
return_value.update({"execute_result": res})
return res
def get_all_executors(self) -> List[BaseExecutor]:
@@ -400,7 +396,7 @@ class NestedExecutor(BaseExecutor):
trade_decision = updated_trade_decision
# NEW UPDATE
# create a hook for inner strategy to update outer decision
trade_decision = self.inner_strategy.alter_outer_trade_decision(trade_decision)
self.inner_strategy.alter_outer_trade_decision(trade_decision)
return trade_decision
def _collect_data(
@@ -477,9 +473,6 @@ class NestedExecutor(BaseExecutor):
# do nothing and just step forward
sub_cal.step()
# Let inner strategy know that the outer level execution is done.
self.inner_strategy.post_upper_level_exe_step()
return execute_result, {"inner_order_indicators": inner_order_indicators, "decision_list": decision_list}
def post_inner_exe_step(self, inner_exe_res: List[object]) -> None:
@@ -491,7 +484,6 @@ class NestedExecutor(BaseExecutor):
inner_exe_res :
the execution result of inner task
"""
self.inner_strategy.post_exe_step(inner_exe_res)
def get_all_executors(self) -> List[BaseExecutor]:
"""get all executors, including self and inner_executor.get_all_executors()"""
@@ -587,18 +579,20 @@ class SimulatorExecutor(BaseExecutor):
raise NotImplementedError(f"This type of input is not supported")
return order_it
def _update_dealt_order_amount(self, order: Order) -> None:
"""update date and dealt order amount in the day."""
now_deal_day = self.trade_calendar.get_step_time()[0].floor(freq="D")
if self.deal_day is None or now_deal_day > self.deal_day:
self.dealt_order_amount = defaultdict(float)
self.deal_day = now_deal_day
self.dealt_order_amount[order.stock_id] += order.deal_amount
def _collect_data(self, trade_decision: BaseTradeDecision, level: int = 0) -> Tuple[List[object], dict]:
trade_start_time, _ = self.trade_calendar.get_step_time()
execute_result: list = []
for order in self._get_order_iterator(trade_decision):
# Each time we move into a new date, clear `self.dealt_order_amount` since it only maintains intraday
# information.
now_deal_day = self.trade_calendar.get_step_time()[0].floor(freq="D")
if self.deal_day is None or now_deal_day > self.deal_day:
self.dealt_order_amount = defaultdict(float)
self.deal_day = now_deal_day
# execute the order.
# NOTE: The trade_account will be changed in this function
trade_val, trade_cost, trade_price = self.trade_exchange.deal_order(
@@ -607,9 +601,7 @@ class SimulatorExecutor(BaseExecutor):
dealt_order_amount=self.dealt_order_amount,
)
execute_result.append((order, trade_val, trade_cost, trade_price))
self.dealt_order_amount[order.stock_id] += order.deal_amount
self._update_dealt_order_amount(order)
if self.verbose:
print(
"[I {:%Y-%m-%d %H:%M:%S}]: {} {}, price {:.2f}, amount {}, deal_amount {}, factor {}, "

View File

@@ -3,8 +3,9 @@
from __future__ import annotations
import bisect
from abc import abstractmethod
from typing import Any, Set, Tuple, TYPE_CHECKING, Union
from typing import TYPE_CHECKING, Any, Set, Tuple, Union
import numpy as np
@@ -183,8 +184,8 @@ class TradeCalendarManager:
Tuple[int, int]:
the index of the range. **the left and right are closed**
"""
left = int(np.searchsorted(self._calendar, start_time, side="right") - 1)
right = int(np.searchsorted(self._calendar, end_time, side="right") - 1)
left = bisect.bisect_right(list(self._calendar), start_time) - 1
right = bisect.bisect_right(list(self._calendar), end_time) - 1
left -= self.start_index
right -= self.start_index
@@ -247,7 +248,7 @@ class LevelInfrastructure(BaseInfrastructure):
sub_level_infra:
- **NOTE**: this will only work after _init_sub_trading !!!
"""
return {"trade_calendar", "sub_level_infra", "common_infra", "executor"}
return {"trade_calendar", "sub_level_infra", "common_infra"}
def reset_cal(
self,

View File

@@ -75,8 +75,7 @@ class Config:
def set_conf_from_C(self, config_c):
self.update(**config_c.__dict__["_config"])
@staticmethod
def register_from_C(config, skip_register=True):
def register_from_C(self, config, skip_register=True):
from .utils import set_log_with_config # pylint: disable=C0415
if C.registered and skip_register:
@@ -173,9 +172,6 @@ _default_config = {
}
},
"loggers": {"qlib": {"level": logging.DEBUG, "handlers": ["console"]}},
# To let qlib work with other packages, we shouldn't disable existing loggers.
# Note that this param is default to True according to the documentation of logging.
"disable_existing_loggers": False,
},
# Default config for experiment manager
"exp_manager": {
@@ -203,7 +199,7 @@ _default_config = {
"task_url": "mongodb://localhost:27017/",
"task_db_name": "default_task_db",
},
# Shift minute for highfreq minute data, used in backtest
# Shift minute for highfreq minite data, used in backtest
# if min_data_shift == 0, use default market time [9:30, 11:29, 1:00, 2:59]
# if min_data_shift != 0, use shifted market time [9:30, 11:29, 1:00, 2:59] - shift*minute
"min_data_shift": 0,
@@ -412,7 +408,8 @@ class QlibConfig(Config):
if _logging_config:
set_log_with_config(_logging_config)
logger = get_module_logger("Initialization", kwargs.get("logging_level", self.logging_level))
# FIXME: this logger ignored the level in config
logger = get_module_logger("Initialization", level=logging.INFO)
logger.info(f"default_conf: {default_conf}.")
self.set_mode(default_conf)

View File

@@ -2,11 +2,6 @@
# Licensed under the MIT License.
# REGION CONST
from typing import TypeVar
import numpy as np
import pandas as pd
REG_CN = "cn"
REG_US = "us"
REG_TW = "tw"
@@ -15,8 +10,4 @@ REG_TW = "tw"
EPS = 1e-12
# Infinity in integer
INF = int(1e18)
ONE_DAY = pd.Timedelta("1day")
ONE_MIN = pd.Timedelta("1min")
EPS_T = pd.Timedelta("1s") # use 1 second to exclude the right interval point
float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray)
INF = 10**18

View File

@@ -57,7 +57,7 @@ class Alpha360(DataHandlerLP):
fit_end_time=None,
filter_pipe=None,
inst_processor=None,
**kwargs
**kwargs,
):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
@@ -67,7 +67,7 @@ class Alpha360(DataHandlerLP):
"kwargs": {
"config": {
"feature": self.get_feature_config(),
"label": kwargs.pop("label", self.get_label_config()),
"label": kwargs.get("label", self.get_label_config()),
},
"filter_pipe": filter_pipe,
"freq": freq,
@@ -82,14 +82,12 @@ class Alpha360(DataHandlerLP):
data_loader=data_loader,
learn_processors=learn_processors,
infer_processors=infer_processors,
**kwargs
)
def get_label_config(self):
return ["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]
return (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])
@staticmethod
def get_feature_config():
def get_feature_config(self):
# NOTE:
# Alpha360 tries to provide a dataset with original price data
# the original price data includes the prices and volume in the last 60 days.
@@ -101,33 +99,33 @@ class Alpha360(DataHandlerLP):
names = []
for i in range(59, 0, -1):
fields += ["Ref($close, %d)/$close" % i]
names += ["CLOSE%d" % i]
fields += ["Ref($close, %d)/$close" % (i)]
names += ["CLOSE%d" % (i)]
fields += ["$close/$close"]
names += ["CLOSE0"]
for i in range(59, 0, -1):
fields += ["Ref($open, %d)/$close" % i]
names += ["OPEN%d" % i]
fields += ["Ref($open, %d)/$close" % (i)]
names += ["OPEN%d" % (i)]
fields += ["$open/$close"]
names += ["OPEN0"]
for i in range(59, 0, -1):
fields += ["Ref($high, %d)/$close" % i]
names += ["HIGH%d" % i]
fields += ["Ref($high, %d)/$close" % (i)]
names += ["HIGH%d" % (i)]
fields += ["$high/$close"]
names += ["HIGH0"]
for i in range(59, 0, -1):
fields += ["Ref($low, %d)/$close" % i]
names += ["LOW%d" % i]
fields += ["Ref($low, %d)/$close" % (i)]
names += ["LOW%d" % (i)]
fields += ["$low/$close"]
names += ["LOW0"]
for i in range(59, 0, -1):
fields += ["Ref($vwap, %d)/$close" % i]
names += ["VWAP%d" % i]
fields += ["Ref($vwap, %d)/$close" % (i)]
names += ["VWAP%d" % (i)]
fields += ["$vwap/$close"]
names += ["VWAP0"]
for i in range(59, 0, -1):
fields += ["Ref($volume, %d)/($volume+1e-12)" % i]
names += ["VOLUME%d" % i]
fields += ["Ref($volume, %d)/($volume+1e-12)" % (i)]
names += ["VOLUME%d" % (i)]
fields += ["$volume/($volume+1e-12)"]
names += ["VOLUME0"]
@@ -136,7 +134,7 @@ class Alpha360(DataHandlerLP):
class Alpha360vwap(Alpha360):
def get_label_config(self):
return ["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"]
return (["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"])
class Alpha158(DataHandlerLP):
@@ -153,7 +151,7 @@ class Alpha158(DataHandlerLP):
process_type=DataHandlerLP.PTYPE_A,
filter_pipe=None,
inst_processor=None,
**kwargs
**kwargs,
):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
@@ -163,7 +161,7 @@ class Alpha158(DataHandlerLP):
"kwargs": {
"config": {
"feature": self.get_feature_config(),
"label": kwargs.pop("label", self.get_label_config()),
"label": kwargs.get("label", self.get_label_config()),
},
"filter_pipe": filter_pipe,
"freq": freq,
@@ -178,7 +176,6 @@ class Alpha158(DataHandlerLP):
infer_processors=infer_processors,
learn_processors=learn_processors,
process_type=process_type,
**kwargs
)
def get_feature_config(self):
@@ -193,7 +190,7 @@ class Alpha158(DataHandlerLP):
return self.parse_config_to_fields(conf)
def get_label_config(self):
return ["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]
return (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])
@staticmethod
def parse_config_to_fields(config):
@@ -262,119 +259,79 @@ class Alpha158(DataHandlerLP):
def use(x):
return x not in exclude and (include is None or x in include)
# Some factor ref: https://guorn.com/static/upload/file/3/134065454575605.pdf
if use("ROC"):
# https://www.investopedia.com/terms/r/rateofchange.asp
# Rate of change, the price change in the past d days, divided by latest close price to remove unit
fields += ["Ref($close, %d)/$close" % d for d in windows]
names += ["ROC%d" % d for d in windows]
if use("MA"):
# https://www.investopedia.com/ask/answers/071414/whats-difference-between-moving-average-and-weighted-moving-average.asp
# Simple Moving Average, the simple moving average in the past d days, divided by latest close price to remove unit
fields += ["Mean($close, %d)/$close" % d for d in windows]
names += ["MA%d" % d for d in windows]
if use("STD"):
# The standard diviation of close price for the past d days, divided by latest close price to remove unit
fields += ["Std($close, %d)/$close" % d for d in windows]
names += ["STD%d" % d for d in windows]
if use("BETA"):
# The rate of close price change in the past d days, divided by latest close price to remove unit
# For example, price increase 10 dollar per day in the past d days, then Slope will be 10.
fields += ["Slope($close, %d)/$close" % d for d in windows]
names += ["BETA%d" % d for d in windows]
if use("RSQR"):
# The R-sqaure value of linear regression for the past d days, represent the trend linear
fields += ["Rsquare($close, %d)" % d for d in windows]
names += ["RSQR%d" % d for d in windows]
if use("RESI"):
# The redisdual for linear regression for the past d days, represent the trend linearity for past d days.
fields += ["Resi($close, %d)/$close" % d for d in windows]
names += ["RESI%d" % d for d in windows]
if use("MAX"):
# The max price for past d days, divided by latest close price to remove unit
fields += ["Max($high, %d)/$close" % d for d in windows]
names += ["MAX%d" % d for d in windows]
if use("LOW"):
# The low price for past d days, divided by latest close price to remove unit
fields += ["Min($low, %d)/$close" % d for d in windows]
names += ["MIN%d" % d for d in windows]
if use("QTLU"):
# The 80% quantile of past d day's close price, divided by latest close price to remove unit
# Used with MIN and MAX
fields += ["Quantile($close, %d, 0.8)/$close" % d for d in windows]
names += ["QTLU%d" % d for d in windows]
if use("QTLD"):
# The 20% quantile of past d day's close price, divided by latest close price to remove unit
fields += ["Quantile($close, %d, 0.2)/$close" % d for d in windows]
names += ["QTLD%d" % d for d in windows]
if use("RANK"):
# Get the percentile of current close price in past d day's close price.
# Represent the current price level comparing to past N days, add additional information to moving average.
fields += ["Rank($close, %d)" % d for d in windows]
names += ["RANK%d" % d for d in windows]
if use("RSV"):
# Represent the price position between upper and lower resistent price for past d days.
fields += ["($close-Min($low, %d))/(Max($high, %d)-Min($low, %d)+1e-12)" % (d, d, d) for d in windows]
names += ["RSV%d" % d for d in windows]
if use("IMAX"):
# The number of days between current date and previous highest price date.
# Part of Aroon Indicator https://www.investopedia.com/terms/a/aroon.asp
# The indicator measures the time between highs and the time between lows over a time period.
# The idea is that strong uptrends will regularly see new highs, and strong downtrends will regularly see new lows.
fields += ["IdxMax($high, %d)/%d" % (d, d) for d in windows]
names += ["IMAX%d" % d for d in windows]
if use("IMIN"):
# The number of days between current date and previous lowest price date.
# Part of Aroon Indicator https://www.investopedia.com/terms/a/aroon.asp
# The indicator measures the time between highs and the time between lows over a time period.
# The idea is that strong uptrends will regularly see new highs, and strong downtrends will regularly see new lows.
fields += ["IdxMin($low, %d)/%d" % (d, d) for d in windows]
names += ["IMIN%d" % d for d in windows]
if use("IMXD"):
# The time period between previous lowest-price date occur after highest price date.
# Large value suggest downward momemtum.
fields += ["(IdxMax($high, %d)-IdxMin($low, %d))/%d" % (d, d, d) for d in windows]
names += ["IMXD%d" % d for d in windows]
if use("CORR"):
# The correlation between absolute close price and log scaled trading volume
fields += ["Corr($close, Log($volume+1), %d)" % d for d in windows]
names += ["CORR%d" % d for d in windows]
if use("CORD"):
# The correlation between price change ratio and volume change ratio
fields += ["Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), %d)" % d for d in windows]
names += ["CORD%d" % d for d in windows]
if use("CNTP"):
# The percentage of days in past d days that price go up.
fields += ["Mean($close>Ref($close, 1), %d)" % d for d in windows]
names += ["CNTP%d" % d for d in windows]
if use("CNTN"):
# The percentage of days in past d days that price go down.
fields += ["Mean($close<Ref($close, 1), %d)" % d for d in windows]
names += ["CNTN%d" % d for d in windows]
if use("CNTD"):
# The diff between past up day and past down day
fields += ["Mean($close>Ref($close, 1), %d)-Mean($close<Ref($close, 1), %d)" % (d, d) for d in windows]
names += ["CNTD%d" % d for d in windows]
if use("SUMP"):
# The total gain / the absolute total price changed
# Similar to RSI indicator. https://www.investopedia.com/terms/r/rsi.asp
fields += [
"Sum(Greater($close-Ref($close, 1), 0), %d)/(Sum(Abs($close-Ref($close, 1)), %d)+1e-12)" % (d, d)
for d in windows
]
names += ["SUMP%d" % d for d in windows]
if use("SUMN"):
# The total lose / the absolute total price changed
# Can be derived from SUMP by SUMN = 1 - SUMP
# Similar to RSI indicator. https://www.investopedia.com/terms/r/rsi.asp
fields += [
"Sum(Greater(Ref($close, 1)-$close, 0), %d)/(Sum(Abs($close-Ref($close, 1)), %d)+1e-12)" % (d, d)
for d in windows
]
names += ["SUMN%d" % d for d in windows]
if use("SUMD"):
# The diff ratio between total gain and total lose
# Similar to RSI indicator. https://www.investopedia.com/terms/r/rsi.asp
fields += [
"(Sum(Greater($close-Ref($close, 1), 0), %d)-Sum(Greater(Ref($close, 1)-$close, 0), %d))"
"/(Sum(Abs($close-Ref($close, 1)), %d)+1e-12)" % (d, d, d)
@@ -382,15 +339,12 @@ class Alpha158(DataHandlerLP):
]
names += ["SUMD%d" % d for d in windows]
if use("VMA"):
# Simple Volume Moving average: https://www.barchart.com/education/technical-indicators/volume_moving_average
fields += ["Mean($volume, %d)/($volume+1e-12)" % d for d in windows]
names += ["VMA%d" % d for d in windows]
if use("VSTD"):
# The standard deviation for volume in past d days.
fields += ["Std($volume, %d)/($volume+1e-12)" % d for d in windows]
names += ["VSTD%d" % d for d in windows]
if use("WVMA"):
# The volume weighted price change volatility
fields += [
"Std(Abs($close/Ref($close, 1)-1)*$volume, %d)/(Mean(Abs($close/Ref($close, 1)-1)*$volume, %d)+1e-12)"
% (d, d)
@@ -398,7 +352,6 @@ class Alpha158(DataHandlerLP):
]
names += ["WVMA%d" % d for d in windows]
if use("VSUMP"):
# The total volume increase / the absolute total volume changed
fields += [
"Sum(Greater($volume-Ref($volume, 1), 0), %d)/(Sum(Abs($volume-Ref($volume, 1)), %d)+1e-12)"
% (d, d)
@@ -406,8 +359,6 @@ class Alpha158(DataHandlerLP):
]
names += ["VSUMP%d" % d for d in windows]
if use("VSUMN"):
# The total volume increase / the absolute total volume changed
# Can be derived from VSUMP by VSUMN = 1 - VSUMP
fields += [
"Sum(Greater(Ref($volume, 1)-$volume, 0), %d)/(Sum(Abs($volume-Ref($volume, 1)), %d)+1e-12)"
% (d, d)
@@ -415,8 +366,6 @@ class Alpha158(DataHandlerLP):
]
names += ["VSUMN%d" % d for d in windows]
if use("VSUMD"):
# The diff ratio between total volume increase and total volume decrease
# RSI indicator for volume
fields += [
"(Sum(Greater($volume-Ref($volume, 1), 0), %d)-Sum(Greater(Ref($volume, 1)-$volume, 0), %d))"
"/(Sum(Abs($volume-Ref($volume, 1)), %d)+1e-12)" % (d, d, d)
@@ -429,4 +378,4 @@ class Alpha158(DataHandlerLP):
class Alpha158vwap(Alpha158):
def get_label_config(self):
return ["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"]
return (["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"])

View File

@@ -1,7 +1,5 @@
from qlib.data.dataset.handler import DataHandler, DataHandlerLP
from .handler import check_transform_proc
EPSILON = 1e-4
@@ -17,9 +15,20 @@ class HighFreqHandler(DataHandlerLP):
fit_end_time=None,
drop_raw=True,
):
def check_transform_proc(proc_l):
new_l = []
for p in proc_l:
p["kwargs"].update(
{
"fit_start_time": fit_start_time,
"fit_end_time": fit_end_time,
}
)
new_l.append(p)
return new_l
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
infer_processors = check_transform_proc(infer_processors)
learn_processors = check_transform_proc(learn_processors)
data_loader = {
"class": "QlibDataLoader",
@@ -101,100 +110,6 @@ class HighFreqHandler(DataHandlerLP):
return fields, names
class HighFreqGeneralHandler(DataHandlerLP):
def __init__(
self,
instruments="csi300",
start_time=None,
end_time=None,
infer_processors=[],
learn_processors=[],
fit_start_time=None,
fit_end_time=None,
drop_raw=True,
day_length=240,
freq="1min",
columns=["$open", "$high", "$low", "$close", "$vwap"],
):
self.day_length = day_length
self.columns = columns
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
data_loader = {
"class": "QlibDataLoader",
"kwargs": {
"config": self.get_feature_config(),
"swap_level": False,
"freq": freq,
},
}
super().__init__(
instruments=instruments,
start_time=start_time,
end_time=end_time,
data_loader=data_loader,
infer_processors=infer_processors,
learn_processors=learn_processors,
drop_raw=drop_raw,
)
def get_feature_config(self):
fields = []
names = []
template_if = "If(IsNull({1}), {0}, {1})"
template_paused = f"Cut({{0}}, {self.day_length * 2}, None)"
def get_normalized_price_feature(price_field, shift=0):
# norm with the close price of 237th minute of yesterday.
if shift == 0:
template_norm = f"{{0}}/DayLast(Ref({{1}}, {self.day_length * 2}))"
else:
template_norm = f"Ref({{0}}, " + str(shift) + f")/DayLast(Ref({{1}}, {self.day_length}))"
template_fillnan = "FFillNan({0})"
# calculate -> ffill -> remove paused
feature_ops = template_paused.format(
template_fillnan.format(
template_norm.format(template_if.format("$close", price_field), template_fillnan.format("$close"))
)
)
return feature_ops
for column_name in self.columns:
fields.append(get_normalized_price_feature(column_name, 0))
names.append(column_name)
for column_name in self.columns:
fields.append(get_normalized_price_feature(column_name, self.day_length))
names.append(column_name + "_1")
# calculate and fill nan with 0
fields += [
template_paused.format(
"If(IsNull({0}), 0, {0})".format(
f"{{0}}/Ref(DayLast(Mean({{0}}, {self.day_length * 30})), {self.day_length})".format("$volume")
)
)
]
names += ["$volume"]
fields += [
template_paused.format(
"If(IsNull({0}), 0, {0})".format(
f"Ref({{0}}, {self.day_length})/Ref(DayLast(Mean({{0}}, {self.day_length * 30})), {self.day_length})".format(
"$volume"
)
)
)
]
names += ["$volume_1"]
return fields, names
class HighFreqBacktestHandler(DataHandler):
def __init__(
self,
@@ -217,272 +132,13 @@ class HighFreqBacktestHandler(DataHandler):
data_loader=data_loader,
)
def get_feature_config(self):
fields = []
names = []
template_if = "If(IsNull({1}), {0}, {1})"
template_paused = "Select(Gt($paused_num, 1.001), {0})"
template_fillnan = "FFillNan({0})"
fields += [
template_fillnan.format(template_paused.format("$close")),
]
names += ["$close0"]
fields += [
template_paused.format(
template_if.format(
template_fillnan.format("$close"),
"$vwap",
)
)
]
names += ["$vwap0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$volume"))]
names += ["$volume0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$factor"))]
names += ["$factor0"]
return fields, names
class HighFreqGeneralBacktestHandler(DataHandler):
def __init__(
self,
instruments="csi300",
start_time=None,
end_time=None,
day_length=240,
freq="1min",
columns=["$close", "$vwap", "$volume"],
):
self.day_length = day_length
self.columns = set(columns)
data_loader = {
"class": "QlibDataLoader",
"kwargs": {
"config": self.get_feature_config(),
"swap_level": False,
"freq": freq,
},
}
super().__init__(
instruments=instruments,
start_time=start_time,
end_time=end_time,
data_loader=data_loader,
)
def get_feature_config(self):
fields = []
names = []
if "$close" in self.columns:
template_paused = f"Cut({{0}}, {self.day_length * 2}, None)"
template_fillnan = "FFillNan({0})"
template_if = "If(IsNull({1}), {0}, {1})"
fields += [
template_paused.format(template_fillnan.format("$close")),
]
names += ["$close0"]
if "$vwap" in self.columns:
fields += [
template_paused.format(template_if.format(template_fillnan.format("$close"), "$vwap")),
]
names += ["$vwap0"]
if "$volume" in self.columns:
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$volume"))]
names += ["$volume0"]
return fields, names
class HighFreqOrderHandler(DataHandlerLP):
def __init__(
self,
instruments="csi300",
start_time=None,
end_time=None,
infer_processors=[],
learn_processors=[],
fit_start_time=None,
fit_end_time=None,
drop_raw=True,
):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
data_loader = {
"class": "QlibDataLoader",
"kwargs": {
"config": self.get_feature_config(),
"swap_level": False,
"freq": "1min",
},
}
super().__init__(
instruments=instruments,
start_time=start_time,
end_time=end_time,
data_loader=data_loader,
infer_processors=infer_processors,
learn_processors=learn_processors,
drop_raw=drop_raw,
)
def get_feature_config(self):
fields = []
names = []
template_if = "If(IsNull({1}), {0}, {1})"
template_ifinf = "If(IsInf({1}), {0}, {1})"
template_paused = "Select(Gt($paused_num, 1.001), {0})"
def get_normalized_price_feature(price_field, shift=0):
# norm with the close price of 237th minute of yesterday.
if shift == 0:
template_norm = "{0}/DayLast(Ref({1}, 243))"
else:
template_norm = "Ref({0}, " + str(shift) + ")/DayLast(Ref({1}, 243))"
template_fillnan = "FFillNan({0})"
# calculate -> ffill -> remove paused
feature_ops = template_paused.format(
template_fillnan.format(
template_norm.format(template_if.format("$close", price_field), template_fillnan.format("$close"))
)
)
return feature_ops
def get_normalized_vwap_price_feature(price_field, shift=0):
# norm with the close price of 237th minute of yesterday.
if shift == 0:
template_norm = "{0}/DayLast(Ref({1}, 243))"
else:
template_norm = "Ref({0}, " + str(shift) + ")/DayLast(Ref({1}, 243))"
template_fillnan = "FFillNan({0})"
# calculate -> ffill -> remove paused
feature_ops = template_paused.format(
template_fillnan.format(
template_norm.format(
template_if.format("$close", template_ifinf.format("$close", price_field)),
template_fillnan.format("$close"),
)
)
)
return feature_ops
fields += [get_normalized_price_feature("$open", 0)]
fields += [get_normalized_price_feature("$high", 0)]
fields += [get_normalized_price_feature("$low", 0)]
fields += [get_normalized_price_feature("$close", 0)]
fields += [get_normalized_vwap_price_feature("$vwap", 0)]
names += ["$open", "$high", "$low", "$close", "$vwap"]
fields += [get_normalized_price_feature("$open", 240)]
fields += [get_normalized_price_feature("$high", 240)]
fields += [get_normalized_price_feature("$low", 240)]
fields += [get_normalized_price_feature("$close", 240)]
fields += [get_normalized_vwap_price_feature("$vwap", 240)]
names += ["$open_1", "$high_1", "$low_1", "$close_1", "$vwap_1"]
fields += [get_normalized_price_feature("$bid", 0)]
fields += [get_normalized_price_feature("$ask", 0)]
names += ["$bid", "$ask"]
fields += [get_normalized_price_feature("$bid", 240)]
fields += [get_normalized_price_feature("$ask", 240)]
names += ["$bid_1", "$ask_1"]
# calculate and fill nan with 0
def get_volume_feature(volume_field, shift=0):
template_gzero = "If(Ge({0}, 0), {0}, 0)"
if shift == 0:
feature_ops = template_gzero.format(
template_paused.format(
"If(IsInf({0}), 0, {0})".format(
"If(IsNull({0}), 0, {0})".format(
"{0}/Ref(DayLast(Mean({0}, 7200)), 240)".format(volume_field)
)
)
)
)
else:
feature_ops = template_gzero.format(
template_paused.format(
"If(IsInf({0}), 0, {0})".format(
"If(IsNull({0}), 0, {0})".format(
f"Ref({{0}}, {shift})/Ref(DayLast(Mean({{0}}, 7200)), 240)".format(volume_field)
)
)
)
)
return feature_ops
fields += [get_volume_feature("$volume", 0)]
names += ["$volume"]
fields += [get_volume_feature("$volume", 240)]
names += ["$volume_1"]
fields += [get_volume_feature("$bidV", 0)]
fields += [get_volume_feature("$bidV1", 0)]
fields += [get_volume_feature("$bidV3", 0)]
fields += [get_volume_feature("$bidV5", 0)]
fields += [get_volume_feature("$askV", 0)]
fields += [get_volume_feature("$askV1", 0)]
fields += [get_volume_feature("$askV3", 0)]
fields += [get_volume_feature("$askV5", 0)]
names += ["$bidV", "$bidV1", "$bidV3", "$bidV5", "$askV", "$askV1", "$askV3", "$askV5"]
fields += [get_volume_feature("$bidV", 240)]
fields += [get_volume_feature("$bidV1", 240)]
fields += [get_volume_feature("$bidV3", 240)]
fields += [get_volume_feature("$bidV5", 240)]
fields += [get_volume_feature("$askV", 240)]
fields += [get_volume_feature("$askV1", 240)]
fields += [get_volume_feature("$askV3", 240)]
fields += [get_volume_feature("$askV5", 240)]
names += ["$bidV_1", "$bidV1_1", "$bidV3_1", "$bidV5_1", "$askV_1", "$askV1_1", "$askV3_1", "$askV5_1"]
return fields, names
class HighFreqBacktestOrderHandler(DataHandler):
def __init__(
self,
instruments="csi300",
start_time=None,
end_time=None,
):
data_loader = {
"class": "QlibDataLoader",
"kwargs": {
"config": self.get_feature_config(),
"swap_level": False,
"freq": "1min",
},
}
super().__init__(
instruments=instruments,
start_time=start_time,
end_time=end_time,
data_loader=data_loader,
)
def get_feature_config(self):
fields = []
names = []
template_if = "If(IsNull({1}), {0}, {1})"
template_paused = "Select(Gt($hx_paused_num, 1.001), {0})"
# template_paused = "{0}"
template_fillnan = "FFillNan({0})"
fields += [
template_fillnan.format(template_paused.format("$close")),
@@ -502,34 +158,7 @@ class HighFreqBacktestOrderHandler(DataHandler):
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$volume"))]
names += ["$volume0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$bid"))]
names += ["$bid0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$bidV"))]
names += ["$bidV0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$ask"))]
names += ["$ask0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$askV"))]
names += ["$askV0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("($bid + $ask) / 2"))]
names += ["$median0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$factor"))]
names += ["$factor0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$downlimitmarket"))]
names += ["$downlimitmarket0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$uplimitmarket"))]
names += ["$uplimitmarket0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$highmarket"))]
names += ["$highmarket0"]
fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$lowmarket"))]
names += ["$lowmarket0"]
return fields, names

View File

@@ -4,7 +4,6 @@ import datetime
from typing import Optional
import qlib
from qlib import get_module_logger
from qlib.data import D
from qlib.config import REG_CN
from qlib.utils import init_instance_by_config
@@ -13,6 +12,7 @@ from qlib.data.data import Cal
from qlib.contrib.ops.high_freq import get_calendar_day, DayLast, FFillNan, BFillNan, Date, Select, IsNull, IsInf, Cut
import pickle as pkl
from joblib import Parallel, delayed
from utilsd.logging import print_log
class HighFreqProvider:
@@ -28,7 +28,6 @@ class HighFreqProvider:
feature_conf: dict,
label_conf: Optional[dict] = None,
backtest_conf: dict = None,
freq: str = "1min",
**kwargs,
) -> None:
self.start_time = start_time
@@ -42,8 +41,6 @@ class HighFreqProvider:
self.label_conf = label_conf
self.backtest_conf = backtest_conf
self.qlib_conf = qlib_conf
self.logger = get_module_logger("HighFreqProvider")
self.freq = freq
def get_pre_datasets(self):
"""Generate the training, validation and test datasets for prediction
@@ -118,8 +115,8 @@ class HighFreqProvider:
# This code used the copy-on-write feature of Linux
# to avoid calculating the calendar multiple times in the subprocess.
# This code may accelerate, but may be not useful on Windows and Mac Os
Cal.calendar(freq=self.freq)
get_calendar_day(freq=self.freq)
Cal.calendar(freq="1min")
get_calendar_day(freq="1min")
def _gen_dataframe(self, config, datasets=["train", "valid", "test"]):
try:
@@ -128,7 +125,7 @@ class HighFreqProvider:
raise ValueError("Must specify the path to save the dataset.") from e
if os.path.isfile(path):
start = time.time()
self.logger.info("Dataset exists, load from disk.", __name__)
print_log("Dataset exists, load from disk.", __name__)
# res = dataset.prepare(['train', 'valid', 'test'])
with open(path, "rb") as f:
@@ -137,11 +134,11 @@ class HighFreqProvider:
res = [data[i] for i in datasets]
else:
res = data.prepare(datasets)
self.logger.info(f"Data loaded, time cost: {time.time() - start:.2f}", __name__)
print_log(f"Data loaded, time cost: {time.time() - start:.2f}", __name__)
else:
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
self.logger.info("Generating dataset", __name__)
print_log("Generating dataset", __name__)
start_time = time.time()
self._prepare_calender_cache()
dataset = init_instance_by_config(config)
@@ -160,7 +157,7 @@ class HighFreqProvider:
with open(path[:-4] + "test.pkl", "wb") as f:
pkl.dump(testset, f)
res = [data[i] for i in datasets]
self.logger.info(f"Data generated, time cost: {(time.time() - start_time):.2f}", __name__)
print_log(f"Data generated, time cost: {(time.time() - start_time):.2f}", __name__)
return res
def _gen_data(self, config, datasets=["train", "valid", "test"]):
@@ -170,7 +167,7 @@ class HighFreqProvider:
raise ValueError("Must specify the path to save the dataset.") from e
if os.path.isfile(path):
start = time.time()
self.logger.info("Dataset exists, load from disk.", __name__)
print_log("Dataset exists, load from disk.", __name__)
# res = dataset.prepare(['train', 'valid', 'test'])
with open(path, "rb") as f:
@@ -179,18 +176,18 @@ class HighFreqProvider:
res = [data[i] for i in datasets]
else:
res = data.prepare(datasets)
self.logger.info(f"Data loaded, time cost: {time.time() - start:.2f}", __name__)
print_log(f"Data loaded, time cost: {time.time() - start:.2f}", __name__)
else:
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
self.logger.info("Generating dataset", __name__)
print_log("Generating dataset", __name__)
start_time = time.time()
self._prepare_calender_cache()
dataset = init_instance_by_config(config)
dataset.config(dump_all=True, recursive=True)
dataset.to_pickle(path)
res = dataset.prepare(datasets)
self.logger.info(f"Data generated, time cost: {(time.time() - start_time):.2f}", __name__)
print_log(f"Data generated, time cost: {(time.time() - start_time):.2f}", __name__)
return res
def _gen_dataset(self, config):
@@ -200,21 +197,21 @@ class HighFreqProvider:
raise ValueError("Must specify the path to save the dataset.") from e
if os.path.isfile(path):
start = time.time()
self.logger.info("Dataset exists, load from disk.", __name__)
print_log("Dataset exists, load from disk.", __name__)
with open(path, "rb") as f:
dataset = pkl.load(f)
self.logger.info(f"Data loaded, time cost: {time.time() - start:.2f}", __name__)
print_log(f"Data loaded, time cost: {time.time() - start:.2f}", __name__)
else:
start = time.time()
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
self.logger.info("Generating dataset", __name__)
print_log("Generating dataset", __name__)
self._prepare_calender_cache()
dataset = init_instance_by_config(config)
self.logger.info(f"Dataset init, time cost: {time.time() - start:.2f}", __name__)
print_log(f"Dataset init, time cost: {time.time() - start:.2f}", __name__)
dataset.prepare(["train", "valid", "test"])
self.logger.info(f"Dataset prepared, time cost: {time.time() - start:.2f}", __name__)
print_log(f"Dataset prepared, time cost: {time.time() - start:.2f}", __name__)
dataset.config(dump_all=True, recursive=True)
dataset.to_pickle(path)
return dataset
@@ -227,22 +224,22 @@ class HighFreqProvider:
if os.path.isfile(path + "tmp_dataset.pkl"):
start = time.time()
self.logger.info("Dataset exists, load from disk.", __name__)
print_log("Dataset exists, load from disk.", __name__)
else:
start = time.time()
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
self.logger.info("Generating dataset", __name__)
print_log("Generating dataset", __name__)
self._prepare_calender_cache()
dataset = init_instance_by_config(config)
self.logger.info(f"Dataset init, time cost: {time.time() - start:.2f}", __name__)
print_log(f"Dataset init, time cost: {time.time() - start:.2f}", __name__)
dataset.config(dump_all=False, recursive=True)
dataset.to_pickle(path + "tmp_dataset.pkl")
with open(path + "tmp_dataset.pkl", "rb") as f:
new_dataset = pkl.load(f)
time_list = D.calendar(start_time=self.start_time, end_time=self.end_time, freq=self.freq)[::240]
time_list = D.calendar(start_time=self.start_time, end_time=self.end_time, freq="1min")[::240]
def generate_dataset(times):
if os.path.isfile(path + times.strftime("%Y-%m-%d") + ".pkl"):
@@ -268,15 +265,15 @@ class HighFreqProvider:
if os.path.isfile(path + "tmp_dataset.pkl"):
start = time.time()
self.logger.info("Dataset exists, load from disk.", __name__)
print_log("Dataset exists, load from disk.", __name__)
else:
start = time.time()
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
self.logger.info("Generating dataset", __name__)
print_log("Generating dataset", __name__)
self._prepare_calender_cache()
dataset = init_instance_by_config(config)
self.logger.info(f"Dataset init, time cost: {time.time() - start:.2f}", __name__)
print_log(f"Dataset init, time cost: {time.time() - start:.2f}", __name__)
dataset.config(dump_all=False, recursive=True)
dataset.to_pickle(path + "tmp_dataset.pkl")
@@ -285,7 +282,7 @@ class HighFreqProvider:
instruments = D.instruments(market="all")
stock_list = D.list_instruments(
instruments=instruments, start_time=self.start_time, end_time=self.end_time, freq=self.freq, as_list=True
instruments=instruments, start_time=self.start_time, end_time=self.end_time, freq="1min", as_list=True
)
def generate_dataset(stock):

View File

@@ -96,11 +96,9 @@ def indicator_analysis(df, method="mean"):
index: Index(datetime)
method : str, optional
statistics method of pa/ffr, by default "mean"
- if method is 'mean', count the mean statistical value of each trade indicator
- if method is 'amount_weighted', count the deal_amount weighted mean statistical value of each trade indicator
- if method is 'value_weighted', count the value weighted mean statistical value of each trade indicator
Note: statistics method of pos is always "mean"
Returns
@@ -156,7 +154,6 @@ def backtest_daily(
E.g.
.. code-block:: python
# dict
strategy = {
"class": "TopkDropoutStrategy",
@@ -183,19 +180,16 @@ def backtest_daily(
# 3) specify module path with class name
# - "a.b.c.ClassName" getattr(<a.b.c.module>, "ClassName")() will be used.
executor : Union[str, dict, BaseExecutor]
for initializing the outermost executor.
benchmark: str
the benchmark for reporting.
account : Union[float, int, Position]
information for describing how to creating the account
For `float` or `int`:
Using Account with only initial cash
For `Position`:
Using Account with a Position
exchange_kwargs : dict
the kwargs for initializing Exchange
@@ -289,8 +283,8 @@ def long_short_backtest(
NOTE: This will be faster with offline qlib.
:return: The result of backtest, it is represented by a dict.
{ "long": long_returns(excess),
"short": short_returns(excess),
"long_short": long_short_returns}
"short": short_returns(excess),
"long_short": long_short_returns}
"""
if get_level_index(pred, level="datetime") == 1:
pred = pred.swaplevel().sort_index()

View File

@@ -4,7 +4,7 @@ try:
from .catboost_model import CatBoostModel
except ModuleNotFoundError:
CatBoostModel = None
print("ModuleNotFoundError. CatBoostModel are skipped. (optional: maybe installing CatBoostModel can fix it.)")
print("Please install necessary libs for CatBoostModel.")
try:
from .double_ensemble import DEnsembleModel
from .gbdt import LGBModel

View File

@@ -30,7 +30,6 @@ class DEnsembleModel(Model, FeatureInt):
sample_ratios=None,
sub_weights=None,
epochs=100,
early_stopping_rounds=None,
**kwargs
):
self.base_model = base_model # "gbm" or "mlp", specifically, we use lgbm for "gbm"
@@ -60,7 +59,6 @@ class DEnsembleModel(Model, FeatureInt):
self.params = {"objective": loss}
self.params.update(kwargs)
self.loss = loss
self.early_stopping_rounds = early_stopping_rounds
def fit(self, dataset: DatasetH):
df_train, df_valid = dataset.prepare(
@@ -105,19 +103,14 @@ class DEnsembleModel(Model, FeatureInt):
def train_submodel(self, df_train, df_valid, weights, features):
dtrain, dvalid = self._prepare_data_gbm(df_train, df_valid, weights, features)
evals_result = dict()
callbacks = [lgb.log_evaluation(20), lgb.record_evaluation(evals_result)]
if self.early_stopping_rounds:
callbacks.append(lgb.early_stopping(self.early_stopping_rounds))
self.logger.info("Training with early_stopping...")
model = lgb.train(
self.params,
dtrain,
num_boost_round=self.epochs,
valid_sets=[dtrain, dvalid],
valid_names=["train", "valid"],
callbacks=callbacks,
verbose_eval=20,
evals_result=evals_result,
)
evals_result["train"] = list(evals_result["train"].values())[0]
evals_result["valid"] = list(evals_result["valid"].values())[0]

View File

@@ -28,7 +28,7 @@ class ADARNN(Model):
d_feat : int
input dimension for each time step
metric: str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : str
@@ -56,7 +56,7 @@ class ADARNN(Model):
n_splits=2,
GPU=0,
seed=None,
**_
**kwargs
):
# Set logger.
self.logger = get_module_logger("ADARNN")
@@ -81,7 +81,7 @@ class ADARNN(Model):
self.optimizer = optimizer.lower()
self.loss = loss
self.n_splits = n_splits
self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu")
self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
self.seed = seed
self.logger.info(
@@ -213,8 +213,7 @@ class ADARNN(Model):
weight_mat = self.transform_type(out_weight_list)
return weight_mat, None
@staticmethod
def calc_all_metrics(pred):
def calc_all_metrics(self, pred):
"""pred is a pandas dataframe that has two attributes: score (pred) and label (real)"""
res = {}
ic = pred.groupby(level="datetime").apply(lambda x: x.label.corr(x.score))
@@ -260,6 +259,8 @@ class ADARNN(Model):
save_path = get_or_create_path(save_path)
stop_steps = 0
best_score = -np.inf
best_epoch = 0
evals_result["train"] = []
evals_result["valid"] = []
@@ -399,7 +400,7 @@ class AdaRNN(nn.Module):
self.model_type = model_type
self.trans_loss = trans_loss
self.len_seq = len_seq
self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu")
self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
in_size = self.n_input
features = nn.ModuleList()
@@ -498,8 +499,7 @@ class AdaRNN(nn.Module):
res = self.softmax(weight).squeeze()
return res
@staticmethod
def get_features(output_list):
def get_features(self, output_list):
fea_list_src, fea_list_tar = [], []
for fea in output_list:
fea_list_src.append(fea[0 : fea.size(0) // 2])
@@ -561,7 +561,7 @@ class TransferLoss:
"""
self.loss_type = loss_type
self.input_dim = input_dim
self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu")
self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
def compute(self, X, Y):
"""Compute adaptation loss
@@ -676,8 +676,7 @@ class MMD_loss(nn.Module):
self.fix_sigma = None
self.kernel_type = kernel_type
@staticmethod
def guassian_kernel(source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
def guassian_kernel(self, source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
n_samples = int(source.size()[0]) + int(target.size()[0])
total = torch.cat([source, target], dim=0)
total0 = total.unsqueeze(0).expand(int(total.size(0)), int(total.size(0)), int(total.size(1)))
@@ -692,8 +691,7 @@ class MMD_loss(nn.Module):
kernel_val = [torch.exp(-L2_distance / bandwidth_temp) for bandwidth_temp in bandwidth_list]
return sum(kernel_val)
@staticmethod
def linear_mmd(X, Y):
def linear_mmd(self, X, Y):
delta = X.mean(axis=0) - Y.mean(axis=0)
loss = delta.dot(delta.T)
return loss

View File

@@ -36,7 +36,7 @@ class ADD(Model):
d_feat : int
input dimensions for each time step
metric : str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : int

View File

@@ -30,7 +30,7 @@ class ALSTM(Model):
d_feat : int
input dimension for each time step
metric: str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : int

View File

@@ -33,7 +33,7 @@ class ALSTM(Model):
d_feat : int
input dimension for each time step
metric: str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : int

View File

@@ -33,7 +33,7 @@ class GATs(Model):
d_feat : int
input dimensions for each time step
metric : str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : int

View File

@@ -50,7 +50,7 @@ class GATs(Model):
d_feat : int
input dimensions for each time step
metric : str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : int

View File

@@ -30,7 +30,7 @@ class GRU(Model):
d_feat : int
input dimension for each time step
metric: str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : str

View File

@@ -31,7 +31,7 @@ class GRU(Model):
d_feat : int
input dimension for each time step
metric: str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : str

View File

@@ -34,7 +34,7 @@ class HIST(Model):
d_feat : int
input dimensions for each time step
metric : str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : str

View File

@@ -32,7 +32,7 @@ class IGMTF(Model):
d_feat : int
input dimension for each time step
metric: str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : str

View File

@@ -29,7 +29,7 @@ class LSTM(Model):
d_feat : int
input dimension for each time step
metric: str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : str

View File

@@ -30,7 +30,7 @@ class LSTM(Model):
d_feat : int
input dimension for each time step
metric: str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : str

View File

@@ -33,7 +33,7 @@ class TCN(Model):
n_chans: int
number of channels
metric: str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : str

View File

@@ -30,7 +30,7 @@ class TCN(Model):
d_feat : int
input dimension for each time step
metric: str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : str

View File

@@ -29,7 +29,7 @@ class TCTS(Model):
d_feat : int
input dimension for each time step
metric: str
the evaluation metric used in early stop
the evaluate metric used in early stop
optimizer : str
optimizer name
GPU : str

View File

@@ -1,6 +1,5 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from functools import partial
import pandas as pd
@@ -11,11 +10,7 @@ import matplotlib.pyplot as plt
from scipy import stats
from typing import Sequence
from qlib.typehint import Literal
from ..graph import ScatterGraph, SubplotsGraph, BarGraph, HeatmapGraph
from ..utils import guess_plotly_rangebreaks
def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int = 5, **kwargs) -> tuple:
@@ -53,13 +48,12 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int
t_df["long-average"] = t_df["Group1"] - pred_label.groupby(level="datetime")["label"].mean()
t_df = t_df.dropna(how="all") # for days which does not contain label
# FIXME: support HIGH-FREQ
t_df.index = t_df.index.strftime("%Y-%m-%d")
# Cumulative Return By Group
group_scatter_figure = ScatterGraph(
t_df.cumsum(),
layout=dict(
title="Cumulative Return",
xaxis=dict(tickangle=45, rangebreaks=kwargs.get("rangebreaks", guess_plotly_rangebreaks(t_df.index))),
),
layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)),
).figure
t_df = t_df.loc[:, ["long-short", "long-average"]]
@@ -116,36 +110,22 @@ def _plot_qq(data: pd.Series = None, dist=stats.norm) -> go.Figure:
return fig
def _pred_ic(
pred_label: pd.DataFrame = None, methods: Sequence[Literal["IC", "Rank IC"]] = ("IC", "Rank IC"), **kwargs
) -> tuple:
def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> tuple:
"""
:param pred_label: pd.DataFrame
must contain one column of realized return with name `label` and one column of predicted score names `score`.
:param methods: Sequence[Literal["IC", "Rank IC"]]
IC series to plot.
IC is sectional pearson correlation between label and score
Rank IC is the spearman correlation between label and score
For the Monthly IC, IC histogram, IC Q-Q plot. Only the first type of IC will be plotted.
:param pred_label:
:param rank:
:return:
"""
_methods_mapping = {"IC": "pearson", "Rank IC": "spearman"}
if rank:
ic = pred_label.groupby(level="datetime").apply(
lambda x: x["label"].rank(pct=True).corr(x["score"].rank(pct=True))
)
else:
ic = pred_label.groupby(level="datetime").apply(lambda x: x["label"].corr(x["score"]))
def _corr_series(x, method):
return x["label"].corr(x["score"], method=method)
ic_df = pd.concat(
[
pred_label.groupby(level="datetime").apply(partial(_corr_series, method=_methods_mapping[m])).rename(m)
for m in methods
],
axis=1,
)
_ic = ic_df.iloc(axis=1)[0]
_index = _ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6)
_monthly_ic = _ic.groupby(_index).mean()
_index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6)
_monthly_ic = ic.groupby(_index).mean()
_monthly_ic.index = pd.MultiIndex.from_arrays(
[_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)],
names=["year", "month"],
@@ -168,27 +148,27 @@ def _pred_ic(
_monthly_ic = _monthly_ic.reindex(fill_index)
ic_bar_figure = ic_figure(ic_df, kwargs.get("show_nature_day", False))
_ic_df = ic.to_frame("ic")
ic_bar_figure = ic_figure(_ic_df, kwargs.get("show_nature_day", True))
ic_heatmap_figure = HeatmapGraph(
_monthly_ic.unstack(),
layout=dict(title="Monthly IC", xaxis=dict(dtick=1), yaxis=dict(tickformat="04d", dtick=1)),
layout=dict(title="Monthly IC", yaxis=dict(tickformat=",d")),
graph_kwargs=dict(xtype="array", ytype="array"),
).figure
dist = stats.norm
_qqplot_fig = _plot_qq(_ic, dist)
_qqplot_fig = _plot_qq(ic, dist)
if isinstance(dist, stats.norm.__class__):
dist_name = "Normal"
else:
dist_name = "Unknown"
_ic_df = _ic.to_frame("IC")
_bin_size = ((_ic_df.max() - _ic_df.min()) / 20).min()
_sub_graph_data = [
(
"IC",
"ic",
dict(
row=1,
col=1,
@@ -222,13 +202,12 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple:
pred = pred_label.copy()
pred["score_last"] = pred.groupby(level="instrument")["score"].shift(lag)
ac = pred.groupby(level="datetime").apply(lambda x: x["score"].rank(pct=True).corr(x["score_last"].rank(pct=True)))
# FIXME: support HIGH-FREQ
_df = ac.to_frame("value")
_df.index = _df.index.strftime("%Y-%m-%d")
ac_figure = ScatterGraph(
_df,
layout=dict(
title="Auto Correlation",
xaxis=dict(tickangle=45, rangebreaks=kwargs.get("rangebreaks", guess_plotly_rangebreaks(_df.index))),
),
layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)),
).figure
return (ac_figure,)
@@ -254,33 +233,32 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple:
"Bottom": bottom,
}
)
# FIXME: support HIGH-FREQ
r_df.index = r_df.index.strftime("%Y-%m-%d")
turnover_figure = ScatterGraph(
r_df,
layout=dict(
title="Top-Bottom Turnover",
xaxis=dict(tickangle=45, rangebreaks=kwargs.get("rangebreaks", guess_plotly_rangebreaks(r_df.index))),
),
layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)),
).figure
return (turnover_figure,)
def ic_figure(ic_df: pd.DataFrame, show_nature_day=True, **kwargs) -> go.Figure:
r"""IC figure
"""IC figure
:param ic_df: ic DataFrame
:param show_nature_day: whether to display the abscissa of non-trading day
:param \*\*kwargs: contains some parameters to control plot style in plotly. Currently, supports
- `rangebreaks`: https://plotly.com/python/time-series/#Hiding-Weekends-and-Holidays
:return: plotly.graph_objs.Figure
"""
if show_nature_day:
date_index = pd.date_range(ic_df.index.min(), ic_df.index.max())
ic_df = ic_df.reindex(date_index)
# FIXME: support HIGH-FREQ
ic_df.index = ic_df.index.strftime("%Y-%m-%d")
ic_bar_figure = BarGraph(
ic_df,
layout=dict(
title="Information Coefficient (IC)",
xaxis=dict(tickangle=45, rangebreaks=kwargs.get("rangebreaks", guess_plotly_rangebreaks(ic_df.index))),
xaxis=dict(type="category", tickangle=45),
),
).figure
return ic_bar_figure
@@ -294,13 +272,12 @@ def model_performance_graph(
rank=False,
graph_names: list = ["group_return", "pred_ic", "pred_autocorr"],
show_notebook: bool = True,
show_nature_day: bool = False,
**kwargs,
show_nature_day=True,
) -> [list, tuple]:
r"""Model performance
"""Model performance
:param pred_label: index is **pd.MultiIndex**, index name is **[instrument, datetime]**; columns names is **[score, label]**.
It is usually same as the label of model training(e.g. "Ref($close, -2)/Ref($close, -1) - 1").
:param pred_label: index is **pd.MultiIndex**, index name is **[instrument, datetime]**; columns names is **[score,
label]**. It is usually same as the label of model training(e.g. "Ref($close, -2)/Ref($close, -1) - 1").
.. code-block:: python
@@ -320,14 +297,17 @@ def model_performance_graph(
:param graph_names: graph names; default ['cumulative_return', 'pred_ic', 'pred_autocorr', 'pred_turnover'].
:param show_notebook: whether to display graphics in notebook, the default is `True`.
:param show_nature_day: whether to display the abscissa of non-trading day.
:param \*\*kwargs: contains some parameters to control plot style in plotly. Currently, supports
- `rangebreaks`: https://plotly.com/python/time-series/#Hiding-Weekends-and-Holidays
:return: if show_notebook is True, display in notebook; else return `plotly.graph_objs.Figure` list.
"""
figure_list = []
for graph_name in graph_names:
fun_res = eval(f"_{graph_name}")(
pred_label=pred_label, lag=lag, N=N, reverse=reverse, rank=rank, show_nature_day=show_nature_day, **kwargs
pred_label=pred_label,
lag=lag,
N=N,
reverse=reverse,
rank=rank,
show_nature_day=show_nature_day,
)
figure_list += fun_res

View File

@@ -218,7 +218,6 @@ def cumulative_return_graph(
Graph desc:
- Axis X: Trading day.
- Axis Y:
- Above axis Y: `(((Ref($close, -1)/$close - 1) * weight).sum() / weight.sum()).cumsum()`.
@@ -243,8 +242,7 @@ def cumulative_return_graph(
:param label_data: `D.features` result; index is `pd.MultiIndex`, index name is [`instrument`, `datetime`]; columns names is [`label`].
**The label T is the change from T to T+1**, it is recommended to use ``close``, example: `D.features(D.instruments('csi500'), ['Ref($close, -1)/$close-1'])`
**The label T is the change from T to T+1**, it is recommended to use ``close``, example: `D.features(D.instruments('csi500'), ['Ref($close, -1)/$close-1'])`
.. code-block:: python

View File

@@ -39,7 +39,6 @@ def parse_position(position: dict = None) -> pd.DataFrame:
result_df = pd.DataFrame()
for _trading_date, _value in position.items():
_value = _value.position
# pd_date type: pd.Timestamp
_cash = _value.pop("cash")
for _item in ["now_account_value"]:

View File

@@ -99,8 +99,7 @@ def rank_label_graph(
:param position: position data; **qlib.backtest.backtest** result.
:param label_data: **D.features** result; index is **pd.MultiIndex**, index name is **[instrument, datetime]**; columns names is **[label]**.
**The label T is the change from T to T+1**, it is recommended to use ``close``, example: `D.features(D.instruments('csi500'), ['Ref($close, -1)/$close-1'])`.
**The label T is the change from T to T+1**, it is recommended to use ``close``, example: `D.features(D.instruments('csi500'), ['Ref($close, -1)/$close-1'])`.
.. code-block:: python

View File

@@ -119,7 +119,7 @@ def _get_risk_analysis_figure(analysis_df: pd.DataFrame) -> Iterable[py.Figure]:
_figure = SubplotsGraph(
_get_all_risk_analysis(analysis_df),
kind_map=dict(kind="BarGraph", kwargs={}),
subplots_kwargs={"rows": 1, "cols": 4},
subplots_kwargs={"rows": 4, "cols": 1},
).figure
return (_figure,)

View File

@@ -4,7 +4,6 @@
import pandas as pd
from ..graph import ScatterGraph
from ..utils import guess_plotly_rangebreaks
def _get_score_ic(pred_label: pd.DataFrame):
@@ -20,7 +19,7 @@ def _get_score_ic(pred_label: pd.DataFrame):
return pd.DataFrame({"ic": _ic, "rank_ic": _rank_ic})
def score_ic_graph(pred_label: pd.DataFrame, show_notebook: bool = True, **kwargs) -> [list, tuple]:
def score_ic_graph(pred_label: pd.DataFrame, show_notebook: bool = True) -> [list, tuple]:
"""score IC
Example:
@@ -54,13 +53,11 @@ def score_ic_graph(pred_label: pd.DataFrame, show_notebook: bool = True, **kwarg
:return: if show_notebook is True, display in notebook; else return **plotly.graph_objs.Figure** list.
"""
_ic_df = _get_score_ic(pred_label)
# FIXME: support HIGH-FREQ
_ic_df.index = _ic_df.index.strftime("%Y-%m-%d")
_figure = ScatterGraph(
_ic_df,
layout=dict(
title="Score IC",
xaxis=dict(tickangle=45, rangebreaks=kwargs.get("rangebreaks", guess_plotly_rangebreaks(_ic_df.index))),
),
layout=dict(title="Score IC", xaxis=dict(type="category", tickangle=45)),
graph_kwargs={"mode": "lines+markers"},
).figure
if show_notebook:

View File

@@ -139,8 +139,8 @@ class FeaACAna(FeaAnalyser):
class FeaSkewTurt(NumFeaAnalyser):
def calc_stat_values(self):
self._skew = datetime_groupby_apply(self._dataset, "skew")
self._kurt = datetime_groupby_apply(self._dataset, pd.DataFrame.kurt)
self._skew = datetime_groupby_apply(self._dataset, "skew", skip_group=True)
self._kurt = datetime_groupby_apply(self._dataset, pd.DataFrame.kurt, skip_group=True)
def plot_single(self, col, ax):
self._skew[col].plot(ax=ax, label="skew")

View File

@@ -1,7 +1,6 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import matplotlib.pyplot as plt
import pandas as pd
def sub_fig_generator(sub_fs=(3, 3), col_n=10, row_n=1, wspace=None, hspace=None, sharex=False, sharey=False):
@@ -44,31 +43,3 @@ def sub_fig_generator(sub_fs=(3, 3), col_n=10, row_n=1, wspace=None, hspace=None
res = res.item()
yield res
plt.show()
def guess_plotly_rangebreaks(dt_index: pd.DatetimeIndex):
"""
This function `guesses` the rangebreaks required to remove gaps in datetime index.
It basically calculates the difference between a `continuous` datetime index and index given.
For more details on `rangebreaks` params in plotly, see
https://plotly.com/python/reference/layout/xaxis/#layout-xaxis-rangebreaks
Parameters
----------
dt_index: pd.DatetimeIndex
The datetimes of the data.
Returns
-------
the `rangebreaks` to be passed into plotly axis.
"""
dt_idx = dt_index.sort_values()
gaps = dt_idx[1:] - dt_idx[:-1]
min_gap = gaps.min()
gaps_to_break = {}
for gap, d in zip(gaps, dt_idx[:-1]):
if gap > min_gap:
gaps_to_break.setdefault(gap - min_gap, []).append(d + min_gap)
return [dict(values=v, dvalue=int(k.total_seconds() * 1000)) for k, v in gaps_to_break.items()]

View File

@@ -25,14 +25,12 @@ class SoftTopkStrategy(WeightStrategyBase):
common_infra=None,
**kwargs,
):
"""
Parameters
----------
"""Parameter
topk : int
top-N stocks to buy
risk_degree : float
position percentage of total value buy_method:
position percentage of total value
buy_method :
rank_fill: assign the weight stocks that rank high first(1/topk max)
average_fill: assign the weight to the stocks rank high averagely.
"""
@@ -53,19 +51,12 @@ class SoftTopkStrategy(WeightStrategyBase):
return self.risk_degree
def generate_target_weight_position(self, score, current, trade_start_time, trade_end_time):
"""
Parameters
----------
score:
pred score for this trade date, pd.Series, index is stock_id, contain 'score' column
current:
current position, use Position() class
trade_date:
trade date
generate target position from score for this date and the current position
The cache is not considered in the position
"""Parameter:
score : pred score for this trade date, pd.Series, index is stock_id, contain 'score' column
current : current position, use Position() class
trade_date : trade date
generate target position from score for this date and the current position
The cache is not considered in the position
"""
# TODO:
# If the current stock list is more than topk(eg. The weights are modified

View File

@@ -33,14 +33,10 @@ class OrderGenerator:
:type target_weight_position: dict
:param risk_degree:
:type risk_degree: float
:param pred_start_time:
:type pred_start_time: pd.Timestamp
:param pred_end_time:
:type pred_end_time: pd.Timestamp
:param trade_start_time:
:type trade_start_time: pd.Timestamp
:param trade_end_time:
:type trade_end_time: pd.Timestamp
:param pred_date: the date the score is predicted
:type pred_date: pd.Timestamp
:param trade_date: the date the stock is traded
:type trade_date: pd.Timestamp
:rtype: list
"""
@@ -76,14 +72,10 @@ class OrderGenWInteract(OrderGenerator):
:type target_weight_position: dict
:param risk_degree:
:type risk_degree: float
:param pred_start_time:
:type pred_start_time: pd.Timestamp
:param pred_end_time:
:type pred_end_time: pd.Timestamp
:param trade_start_time:
:type trade_start_time: pd.Timestamp
:param trade_end_time:
:type trade_end_time: pd.Timestamp
:param pred_date:
:type pred_date: pd.Timestamp
:param trade_date:
:type trade_date: pd.Timestamp
:rtype: list
"""
@@ -155,12 +147,9 @@ class OrderGenWOInteract(OrderGenerator):
) -> list:
"""generate_order_list_from_target_weight_position
generate order list directly not using the information (e.g. whether can be traded, the accurate trade price)
at trade date.
In target weight position, generating order list need to know the price of objective stock in trade date,
but we cannot get that
value when do not interact with exchange, so we check the %close price at pred_date or price recorded
in current position.
generate order list directly not using the information (e.g. whether can be traded, the accurate trade price) at trade date.
In target weight position, generating order list need to know the price of objective stock in trade date, but we cannot get that
value when do not interact with exchange, so we check the %close price at pred_date or price recorded in current position.
:param current:
:type current: Position
@@ -170,14 +159,10 @@ class OrderGenWOInteract(OrderGenerator):
:type target_weight_position: dict
:param risk_degree:
:type risk_degree: float
:param pred_start_time:
:type pred_start_time: pd.Timestamp
:param pred_end_time:
:type pred_end_time: pd.Timestamp
:param trade_start_time:
:type trade_start_time: pd.Timestamp
:param trade_end_time:
:type trade_end_time: pd.Timestamp
:param pred_date:
:type pred_date: pd.Timestamp
:param trade_date:
:type trade_date: pd.Timestamp
:rtype: list of generated orders
"""
@@ -200,8 +185,7 @@ class OrderGenWOInteract(OrderGenerator):
* target_weight_position[stock_id]
/ trade_exchange.get_close(stock_id, start_time=pred_start_time, end_time=pred_end_time)
)
# TODO: Qlib use None to represent trading suspension.
# So last close price can't be the estimated trading price.
# TODO: Qlib use None to represent trading suspension. So last close price can't be the estimated trading price.
# Maybe a close price with forward fill will be a better solution.
elif stock_id in current_stock:
amount_dict[stock_id] = (

Some files were not shown because too many files have changed in this diff Show More