1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-29 09:01:18 +08:00

Compare commits

...

169 Commits

Author SHA1 Message Date
Yuchen Fang
56edc16089 fix(rl): 🐛 dataFrame selection problem in high-freq execution workflow (#1348) 2022-11-11 11:32:22 +08:00
Kan Ren
2b8462d137 Update README.md 2022-01-24 17:27:42 +08:00
Mingzhe-Han
1979cac50a Merge pull request #632 from Mingzhe-Han/high-freq-execution
High freq execution
2021-10-03 21:13:13 +08:00
v-mingzhehan
424a48d0fb change freq 2021-09-17 14:21:57 +00:00
Mingzhe-Han
202bbea272 Merge pull request #1 from microsoft/high-freq-execution
High freq execution
2021-09-17 11:09:07 +08:00
Mingzhe-Han
6a22136366 Merge branch 'high-freq-execution' into high-freq-execution 2021-09-17 11:08:53 +08:00
Kan Ren
603c282415 update arxiv paper link. 2021-04-02 08:15:35 +08:00
Kan Ren
22abe852f7 Merge pull request #353 from Arthur-Null/high-freq-execution
High freq execution
2021-03-19 14:18:47 +08:00
Yuchen Fang
e3f463010b Merge remote-tracking branch 'upstream/high-freq-execution' into high-freq-execution 2021-03-19 14:11:08 +08:00
Yuchen Fang
80aa08215f bug fix 2021-03-19 14:03:36 +08:00
Mingzhe Han
b3893067f7 delete notes 2021-02-24 16:04:17 +08:00
Mingzhe Han
e6dfccce2f fix OPDT_backtest bugs 2021-02-24 13:44:20 +08:00
Mingzhe Han
f9c30f9834 update Dependencies 2021-02-23 18:53:11 +08:00
Mingzhe Han
f164bf8411 Add config file in trade
Update readme in trade
Update highfreq to delete nan order
2021-02-23 18:53:11 +08:00
Mingzhe Han
1f28044d84 update Dependencies 2021-02-23 17:25:29 +08:00
Mingzhe Han
3cf0d27a07 Add config file in trade
Update readme in trade
Update highfreq to delete nan order
2021-02-23 15:51:50 +08:00
Kan Ren
bcae4bb22e Update README.md 2021-01-29 00:11:02 +08:00
you-n-g
f680a564a0 Merge pull request #228 from microsoft/main
Update data handler
2021-01-29 00:09:22 +08:00
you-n-g
828993b397 Merge pull request #222 from bxdd/rl-highfreq-include-examples
Qlib Highfreq Support & Highfreq DataHanlder/Operator/Processor Examples
2021-01-29 00:08:10 +08:00
bxdd
8ef89b4fa8 update 2021-01-28 15:01:07 +00:00
bxdd
76cf9dad99 update 2021-01-28 14:30:20 +00:00
bxdd
f3eb02a0bd update docstring 2021-01-28 14:26:30 +00:00
bxdd
ffa68fd010 update 2021-01-28 14:25:55 +00:00
bxdd
f6dd006c35 update 2021-01-28 11:31:15 +00:00
you-n-g
9cd41e5a81 Merge pull request #227 from Arthur-Null/high-freq-execution
High freq execution
2021-01-28 16:27:05 +08:00
Kan Ren
e23022e9d8 Update README.md: description about OE 2021-01-28 16:24:25 +08:00
Yuchen Fang
ebbbec2a6c Merge branch 'high-freq-execution' into high-freq-execution 2021-01-28 16:23:45 +08:00
Yuchen Fang
13d39e6bbc refine readme 2021-01-28 16:11:58 +08:00
Yuchen Fang
b96aab6bef minor 2021-01-28 14:30:54 +08:00
Yuchen Fang
700eef4164 README 2021-01-28 14:29:31 +08:00
Yuchen Fang
31c7d72485 minor 2021-01-28 14:22:55 +08:00
Yuchen Fang
30ad1967a2 requirements 2021-01-28 14:21:51 +08:00
Yuchen Fang
0c6cad1d7b rename 2021-01-28 14:14:33 +08:00
you-n-g
a0f22571de Update README.md 2021-01-28 09:44:03 +08:00
you-n-g
6835b2f67e Update README.md 2021-01-28 09:40:42 +08:00
Yuchen Fang
7c4971e566 minor 2021-01-28 09:22:39 +08:00
Yuchen Fang
70a9d42c7d format 2021-01-28 09:22:39 +08:00
Yuchen Fang
bcadf47f32 trade 2021-01-28 09:22:39 +08:00
Yuchen Fang
4dc14a2489 minor 2021-01-28 00:41:22 +08:00
Yuchen Fang
a03b08bb4c format 2021-01-28 00:41:02 +08:00
Yuchen Fang
98086e4fdc trade 2021-01-28 00:34:32 +08:00
you-n-g
8c29105bca Update cache.py 2021-01-27 19:52:33 +08:00
bxdd
948b829ff4 add get_data in highfreq 2021-01-27 10:34:31 +00:00
Jactus
304a0c3d7a Add paper year 2021-01-27 18:15:52 +08:00
bxdd
02dea2aeb6 update paused 2021-01-27 07:42:00 +00:00
bxdd
6fc4f2b249 fix a bug 2021-01-27 07:02:59 +00:00
bxdd
2a5f06ee9e update dataset test 2021-01-27 06:25:40 +00:00
zhupr
7f9216dc90 Fix the number of minutes on the first and last trading day of high frequency 2021-01-27 10:59:46 +08:00
zhupr
263ccdfe6f US stock code supports Windows 2021-01-27 10:59:46 +08:00
zhupr
1a8f1bfc57 support collecting yahoo 1min data 2021-01-27 10:59:46 +08:00
bxdd
9dc11a9e3c Merge github.com:microsoft/qlib into qlib_register_ops 2021-01-26 17:12:33 +00:00
bxdd
3bdd54308b update some little code 2021-01-26 17:02:30 +00:00
bxdd
1b569d371d simpson vwap 2021-01-26 14:32:08 +00:00
you-n-g
36e5c601de Merge pull request #78 from zhupr/main
Fix the error when the stock code is a number
2021-01-26 21:50:21 +08:00
zhupr
ae45711e2b Merge remote-tracking branch 'qlib/main' into save_inst 2021-01-26 19:42:59 +08:00
you-n-g
bcc47aa4cb Merge pull request #92 from bxdd/qlib_register_ops
Support Register of Custom Feature Operators Easily
2021-01-26 18:53:43 +08:00
bxdd
ee94634b23 black 2021-01-26 08:47:53 +00:00
bxdd
2016ebbbb2 update tests 2021-01-26 08:47:07 +00:00
zhupr
1eaf09cce1 version removed .dev 2021-01-26 16:29:26 +08:00
zhupr
7579f4b4c0 Merge remote-tracking branch 'qlib/main' into save_inst 2021-01-26 16:14:11 +08:00
zhupr
1a1c45981c US stock code supports Windows 2021-01-26 16:06:38 +08:00
bxdd
e4ecea55e4 fix 2021-01-26 07:41:22 +00:00
bxdd
58616fced9 black format 2021-01-26 07:33:50 +00:00
bxdd
8e9ca22b07 del some print 2021-01-26 07:33:26 +00:00
bxdd
6a145df87c fix bug 2021-01-26 07:32:06 +00:00
bxdd
06dbd02b99 black format 2021-01-25 17:59:48 +00:00
bxdd
ffedb6382f add highfreq example 2021-01-25 17:58:45 +00:00
zhupr
3f9f295a87 add register in config 2021-01-24 11:22:02 +08:00
Wendi Li
84d77f4585 Update pytorch_nn.py 2021-01-24 10:40:47 +08:00
you-n-g
afdf58b4fa Update serial.py 2021-01-24 10:36:56 +08:00
Alex Wang
2b6d16feb1 fix naming 2021-01-22 19:16:57 +08:00
Alex Wang
0a86a6f392 update format 2021-01-22 19:16:57 +08:00
Alex Wang
5da5ad4b9f tabnet 2021-01-22 19:16:57 +08:00
you-n-g
dd07810b66 Update README.md 2021-01-22 12:53:05 +08:00
bxdd
a762248d98 update test&docs 2021-01-22 01:06:32 +09:00
bxdd
80c9a47e51 Merge github.com:microsoft/qlib into qlib_register_ops 2021-01-22 00:52:30 +09:00
王雪
784e73bceb black formatting 2021-01-21 00:07:03 +08:00
王雪
5ad1b4cc33 for IDE auto-complete with global Wrapper
R, D, Cal, Inst, FeatureD, ExpressionD, DatasetD, D
2021-01-21 00:07:03 +08:00
王雪
e85646762c Update .gitignore 2021-01-20 22:12:35 +08:00
Young
fc81a39317 Add dataset standalone usage example 2021-01-20 21:14:27 +08:00
you-n-g
d44c5bb2b2 Update README.md 2021-01-20 21:14:03 +08:00
bxdd
c622d3f6f8 Update data.rst 2021-01-20 18:55:30 +08:00
bxdd
6daaa79519 add register ops config 2021-01-20 18:44:53 +09:00
zhupr
3dda2cb379 Merge remote-tracking branch 'qlib/main' into qlib_register_ops 2021-01-20 15:16:06 +08:00
zhupr
4fcfde7cfb Initialization is split into: set_config and config_based_on_C 2021-01-20 15:06:18 +08:00
bxdd
3403c00b6b Update requirements.txt
fix readthedocs cant find cmake error
2021-01-19 20:35:11 +08:00
bxdd
ecdfe49fd1 del custom ops test for check the CI status 2021-01-19 20:39:15 +09:00
bxdd
cc214a3462 black format 2021-01-19 09:14:17 +08:00
bxdd
65d8af41e7 restructure backtest 2021-01-19 09:14:17 +08:00
bxdd
0e0970f06e update backtest 2021-01-19 09:14:17 +08:00
bxdd
917261dbf6 update backtest 2021-01-19 09:14:17 +08:00
bxdd
6a9105e065 add highfreq_backtest 2021-01-19 09:14:17 +08:00
王雪
570bb272eb fix setup error
why required pymongo
2021-01-18 19:37:24 +08:00
Wendi Li
0524a47cf4 Update pytorch_lstm_ts.py 2021-01-18 12:20:40 +08:00
Wendi Li
9abc0b0d4f Update pytorch_gru_ts.py 2021-01-18 12:20:31 +08:00
Wendi Li
fe60e40927 Update pytorch_gats_ts.py 2021-01-18 12:20:20 +08:00
Wendi Li
740c297618 Update pytorch_alstm_ts.py 2021-01-18 12:20:00 +08:00
Anon-Artist
b4a088efe8 Update cli.py 2021-01-14 18:42:33 +08:00
Jactus
b34890772f Make note more clear 2021-01-13 19:19:48 +08:00
Jactus
054ffa29f6 Update readme 2021-01-13 19:19:48 +08:00
Jactus
74e08c9e37 Add deepcopy to config 2021-01-13 19:19:48 +08:00
Jactus
ea96c9e22d Update docs and support Python 3.9 2021-01-13 19:19:48 +08:00
王雪
86e7c44c6b Update initialization.rst
need line changing
2021-01-13 15:28:05 +08:00
you-n-g
64cf2e2df8 Update data.rst 2021-01-12 18:43:05 +08:00
Jactus
4361a4049a Fix create_recorder bug 2021-01-07 18:30:18 +08:00
Zhichong Fang
231f37376b Fix unrecognized config bug 2021-01-07 18:28:17 +08:00
you-n-g
328cdeda4a Update README.md 2021-01-07 11:12:49 +08:00
Zhichong Fang
4dbc8e52ec Update data.py
Fix some typo
2021-01-06 16:36:23 +08:00
Young
ba447d3448 update valute 2021-01-06 14:43:14 +08:00
zhupr
df556532d0 Fix the error when the stock code is a number 2021-01-06 11:21:33 +08:00
Wendi Li
18e040f506 Update workflow_config_gru_Alpha158.yaml
Delete a redundant parameter.
2021-01-04 17:05:21 +08:00
Wendi Li
aefc98b1d7 Update workflow_config_lstm_Alpha158.yaml
Delete a redundant parameter.
2021-01-04 17:05:13 +08:00
Jactus
46c8d791ac Fix doc bugs 2020-12-30 23:51:05 +08:00
Young
afcd91a2d0 black format 2020-12-28 12:04:03 +00:00
Young
4a30d9d1ec update github issue template 2020-12-28 12:02:01 +00:00
you-n-g
2da2e9bd9e Update README.md 2020-12-26 20:21:30 +08:00
you-n-g
3e6877ff0f Update README.md 2020-12-25 22:01:18 +08:00
zhupr
a0f32036a6 Fix the first trading day of the calendar extra in report_df 2020-12-24 11:22:48 +08:00
bxdd
d8f36df7f4 debug on macos 2020-12-23 18:28:05 +00:00
bxdd
cb3b6c5bde black format 2020-12-23 16:41:32 +00:00
bxdd
b11712fa54 fix cant find ops error on Windows 2020-12-23 16:39:17 +00:00
Jactus
660edeb94f Remove fm in recorder 2020-12-23 21:14:53 +08:00
Jactus
95de4088df Fix recorder temp dir bug 2020-12-23 21:14:53 +08:00
hadrianl
e8d7a22651 fix _adjust_size 2020-12-23 17:39:04 +08:00
hadrianl
4a62b929ad add _get_value_size and remove _limit_flag 2020-12-23 17:39:04 +08:00
hadrianl
5efe82fb56 make code cleaner 2020-12-23 17:39:04 +08:00
hadrianl
40bbafcaab black format 2020-12-23 17:39:04 +08:00
hadrianl
4c4f0f3c5e black format 2020-12-23 17:39:04 +08:00
hadrianl
ae0e0eca3d better MemCacheUnit implement 2020-12-23 17:39:04 +08:00
bxdd
7e37fa710a update alpha.rst 2020-12-21 23:31:31 +08:00
bxdd
e0c460c33c Update alpha.rst 2020-12-21 23:31:31 +08:00
bxdd
53f501ac19 del import 2020-12-21 12:44:27 +00:00
bxdd
132df027a5 update format 2020-12-21 12:09:25 +00:00
bxdd
7d97fd39ce update ops register 2020-12-21 12:06:42 +00:00
Young
995fa98fc6 add more doc to PortAnaRecord 2020-12-20 16:11:07 +08:00
Maciej Domagała
824de921d1 fixing typos #4 2020-12-19 11:59:23 +08:00
Maciej Domagała
66d9bd1a68 fixing typos #3
I just randomly find these by the way. Good work on the framework!
2020-12-18 20:16:54 +08:00
you-n-g
1c0bb2f827 Merge pull request #97 from Derek-Wds/main
Update benchmark performance
2020-12-17 17:12:40 +08:00
Maciej Domagała
ea018ed4dc fixing typos #2 2020-12-17 17:12:18 +08:00
hadrianl
f3f1867b14 fix wrong attribute 2020-12-17 15:04:07 +08:00
hadrianl
8bbfd8810c formatting 2020-12-17 15:04:07 +08:00
hadrianl
3f84c3768a Make __getattr__ to raise AttributeError instead of return it.Avoid using try except. 2020-12-17 15:04:07 +08:00
Dingsu Wang
7372a3a598 Merge branch 'main' into main 2020-12-17 14:43:21 +08:00
Jactus
4b4cd38ca6 Update benchmark results 2020-12-17 14:41:12 +08:00
you-n-g
7d40ba753a Update README.md 2020-12-17 00:35:35 +08:00
Young
9b60214e0c make info more friendly 2020-12-16 02:16:06 +00:00
Young
f7e775f941 make message more friendly 2020-12-16 02:14:38 +00:00
Young
aefbf3b5f1 update collect info 2020-12-15 13:24:29 +00:00
G_will
3f85af05e5 Refactor to Python3 style 2020-12-15 20:37:43 +08:00
Jactus
192c2dc5ef Add demo 2020-12-15 20:33:32 +08:00
Jactus
911edd7839 Add stale bot 2020-12-15 20:31:38 +08:00
Maciej Domagała
3d47dd78c8 Typo fix 2020-12-15 20:29:30 +08:00
Jactus
8f6ab0af54 Format 2020-12-14 19:23:43 +08:00
Jactus
cb0b6fcdaa Update CI and script 2020-12-14 19:23:43 +08:00
Yifan Deng (FA Talent)
6b8824dd29 Update Sign in ops.py 2020-12-14 16:55:23 +08:00
Yifan Deng
c217e7c479 Update ops.py
Fix the bug when Sign followed by True/False
2020-12-14 16:55:23 +08:00
you-n-g
ea4fe1577b Update README.md 2020-12-14 13:05:12 +08:00
you-n-g
1bab07e419 Update README.md 2020-12-13 22:45:07 +08:00
bxdd
422d1d8c93 Update README.md 2020-12-12 19:41:16 +08:00
bxdd
c8f9b1162d Update README.md 2020-12-12 19:01:00 +08:00
Young
e2bdef7ffe update version number to dev 2020-12-12 10:09:18 +00:00
Jactus
c10955d026 Update tft 2020-12-11 14:33:16 +08:00
Jactus
d642c7b6ea Update benchmark performance 2020-12-11 09:55:37 +08:00
bxdd
0cdc5e125a update docs 2020-12-10 10:08:29 +00:00
bxdd
2de812f262 update ops docs 2020-12-10 10:04:09 +00:00
bxdd
16450c2876 fix import 2020-12-10 09:54:05 +00:00
bxdd
729b57e4a7 add example script 2020-12-10 09:11:12 +00:00
bxdd
87cc52cd05 black format 2020-12-10 09:02:43 +00:00
bxdd
0be57d51be support register custom feature ops easily 2020-12-10 09:00:00 +00:00
128 changed files with 8378 additions and 1020 deletions

View File

@@ -28,7 +28,8 @@ Steps to reproduce the behavior:
## Environment
**Note**: One could run `python scripts/collect_info.py` under the `qlib` directory to get the following information.
**Note**: Users can run `cd scripts && python collect_info.py all` under the project directory to get system information
and paste it here directly.
- Qlib version:
- Python version:
@@ -37,4 +38,4 @@ Steps to reproduce the behavior:
## Additional Notes
<!-- Add any other information about the problem here. -->
<!-- Add any other information about the problem here. -->

62
.github/stale.yml vendored Normal file
View File

@@ -0,0 +1,62 @@
# Configuration for probot-stale - https://github.com/probot/stale
# Number of days of inactivity before an Issue or Pull Request becomes stale
daysUntilStale: 60
# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
daysUntilClose: 7
# Only issues or pull requests with all of these labels are checked if stale. Defaults to `[]` (disabled)
onlyLabels: []
# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
exemptLabels:
- bug
- pinned
- security
- "[Status] Maybe Later"
# Set to true to ignore issues in a project (defaults to false)
exemptProjects: false
# Set to true to ignore issues in a milestone (defaults to false)
exemptMilestones: false
# Set to true to ignore issues with an assignee (defaults to false)
exemptAssignees: false
# Label to use when marking as stale
staleLabel: wontfix
# Comment to post when marking as stale. Set to `false` to disable
markComment: >
This issue has been automatically marked as stale because it has not had
recent activity. It will be closed if no further activity occurs. Thank you
for your contributions.
# Comment to post when removing the stale label.
# unmarkComment: >
# Your comment here.
# Comment to post when closing a stale Issue or Pull Request.
# closeComment: >
# Your comment here.
# Limit the number of actions per hour, from 1-30. Default is 30
limitPerRun: 30
# Limit to only `issues` or `pulls`
# only: issues
# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
# pulls:
# daysUntilStale: 30
# markComment: >
# This pull request has been automatically marked as stale because it has not had
# recent activity. It will be closed if no further activity occurs. Thank you
# for your contributions.
# issues:
# exemptLabels:
# - confirmed

View File

@@ -13,7 +13,7 @@ jobs:
strategy:
matrix:
os: [windows-latest, ubuntu-16.04, ubuntu-18.04, ubuntu-20.04, macos-latest]
python-version: [3.6, 3.7, 3.8]
python-version: [3.6, 3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2
@@ -22,9 +22,58 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Lint with Black
run: |
cd ..
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe -m pip install black
$CONDA\\python.exe -m black qlib -l 120 --check --diff
else
sudo $CONDA/bin/python -m pip install black
$CONDA/bin/python -m black qlib -l 120 --check --diff
fi
shell: bash
# Test Qlib installed with pip
- name: Install Qlib with pip
run: |
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe -m pip install pyqlib --ignore-installed ruamel.yaml --user
else
sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml
fi
shell: bash
- name: Install dependencies
run: |
- name: Install Lightgbm for MacOS
if: runner.os == 'macOS'
run: |
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
- name: Test data downloads
run: |
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
else
$CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
fi
shell: bash
- name: Test workflow by config (install from pip)
run: |
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml
$CONDA\\python.exe -m pip uninstall -y pyqlib
else
$CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
sudo $CONDA/bin/python -m pip uninstall -y pyqlib
fi
shell: bash
# Test Qlib installed from source
- name: Install Qlib from source
run: |
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe -m pip install --upgrade cython
$CONDA\\python.exe -m pip install numpy jupyter jupyter_contrib_nbextensions
@@ -36,13 +85,7 @@ jobs:
sudo $CONDA/bin/python -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while installing locally won't
sudo $CONDA/bin/python setup.py install
fi
shell: bash
- name: Install Lightgbm for MacOS
if: runner.os == 'macOS'
run: |
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
shell: bash
- name: Install test dependencies
run: |
@@ -54,16 +97,6 @@ jobs:
sudo $CONDA/bin/python -m pip install black pytest
fi
shell: bash
- name: Lint with Black
run: |
cd ..
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe -m black qlib -l 120 --check --diff
else
$CONDA/bin/python -m black qlib -l 120 --check --diff
fi
shell: bash
- name: Unit tests with Pytest
run: |
@@ -73,22 +106,13 @@ jobs:
else
$CONDA/bin/python -m pytest . --durations=0
fi
shell: bash
shell: bash
- name: Test data downloads
run: |
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
else
$CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
fi
shell: bash
- name: Test workflow by config
- name: Test workflow by config (install from source)
run: |
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml
else
$CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
fi
shell: bash
shell: bash

1
.gitignore vendored
View File

@@ -2,6 +2,7 @@
__pycache__/
*.pyc
*.pyd
*.so
*.ipynb
.ipynb_checkpoints

View File

@@ -31,9 +31,11 @@ For more details, please refer to our paper ["Qlib: An AI-oriented Quantitative
- [Run a single model](#run-a-single-model)
- [Run multiple models](#run-multiple-models)
- [**Quant Dataset Zoo**](#quant-dataset-zoo)
- [High-frequency execution](#high-frequency-execution)
- [More About Qlib](#more-about-qlib)
- [Offline Mode and Online Mode](#offline-mode-and-online-mode)
- [Performance of Qlib Data Server](#performance-of-qlib-data-server)
- [Related Reports](#related-reports)
- [Contributing](#contributing)
@@ -61,11 +63,27 @@ At the module level, Qlib is a platform that consists of the above components. T
This quick start guide tries to demonstrate
1. It's very easy to build a complete Quant research workflow and try your ideas with _Qlib_.
1. Though with *public data* and *simple models*, machine learning technologies **work very well** in practical Quant investment.
2. Though with *public data* and *simple models*, machine learning technologies **work very well** in practical Quant investment.
Here is a quick **[demo](https://terminalizer.com/view/3f24561a4470)** that shows how to install ``Qlib`` and run LightGBM with ``qrun``. **But**, please make sure you have already prepared the data following the [instruction](#data-preparation).
## Installation
Users can easily install ``Qlib`` by pip according to the following command
This table demonstrates the supported Python version of `Qlib`:
| | install with pip | install from source | plot |
| ------------- |:---------------------:|:--------------------:|:----:|
| Python 3.6 | :heavy_check_mark: | :heavy_check_mark: (only with `Anaconda`) | :heavy_check_mark: |
| Python 3.7 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| Python 3.8 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| Python 3.9 | :x: | :heavy_check_mark: | :x: |
**Note**:
1. Please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source.
2. For Python 3.9, `Qlib` supports running workflows such as training models, doing backtest and plotting most of the related figures (those included in [notebook](examples/workflow_by_code.ipynb)). However, plotting for the *model performance* is not supported for now and we will fix this when the dependent packages are upgraded in the future.
### Install with pip
Users can easily install ``Qlib`` by pip according to the following command.
```bash
pip install pyqlib
@@ -73,6 +91,7 @@ Users can easily install ``Qlib`` by pip according to the following command
**Note**: pip will install the latest stable qlib. However, the main branch of qlib is in active development. If you want to test the latest scripts or functions in the main branch, please install qlib with the methods below.
### Install from source
Also, users can install the latest dev version ``Qlib`` by the source code according to the following steps:
* Before installing ``Qlib`` from source, users need to install some dependencies:
@@ -81,7 +100,6 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor
pip install numpy
pip install --upgrade cython
```
**Note**: Please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source.
* Clone the repository and install ``Qlib`` as follows.
* If you haven't installed qlib by the command ``pip install pyqlib`` before:
@@ -94,7 +112,9 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor
git clone https://github.com/microsoft/qlib.git && cd qlib
pip install .
```
**Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, which the command ``python setup.py install`` **can't**.
**Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, while the command ``python setup.py install`` **can't**.
**Tips**: If you fail to install `Qlib` or run the examples in your environment, comparing your steps and the [CI workflow](.github/workflows/test.yml) may help you find the problem.
## Data Preparation
Load and prepare data by running the following code:
@@ -138,12 +158,16 @@ Users could create the same dataset with it.
## Auto Quant Research Workflow
Qlib provides a tool named `qrun` to run the whole workflow automatically (including building dataset, training models, backtest and evaluation). You can start an auto quant research workflow and have a graphical reports analysis according to the following steps:
1. Quant Research Workflow: Run `qrun` with lightgbm workflow config ([workflow_config_lightgbm.yaml](examples/benchmarks/LightGBM/workflow_config_lightgbm.yaml)) as following.
1. Quant Research Workflow: Run `qrun` with lightgbm workflow config ([workflow_config_lightgbm_Alpha158.yaml](examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml)) as follows.
```bash
cd examples # Avoid running program under the directory contains `qlib`
qrun benchmarks/LightGBM/workflow_config_lightgbm.yaml
qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
```
The result of `qrun` is as follows, please refer to please refer to [Intraday Trading](https://qlib.readthedocs.io/en/latest/component/backtest.html) for more details about the result.
If users want to use `qrun` under debug mode, please use the following command:
```bash
python -m pdb qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
```
The result of `qrun` is as follows, please refer to [Intraday Trading](https://qlib.readthedocs.io/en/latest/component/backtest.html) for more details about the result.
```bash
@@ -198,16 +222,17 @@ The automatic workflow may not suite the research workflow of all Quant research
# [Quant Model Zoo](examples/benchmarks)
Here is a list of models built on `Qlib`.
- [GBDT based on LightGBM (Guolin Ke, et al.)](qlib/contrib/model/gbdt.py)
- [GBDT based on Catboost (Liudmila Prokhorenkova, et al.)](qlib/contrib/model/catboost_model.py)
- [GBDT based on XGBoost (Tianqi Chen, et al.)](qlib/contrib/model/xgboost.py)
- [GBDT based on XGBoost (Tianqi Chen, et al. 2016)](qlib/contrib/model/xgboost.py)
- [GBDT based on LightGBM (Guolin Ke, et al. 2017)](qlib/contrib/model/gbdt.py)
- [GBDT based on Catboost (Liudmila Prokhorenkova, et al. 2017)](qlib/contrib/model/catboost_model.py)
- [MLP based on pytorch](qlib/contrib/model/pytorch_nn.py)
- [GRU based on pytorch (Kyunghyun Cho, et al.)](qlib/contrib/model/pytorch_gru.py)
- [LSTM based on pytorcn (Sepp Hochreiter, et al.)](qlib/contrib/model/pytorch_lstm.py)
- [ALSTM based on pytorcn (Yao Qin, et al.)](qlib/contrib/model/pytorch_alstm.py)
- [GATs based on pytorch (Petar Velickovic, et al.)](qlib/contrib/model/pytorch_gats.py)
- [SFM based on pytorch (Liheng Zhang, et al.)](qlib/contrib/model/pytorch_sfm.py)
- [TFT based on tensorflow (Bryan Lim, et al.)](examples/benchmarks/TFT/tft.py)
- [LSTM based on pytorch (Sepp Hochreiter, et al. 1997)](qlib/contrib/model/pytorch_lstm.py)
- [GRU based on pytorch (Kyunghyun Cho, et al. 2014)](qlib/contrib/model/pytorch_gru.py)
- [ALSTM based on pytorch (Yao Qin, et al. 2017)](qlib/contrib/model/pytorch_alstm.py)
- [GATs based on pytorch (Petar Velickovic, et al. 2017)](qlib/contrib/model/pytorch_gats.py)
- [SFM based on pytorch (Liheng Zhang, et al. 2017)](qlib/contrib/model/pytorch_sfm.py)
- [TFT based on tensorflow (Bryan Lim, et al. 2019)](examples/benchmarks/TFT/tft.py)
- [TabNet based on pytorch (Sercan O. Arik, et al. 2019)](qlib/contrib/model/pytorch_tabnet.py)
Your PR of new Quant models is highly welcomed.
@@ -246,6 +271,14 @@ Dataset plays a very important role in Quant. Here is a list of the datasets bui
[Here](https://qlib.readthedocs.io/en/latest/advanced/alpha.html) is a tutorial to build dataset with `Qlib`.
Your PR to build new Quant dataset is highly welcomed.
# High-Frequency Execution
High-frequency order execution is a fundamental problem in quantitative finance.
It aims at fulfilling a specific trading order, either liquidation or acquirement, for a given instrument.
AI has the potential to mine patterns from a huge mass of high-frequency market data and helps traders make better decisions during order execution.
Here is a list of solutions built on `Qlib`.
- [Universal Trading for Order Execution with Oracle Policy Distillation](examples/trade/)
# More About Qlib
The detailed documents are organized in [docs](docs/).
[Sphinx](http://www.sphinx-doc.org) and the readthedocs theme are required to build the documentation in HTML format.
@@ -288,7 +321,11 @@ Such overheads greatly slow down the data loading process.
Qlib data are stored in a compact format, which is efficient to be combined into arrays for scientific computation.
# Related Reports
- [Guide To Qlib: Microsoft's AI Investment Platform](https://analyticsindiamag.com/qlib/)
- [【华泰金工林晓明团队】微软AI量化投资平台Qlib体验——华泰人工智能系列之四十](https://mp.weixin.qq.com/s/Brcd7im4NibJOJzZfMn6tQ)
- [微软也搞AI量化平台还是开源的](https://mp.weixin.qq.com/s/47bP5YwxfTp2uTHjUBzJQQ)
- [微矿Qlib业内首个AI量化投资开源平台](https://mp.weixin.qq.com/s/vsJv7lsgjEi-ALYUz4CvtQ)
# Contributing

12
docs/_static/demo.sh vendored Normal file
View File

@@ -0,0 +1,12 @@
#!/bin/sh
git clone https://github.com/microsoft/qlib.git
cd qlib
ls
pip install pyqlib
# or
# pip install numpy
# pip install --upgrade cython
# python setup.py install
cd examples
ls
qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml

View File

@@ -50,57 +50,37 @@ Users can use ``Data Handler`` to build formulaic alphas `MACD` in qlib:
.. code-block:: python
>> from qlib.data.dataset.handler import QLibDataHandler
>> from qlib.data.dataset.loader import QlibDataLoader
>> MACD_EXP = '(EMA($close, 12) - EMA($close, 26))/$close - EMA((EMA($close, 12) - EMA($close, 26))/$close, 9)/$close'
>> fields = [MACD_EXP] # MACD
>> names = ['MACD']
>> labels = ['$close'] # label
>> labels = ['Ref($close, -2)/Ref($close, -1) - 1'] # label
>> label_names = ['LABEL']
>> data_handler = QLibDataHandler(start_date='2010-01-01', end_date='2017-12-31', fields=fields, names=names, labels=labels, label_names=label_names)
>> TRAINER_CONFIG = {
.. "train_start_date": "2007-01-01",
.. "train_end_date": "2014-12-31",
.. "validate_start_date": "2015-01-01",
.. "validate_end_date": "2016-12-31",
.. "test_start_date": "2017-01-01",
.. "test_end_date": "2020-08-01",
>> data_loader_config = {
.. "feature": (fields, names),
.. "label": (labels, label_names)
.. }
>> feature_train, label_train, feature_validate, label_validate, feature_test, label_test = data_handler.get_split_data(**TRAINER_CONFIG)
>> print(feature_train, label_train)
MACD
instrument datetime
SH600000 2010-01-04 -0.008625
2010-01-05 -0.007234
2010-01-06 -0.007693
2010-01-07 -0.009633
2010-01-08 -0.009891
... ...
SZ300251 2014-12-25 0.043072
2014-12-26 0.041345
2014-12-29 0.042733
2014-12-30 0.042066
2014-12-31 0.036299
[322025 rows x 1 columns]
LABEL
instrument datetime
SH600000 2010-01-04 4.260015
2010-01-05 4.292182
2010-01-06 4.207747
2010-01-07 4.113258
2010-01-08 4.159496
... ...
SZ300251 2014-12-25 4.343212
2014-12-26 4.470587
2014-12-29 4.762474
2014-12-30 4.369748
2014-12-31 4.182222
[322025 rows x 1 columns]
>> data_loader = QlibDataLoader(config=data_loader_config)
>> df = data_loader.load(instruments='csi300', start_time='2010-01-01', end_time='2017-12-31')
>> print(df)
feature label
MACD LABEL
datetime instrument
2010-01-04 SH600000 -0.011547 -0.019672
SH600004 0.002745 -0.014721
SH600006 0.010133 0.002911
SH600008 -0.001113 0.009818
SH600009 0.025878 -0.017758
... ... ...
2017-12-29 SZ300124 0.007306 -0.005074
SZ300136 -0.013492 0.056352
SZ300144 -0.000966 0.011853
SZ300251 0.004383 0.021739
SZ300315 -0.030557 0.012455
Reference
===========
To learn more about ``Data Handler``, please refer to `Data Handler <../component/data.html>`_
To learn more about ``Data Loader``, please refer to `Data Loader <../component/data.html#data-loader>`_
To learn more about ``Data API``, please refer to `Data API <../component/data.html>`_

View File

@@ -126,17 +126,17 @@ After conversion, users can find their Qlib format data in the directory `~/.qli
The arguments of `--include_fields` should correspond with the column names of CSV files. The columns names of dataset provided by ``Qlib`` should include open, close, high, low, volume and factor at least.
- `open`
The opening price
The adjusted opening price
- `close`
The closing price
The adjusted closing price
- `high`
The highest price
The adjusted highest price
- `low`
The lowest price
The adjusted lowest price
- `volume`
The trading volume
The adjusted trading volume
- `factor`
The Restoration factor
The Restoration factor. Normally, ``factor = adjusted_price / original_price``, `adjusted price` reference: `split adjusted <https://www.investopedia.com/terms/s/splitadjusted.asp>`_
In the convention of `Qlib` data processing, `open, close, high, low, volume, money and factor` will be set to NaN if the stock is suspended.
@@ -195,6 +195,7 @@ Feature
- `ExpressionOps`
`ExpressionOps` will use operator for feature construction.
To know more about ``Operator``, please refer to `Operator API <../reference/api.html#module-qlib.data.ops>`_.
``Qlib`` also allows users to define their own custom ``Operator``; an example is given in ``tests/test_register_ops.py``.
To know more about ``Feature``, please refer to `Feature API <../reference/api.html#module-qlib.data.base>`_.
@@ -295,6 +296,7 @@ The ``Processor`` module in ``Qlib`` is designed to be learnable and it is respo
- ``RobustZScoreNorm``: `processor` that applies robust z-score normalization.
- ``CSZScoreNorm``: `processor` that applies cross sectional z-score normalization.
- ``CSRankNorm``: `processor` that applies cross sectional rank normalization.
- ``CSZFillna``: `processor` that fills N/A values in a cross sectional way by the mean of the column.
Users can also create their own `processor` by inheriting the base class of ``Processor``. Please refer to the implementation of all the processors for more information (`Processor Link <https://github.com/microsoft/qlib/blob/main/qlib/data/dataset/processor.py>`_).

View File

@@ -34,8 +34,9 @@ Here is a general view of the structure of the system:
- Recorder 2
- ...
- ...
This experiment management system defines a set of interface and provided a concrete implementation based on the machine learning platform: ``MLFlow`` (`link <https://mlflow.org/>`_).
This experiment management system defines a set of interfaces and provides a concrete implementation, ``MLflowExpManager``, which is based on the machine learning platform ``MLFlow`` (`link <https://mlflow.org/>`_).
If users set the implementation of ``ExpManager`` to be ``MLflowExpManager``, they can use the command `mlflow ui` to visualize and check the experiment results. For more information, please refer to the related documents `here <https://www.mlflow.org/docs/latest/cli.html#mlflow-ui>`_.
Qlib Recorder
===================
@@ -91,7 +92,7 @@ Record Template
The ``RecordTemp`` class enables generating experiment results such as IC and backtest in a certain format. We have provided three different `Record Template` classes:
- ``SignalRecord``: This class generates the `preidction` results of the model.
- ``SignalRecord``: This class generates the `prediction` results of the model.
- ``SigAnaRecord``: This class generates the `IC`, `ICIR`, `Rank IC` and `Rank ICIR` of the model.
- ``PortAnaRecord``: This class generates the results of `backtest`. The detailed information about `backtest` as well as the available `strategy`, users can refer to `Strategy <../component/strategy.html>`_ and `Backtest <../component/backtest.html>`_.

View File

@@ -103,6 +103,12 @@ After saving the config into `configuration.yaml`, users could start the workflo
qrun configuration.yaml
If users want to use ``qrun`` under debug mode, please use the following command:
.. code-block:: bash
python -m pdb qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
.. note::
`qrun` will be placed in your $PATH directory when installing ``Qlib``.

View File

@@ -226,3 +226,8 @@ epub_exclude_files = ["search.html"]
autodoc_member_order = "bysource"
autodoc_default_flags = ["members"]
autodoc_default_options = {
"members": True,
"member-order": "bysource",
"special-members": "__init__",
}

View File

@@ -1,4 +1,5 @@
Cython
cmake
numpy
scipy
scikit-learn
scikit-learn

View File

@@ -63,6 +63,7 @@ Besides `provider_uri` and `region`, `qlib.init` has other parameters. The follo
If Qlib fails to connect redis via `redis_host` and `redis_port`, cache mechanism will not be used! Please refer to `Cache <../component/data.html#cache>`_ for details.
- `exp_manager`
Type: dict, optional parameter, the setting of `experiment manager` to be used in qlib. Users can specify an experiment manager class, as well as the tracking URI for all the experiments. However, please be aware that we only support input of a dictionary in the following style for `exp_manager`. For more information about `exp_manager`, users can refer to `Recorder: Experiment Management <../component/recorder.html>`_.
.. code-block:: Python
# For example, if you want to set your tracking_uri to a <specific folder>, you can initialize qlib below

View File

@@ -1,6 +1,6 @@
# Requirements
Here is the minimal hardware requirements to run the example.
Here are the minimum hardware requirements to run the `workflow_by_code` example.
- Memory: 16G
- Free Disk: 5G

View File

@@ -64,7 +64,6 @@ task:
loss: mse
n_jobs: 20
GPU: 0
rnn_type: GRU
dataset:
class: TSDatasetH
module_path: qlib.data.dataset

View File

@@ -64,7 +64,6 @@ task:
loss: mse
n_jobs: 20
GPU: 0
rnn_type: GRU
dataset:
class: TSDatasetH
module_path: qlib.data.dataset

View File

@@ -1,32 +1,35 @@
# Benchmarks Performance
Here are the results of each benchmark model running on Qlib's `Alpha360` and `Alpha158` dataset with China's A shared-stock & CSI300 data respectively. The values of each metric are the mean and std calculated based on 10 runs.
Here are the results of each benchmark model running on Qlib's `Alpha360` and `Alpha158` dataset with China's A shared-stock & CSI300 data respectively. The values of each metric are the mean and std calculated based on 20 runs.
The numbers shown below demonstrate the performance of the entire `workflow` of each model. We will update the `workflow` as well as models in the near future for better results.
## Alpha360 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|---|---|---|---|---|---|---|---|---|
| Linear | Alpha360 | 0.0150±0.00 | 0.1049±0.00| 0.0284±0.00 | 0.1970±0.00 | -0.0655±0.00 | -0.6985±0.00| -0.2961±0.00 |
| Linear | Alpha360 | 0.0150±0.00 | 0.1049±0.00| 0.0284±0.00 | 0.1970±0.00 | -0.0659±0.00 | -0.7072±0.00| -0.2955±0.00 |
| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha360 | 0.0397±0.00 | 0.2878±0.00| 0.0470±0.00 | 0.3703±0.00 | 0.0342±0.00 | 0.4092±0.00| -0.1057±0.00 |
| XGBoost (Tianqi Chen, et al.) | Alpha360 | 0.0400±0.00 | 0.3031±0.00| 0.0461±0.00 | 0.3862±0.00 | 0.0528±0.00 | 0.6307±0.00| -0.1113±0.00 |
| LightGBM (Guolin Ke, et al.) | Alpha360 | 0.0399±0.00 | 0.3075±0.00| 0.0492±0.00 | 0.4019±0.00 | 0.0323±0.00 | 0.4370±0.00| -0.0917±0.00 |
| MLP | Alpha360 | 0.0253±0.01 | 0.1954±0.05| 0.0329±0.00 | 0.2687±0.04 | 0.0161±0.01 | 0.1989±0.19| -0.1275±0.03 |
| GRU (Kyunghyun Cho, et al.) | Alpha360 | 0.0503±0.01 | 0.3946±0.06| 0.0588±0.00 | 0.4737±0.05 | 0.0799±0.02 | 1.0940±0.26| -0.0810±0.03 |
| LSTM (Sepp Hochreiter, et al.) | Alpha360 | 0.0466±0.01 | 0.3644±0.06| 0.0555±0.00 | 0.4451±0.04 | 0.0783±0.05 | 1.0539±0.65| -0.0844±0.03 |
| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0472±0.00 | 0.3558±0.04| 0.0577±0.00 | 0.4522±0.04 | 0.0522±0.02 | 0.7090±0.32| -0.1059±0.03 |
| GATs (Petar Velickovic, et al.) | Alpha360 | 0.0480±0.00 | 0.3555±0.02| 0.0598±0.00 | 0.4616±0.01 | 0.0857±0.03 | 1.1317±0.42| -0.0917±0.01 |
| MLP | Alpha360 | 0.0285±0.00 | 0.1981±0.02| 0.0402±0.00 | 0.2993±0.02 | 0.0073±0.02 | 0.0880±0.22| -0.1446±0.03 |
| GRU (Kyunghyun Cho, et al.) | Alpha360 | 0.0490±0.01 | 0.3787±0.05| 0.0581±0.00 | 0.4664±0.04 | 0.0726±0.02 | 0.9817±0.34| -0.0902±0.03 |
| LSTM (Sepp Hochreiter, et al.) | Alpha360 | 0.0443±0.01 | 0.3401±0.05| 0.0536±0.01 | 0.4248±0.05 | 0.0627±0.03 | 0.8441±0.48| -0.0882±0.03 |
| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 |
| GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 |
## Alpha158 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|---|---|---|---|---|---|---|---|---|
| Linear | Alpha158 | 0.0393±0.00 | 0.2980±0.00| 0.0475±0.00 | 0.3546±0.00 | 0.0795±0.00 | 1.0712±0.00| -0.1449±0.00 |
| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha158 | 0.0503±0.00 | 0.3586±0.00| 0.0483±0.00 | 0.3667±0.00 | 0.1080±0.00 | 1.1567±0.00| -0.0787±0.00 |
| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha158 | 0.0503±0.00 | 0.3586±0.00| 0.0483±0.00 | 0.3667±0.00 | 0.1080±0.00 | 1.1561±0.00| -0.0787±0.00 |
| XGBoost (Tianqi Chen, et al.) | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 |
| LightGBM (Guolin Ke, et al.) | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 |
| MLP | Alpha158 | 0.0363±0.00 | 0.2770±0.02| 0.0421±0.00 | 0.3167±0.01 | 0.0856±0.01 | 1.0397±0.12| -0.1134±0.01 |
| TFT (Bryan Lim, et al.) | Alpha158 (with selected 20 features) | 0.0344±0.00 | 0.2071±0.02| 0.0103±0.00 | 0.0632±0.01 | 0.0638±0.00 | 0.5845±0.08| -0.1754±0.02 |
| GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0302±0.00 | 0.2353±0.03| 0.0411±0.00 | 0.3309±0.03 | 0.0302±0.02 | 0.4353±0.28| -0.1140±0.02 |
| LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0359±0.01 | 0.2774±0.06| 0.0448±0.01 | 0.3597±0.05 | 0.0402±0.03 | 0.5743±0.41| -0.1152±0.03 |
| ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0329±0.01 | 0.2465±0.07| 0.0450±0.01 | 0.3485±0.06 | 0.0288±0.04 | 0.4163±0.50| -0.1269±0.04 |
| GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2526±0.01| 0.0454±0.00 | 0.3531±0.01 | 0.0561±0.01 | 0.7992±0.19| -0.0751±0.02 |
| MLP | Alpha158 | 0.0358±0.00 | 0.2738±0.03| 0.0425±0.00 | 0.3221±0.01 | 0.0836±0.02 | 1.0323±0.25| -0.1127±0.02 |
| TabNet with pretrain (Sercan O. Arik, et al.) | Alpha158 | 0.0344±0.00|0.205±0.11|0.0398±0.00 |0.3479±0.01|0.0827±0.02|1.1141±0.32 |-0.0925±0.02 |
| TFT (Bryan Lim, et al.) | Alpha158 (with selected 20 features) | 0.0343±0.00 | 0.2071±0.02| 0.0107±0.00 | 0.0660±0.02 | 0.0623±0.02 | 0.5818±0.20| -0.1762±0.01 |
| GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0311±0.00 | 0.2418±0.04| 0.0425±0.00 | 0.3434±0.02 | 0.0330±0.02 | 0.4805±0.30| -0.1021±0.02 |
| LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0312±0.00 | 0.2394±0.04| 0.0418±0.00 | 0.3324±0.03 | 0.0298±0.02 | 0.4198±0.33| -0.1348±0.03 |
| ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 |
| GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 |
- The selected 20 features are based on the feature importance of a lightgbm-based model.

View File

@@ -25,7 +25,7 @@ import os
import data_formatters.qlib_Alpha158
class ExperimentConfig(object):
class ExperimentConfig:
"""Defines experiment configs and paths to outputs.
Attributes:

View File

@@ -320,7 +320,7 @@ class InterpretableMultiHeadAttention:
return outputs, attn
class TFTDataCache(object):
class TFTDataCache:
"""Caches data for the TFT."""
_data_cache = {}
@@ -348,7 +348,7 @@ class TFTDataCache(object):
# TFT model definitions.
class TemporalFusionTransformer(object):
class TemporalFusionTransformer:
"""Defines Temporal Fusion Transformer.
Attributes:
@@ -972,7 +972,7 @@ class TemporalFusionTransformer(object):
valid_quantiles = self.quantiles
output_size = self.output_size
class QuantileLossCalculator(object):
class QuantileLossCalculator:
"""Computes the combined quantile loss for prespecified quantiles.
Attributes:

Binary file not shown.

View File

@@ -0,0 +1,4 @@
pandas==1.1.2
numpy==1.17.4
scikit_learn==0.23.2
torch==1.7.0

View File

@@ -0,0 +1,74 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: TabnetModel
module_path: qlib.contrib.model.pytorch_tabnet
kwargs:
pretrain: True
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
pretrain: [2008-01-01, 2014-12-31]
pretrain_validation: [2015-01-01, 2016-12-31]  # same window as `valid`; must not overlap the `test` segment
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -0,0 +1,28 @@
# High-Frequency Dataset
This dataset is an example for RL high frequency trading.
## Get High-Frequency Data
Get high-frequency data by running the following command:
```bash
python workflow.py get_data
```
## Dump & Reload & Reinitialize the Dataset
The High-Frequency Dataset is implemented as `qlib.data.dataset.DatasetH` in `workflow.py`. `DatasetH` is a subclass of [`qlib.utils.serial.Serializable`](https://qlib.readthedocs.io/en/latest/advanced/serial.html), whose state can be dumped to or loaded from disk in `pickle` format.
### About Reinitialization
After reloading `Dataset` from disk, `Qlib` also supports reinitializing the dataset. This means that users can reset some states of `Dataset` or `DataHandler`, such as `instruments`, `start_time`, `end_time` and `segments`, and generate new data according to the new states.
The example is given in `workflow.py`, users can run the code as follows.
### Run the Code
Run the example by running the following command:
```bash
python workflow.py dump_and_load_dataset
```

View File

@@ -0,0 +1,174 @@
from qlib.data.dataset.handler import DataHandler, DataHandlerLP
from qlib.data.dataset.processor import Processor
from qlib.utils import get_cls_kwargs
from qlib.log import TimeInspector
class HighFreqHandler(DataHandlerLP):
    """Data handler that builds 1-minute price/volume expressions for the high-frequency example.

    The handler configures a ``QlibDataLoader`` with ``freq="1min"`` and the
    expression list returned by :meth:`get_feature_config`, and injects the
    fit window into every processor config before delegating to
    ``DataHandlerLP``.
    """

    def __init__(
        self,
        instruments="csi300",
        start_time=None,
        end_time=None,
        infer_processors=None,
        learn_processors=None,
        fit_start_time=None,
        fit_end_time=None,
        drop_raw=True,
    ):
        """
        Parameters
        ----------
        instruments : str
            instrument universe forwarded to ``DataHandlerLP``.
        start_time, end_time
            data range forwarded to ``DataHandlerLP``.
        infer_processors, learn_processors : list of dict, optional
            processor configs; ``None`` (the default) is treated as an empty list.
            Each config receives ``fit_start_time`` / ``fit_end_time`` in its ``kwargs``.
        fit_start_time, fit_end_time
            fit window injected into every processor config.
        drop_raw : bool
            forwarded to ``DataHandlerLP``.
        """

        def check_transform_proc(proc_l):
            # Build new config dicts instead of updating the caller's in place, so that
            # shared (e.g. class-level) configs are not silently modified, and tolerate
            # configs that do not carry a "kwargs" entry.
            new_l = []
            for p in proc_l or []:
                new_p = dict(p)
                new_p["kwargs"] = {
                    **(p.get("kwargs") or {}),
                    "fit_start_time": fit_start_time,
                    "fit_end_time": fit_end_time,
                }
                new_l.append(new_p)
            return new_l

        infer_processors = check_transform_proc(infer_processors)
        learn_processors = check_transform_proc(learn_processors)

        data_loader = {
            "class": "QlibDataLoader",
            "kwargs": {
                "config": self.get_feature_config(),
                "swap_level": False,
                "freq": "1min",
            },
        }
        super().__init__(
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
            data_loader=data_loader,
            infer_processors=infer_processors,
            learn_processors=learn_processors,
            drop_raw=drop_raw,
        )

    def get_feature_config(self):
        """Return ``(fields, names)``: the expression strings and their column names.

        Thirteen columns are produced, in this order: five normalized prices,
        the same five shifted by 240 bars (``*_1``), the normalized volume, the
        shifted volume (``$volume_1``) and ``date``.
        """
        fields = []
        names = []

        template_if = "If(IsNull({1}), {0}, {1})"
        template_paused = "Select(Or(IsNull($paused), Eq($paused, 0.0)), {0})"
        template_fillnan = "BFillNan(FFillNan({0}))"
        # Because there is no vwap field in the yahoo data, a method similar to Simpson integration is used to approximate vwap
        simpson_vwap = "($open + 2*$high + 2*$low + $close)/6"

        # Forward/backward filled close of the non-paused bars; used both as the
        # fallback for missing prices and as the normalization base.
        filled_close = template_fillnan.format(template_paused.format("$close"))

        def get_normalized_price_feature(price_field, shift=0):
            """Get normalized price feature ops"""
            if shift == 0:
                template_norm = "Cut({0}/Ref(DayLast({1}), 240), 240, None)"
            else:
                template_norm = "Cut(Ref({0}, " + str(shift) + ")/Ref(DayLast({1}), 240), 240, None)"

            return template_norm.format(
                template_if.format(filled_close, template_paused.format(price_field)),
                filled_close,
            )

        price_fields = ["$open", "$high", "$low", "$close", simpson_vwap]

        fields += [get_normalized_price_feature(field, 0) for field in price_fields]
        names += ["$open", "$high", "$low", "$close", "$vwap"]

        fields += [get_normalized_price_feature(field, 240) for field in price_fields]
        names += ["$open_1", "$high_1", "$low_1", "$close_1", "$vwap_1"]

        # Volume is set to 0 when it is missing or when the approximated vwap falls
        # outside [0.999 * low, 1.001 * high].
        clean_volume = "If(IsNull({0}), 0, If(Or(Gt({1}, Mul(1.001, {3})), Lt({1}, Mul(0.999, {2}))), 0, {0}))".format(
            template_paused.format("$volume"),
            template_paused.format(simpson_vwap),
            template_paused.format("$low"),
            template_paused.format("$high"),
        )

        fields += ["Cut({0}/Ref(DayLast(Mean({0}, 7200)), 240), 240, None)".format(clean_volume)]
        names += ["$volume"]
        fields += ["Cut(Ref({0}, 240)/Ref(DayLast(Mean({0}, 7200)), 240), 240, None)".format(clean_volume)]
        names += ["$volume_1"]

        fields += ["Cut({0}, 240, None)".format(template_paused.format("Date($close)"))]
        names += ["date"]

        return fields, names
class HighFreqBacktestHandler(DataHandler):
    """Data handler exposing close, approximated vwap and volume at 1-minute frequency for backtesting."""

    def __init__(
        self,
        instruments="csi300",
        start_time=None,
        end_time=None,
    ):
        loader_kwargs = {
            "config": self.get_feature_config(),
            "swap_level": False,
            "freq": "1min",
        }
        super().__init__(
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
            data_loader={"class": "QlibDataLoader", "kwargs": loader_kwargs},
        )

    def get_feature_config(self):
        """Return ``(fields, names)`` for the three backtest columns ``$close0``, ``$vwap0`` and ``$volume0``."""
        template_if = "If(IsNull({1}), {0}, {1})"
        template_paused = "Select(Or(IsNull($paused), Eq($paused, 0.0)), {0})"
        template_fillnan = "BFillNan(FFillNan({0}))"
        # Because there is no vwap field in the yahoo data, a method similar to Simpson integration is used to approximate vwap
        simpson_vwap = "($open + 2*$high + 2*$low + $close)/6"

        # Close price of the non-paused bars, forward then backward filled.
        close_expr = "Cut({0}, 240, None)".format(template_fillnan.format(template_paused.format("$close")))

        # Approximated vwap, falling back to the filled close where it is missing.
        vwap_expr = "Cut({0}, 240, None)".format(
            template_if.format(
                template_fillnan.format(template_paused.format("$close")),
                template_paused.format(simpson_vwap),
            )
        )

        # Volume, zeroed where missing or where the approximated vwap is outside the low/high band.
        volume_expr = (
            "Cut(If(IsNull({0}), 0, If(Or(Gt({1}, Mul(1.001, {3})), Lt({1}, Mul(0.999, {2}))), 0, {0})), 240, None)".format(
                template_paused.format("$volume"),
                template_paused.format(simpson_vwap),
                template_paused.format("$low"),
                template_paused.format("$high"),
            )
        )

        fields = [close_expr, vwap_expr, volume_expr]
        names = ["$close0", "$vwap0", "$volume0"]
        return fields, names

View File

@@ -0,0 +1,190 @@
import numpy as np
import pandas as pd
import importlib
from qlib.data.ops import ElemOperator, PairOperator
from qlib.config import C
from qlib.data.cache import H
from qlib.data.data import Cal
def get_calendar_day(freq="day", future=False):
    """Return the calendar of ``freq`` reduced to dates, cached in ``H["c"]``.

    Parameters
    ----------
    freq : str
        frequency of the calendar to read.
    future : bool
        whether to include future trading days.

    Returns
    -------
    _calendar:
        numpy array holding the ``.date()`` of every calendar entry.
    """
    cache_key = f"{freq}_future_{future}_day"
    if cache_key not in H["c"]:
        # Cache miss: convert every calendar timestamp to its date and remember the result.
        day_array = np.array([stamp.date() for stamp in Cal.load_calendar(freq, future)])
        H["c"][cache_key] = day_array
    else:
        day_array = H["c"][cache_key]
    return day_array
class DayLast(ElemOperator):
    """DayLast Operator

    Parameters
    ----------
    feature : Expression
        feature instance

    Returns
    ----------
    feature:
        a series in which every value is replaced by the last value of its calendar day
    """

    def _load_internal(self, instrument, start_index, end_index, freq):
        day_calendar = get_calendar_day(freq=freq)
        loaded = self.feature.load(instrument, start_index, end_index, freq)
        # Group rows by the calendar day looked up through the series index.
        day_of_row = day_calendar[loaded.index]
        return loaded.groupby(day_of_row).transform("last")
class FFillNan(ElemOperator):
    """FFillNan Operator

    Parameters
    ----------
    feature : Expression
        feature instance

    Returns
    ----------
    feature:
        the feature with NaN values forward filled
    """

    def _load_internal(self, instrument, start_index, end_index, freq):
        series = self.feature.load(instrument, start_index, end_index, freq)
        # `fillna(method="ffill")` is deprecated in recent pandas; `ffill()` is the equivalent call.
        return series.ffill()
class BFillNan(ElemOperator):
    """BFillNan Operator

    Parameters
    ----------
    feature : Expression
        feature instance

    Returns
    ----------
    feature:
        the feature with NaN values backward filled
    """

    def _load_internal(self, instrument, start_index, end_index, freq):
        series = self.feature.load(instrument, start_index, end_index, freq)
        # `fillna(method="bfill")` is deprecated in recent pandas; `bfill()` is the equivalent call.
        return series.bfill()
class Date(ElemOperator):
    """Date Operator

    Parameters
    ----------
    feature : Expression
        feature instance

    Returns
    ----------
    feature:
        a series whose values are the calendar dates looked up through feature.index
    """

    def _load_internal(self, instrument, start_index, end_index, freq):
        day_calendar = get_calendar_day(freq=freq)
        loaded = self.feature.load(instrument, start_index, end_index, freq)
        row_index = loaded.index
        # Only the index of the loaded feature is used; its values are discarded.
        return pd.Series(day_calendar[row_index], index=row_index)
class Select(PairOperator):
    """Select Operator

    Parameters
    ----------
    feature_left : Expression
        feature instance, the selection condition
    feature_right : Expression
        feature instance, the values to select from

    Returns
    ----------
    feature:
        the entries of feature_right selected by feature_left
    """

    def _load_internal(self, instrument, start_index, end_index, freq):
        mask = self.feature_left.load(instrument, start_index, end_index, freq)
        candidates = self.feature_right.load(instrument, start_index, end_index, freq)
        # `.loc` with the loaded condition keeps only the selected rows.
        return candidates.loc[mask]
class IsNull(ElemOperator):
    """IsNull Operator

    Parameters
    ----------
    feature : Expression
        feature instance

    Returns
    ----------
    feature:
        a boolean series that is True where the feature is null
    """

    def _load_internal(self, instrument, start_index, end_index, freq):
        return self.feature.load(instrument, start_index, end_index, freq).isnull()
class Cut(ElemOperator):
    """Cut Operator

    Parameters
    ----------
    feature : Expression
        feature instance
    l : int
        l > 0, drop the first l elements of feature (default is None, which means 0)
    r : int
        r < 0, drop the last -r elements of feature (default is None, which means 0)

    Returns
    ----------
    feature:
        the feature without its first l and last -r elements.
        Note: the elements are removed from the raw data, not the sliced data
    """

    def __init__(self, feature, l=None, r=None):
        self.l = l
        self.r = r
        # Reject a non-positive left bound or a non-negative right bound.
        if self.l is not None and self.l <= 0:
            raise ValueError("Cut operator l shoud > 0 and r should < 0")
        if self.r is not None and self.r >= 0:
            raise ValueError("Cut operator l shoud > 0 and r should < 0")
        super().__init__(feature)

    def _load_internal(self, instrument, start_index, end_index, freq):
        loaded = self.feature.load(instrument, start_index, end_index, freq)
        return loaded.iloc[self.l : self.r]

    def get_extended_window_size(self):
        # The wrapped feature's window is widened by the number of elements cut on each side.
        extra_left = self.l if self.l is not None else 0
        extra_right = abs(self.r) if self.r is not None else 0
        inner_left, inner_right = self.feature.get_extended_window_size()
        return inner_left + extra_left, inner_right + extra_right

View File

@@ -0,0 +1,72 @@
import numpy as np
import pandas as pd
from qlib.data.dataset.processor import Processor
from qlib.data.dataset.utils import fetch_df_by_index
class HighFreqNorm(Processor):
    """Robust normalization of the high-frequency feature frame, followed by a per-day reshape.

    Columns are addressed by position: columns 0-9 form the "price" group and
    columns 10-11 the "volume" group. Each group is normalized jointly with a
    single median and a single MAD-based scale.
    """

    def __init__(self, fit_start_time, fit_end_time):
        # Bounds of the "datetime" slice used to estimate the statistics in `fit`.
        self.fit_start_time = fit_start_time
        self.fit_end_time = fit_end_time

    def fit(self, df_features):
        """Estimate median, scale, max and min of each column group on the fit window."""
        fetch_df = fetch_df_by_index(df_features, slice(self.fit_start_time, self.fit_end_time), level="datetime")
        del df_features
        df_values = fetch_df.values
        names = {
            "price": slice(0, 10),
            "volume": slice(10, 12),
        }
        self.feature_med = {}
        self.feature_std = {}
        self.feature_vmax = {}
        self.feature_vmin = {}
        for name, name_val in names.items():
            part_values = df_values[:, name_val].astype(np.float32)
            if name == "volume":
                # Volume statistics are computed on log1p-transformed values.
                part_values = np.log1p(part_values)
            self.feature_med[name] = np.nanmedian(part_values)
            part_values = part_values - self.feature_med[name]
            # Median absolute deviation scaled by 1.4826; the 1e-12 keeps the divisor non-zero.
            self.feature_std[name] = np.nanmedian(np.absolute(part_values)) * 1.4826 + 1e-12
            part_values = part_values / self.feature_std[name]
            # Extremes of the normalized values; used in `__call__` to rescale values beyond +/-3.
            self.feature_vmax[name] = np.nanmax(part_values)
            self.feature_vmin[name] = np.nanmin(part_values)

    def __call__(self, df_features):
        """Normalize ``df_features`` with the fitted statistics and return one row per index entry left after dropping "datetime".

        NOTE(review): the "date" column is moved into the index of the passed
        frame in place, so the caller's frame is modified.
        """
        df_features.set_index("date", append=True, drop=True, inplace=True)
        df_values = df_features.values
        names = {
            "price": slice(0, 10),
            "volume": slice(10, 12),
        }
        for name, name_val in names.items():
            if name == "volume":
                df_values[:, name_val] = np.log1p(df_values[:, name_val])
            df_values[:, name_val] -= self.feature_med[name]
            df_values[:, name_val] /= self.feature_std[name]
            # All four masks are computed before any value is rewritten below.
            slice0 = df_values[:, name_val] > 3.0
            slice1 = df_values[:, name_val] > 3.5
            slice2 = df_values[:, name_val] < -3.0
            slice3 = df_values[:, name_val] < -3.5

            # Values above 3 are rescaled linearly relative to the fitted maximum ...
            df_values[:, name_val][slice0] = (
                3.0 + (df_values[:, name_val][slice0] - 3.0) / (self.feature_vmax[name] - 3) * 0.5
            )
            # ... and values that were above 3.5 before rescaling are set to 3.5.
            df_values[:, name_val][slice1] = 3.5
            # Same treatment on the negative side, relative to the fitted minimum.
            df_values[:, name_val][slice2] = (
                -3.0 - (df_values[:, name_val][slice2] + 3.0) / (self.feature_vmin[name] + 3) * 0.5
            )
            df_values[:, name_val][slice3] = -3.5
        # Drop the "datetime" level and deduplicate; the remaining levels are renamed below.
        # presumably these are (instrument, date) — TODO confirm against the loader's index order
        idx = df_features.index.droplevel("datetime").drop_duplicates()
        idx.set_names(["instrument", "datetime"], inplace=True)

        # Reshape is specifically for adapting to RL high-freq executor
        # assumes every index entry has exactly 240 rows — TODO confirm
        feat = df_values[:, [0, 1, 2, 3, 4, 10]].reshape(-1, 6 * 240)
        feat_1 = df_values[:, [5, 6, 7, 8, 9, 11]].reshape(-1, 6 * 240)
        df_new_features = pd.DataFrame(
            data=np.concatenate((feat, feat_1), axis=1),
            index=idx,
            columns=["FEATURE_%d" % i for i in range(12 * 240)],
        ).sort_index()
        return df_new_features

View File

@@ -0,0 +1,217 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import sys
import fire
from pathlib import Path
import qlib
import pickle
import numpy as np
import pandas as pd
from qlib.config import REG_CN, HIGH_FREQ_CONFIG
from qlib.contrib.model.gbdt import LGBModel
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
backtest as normal_backtest,
risk_analysis,
)
from qlib.utils import init_instance_by_config, exists_qlib_data
from qlib.data.dataset.handler import DataHandlerLP
from qlib.data.ops import Operators
from qlib.data.data import Cal
from qlib.tests.data import GetData
from highfreq_ops import get_calendar_day, DayLast, FFillNan, BFillNan, Date, Select, IsNull, Cut
class HighfreqWorkflow:
    """Example workflow that builds, dumps, reloads and exports the high-frequency datasets.

    Two ``DatasetH`` configs are defined in ``task``: ``dataset`` (normalized
    features from ``HighFreqHandler``) and ``dataset_backtest`` (raw
    close/vwap/volume from ``HighFreqBacktestHandler``).
    """

    # Extra qlib.init settings: register the custom operators and disable the expression cache.
    SPEC_CONF = {"custom_ops": [DayLast, FFillNan, BFillNan, Date, Select, IsNull, Cut], "expression_cache": None}

    MARKET = "all"
    BENCHMARK = "SH000300"

    start_time = pd.Timestamp("2020-09-15 00:00:00")
    end_time = pd.Timestamp("2021-01-18 16:00:00")
    train_end_time = pd.Timestamp("2020-11-30 16:00:00")
    test_start_time = pd.Timestamp("2020-12-01 00:00:00")

    DATA_HANDLER_CONFIG0 = {
        "start_time": start_time,
        "end_time": end_time,
        "fit_start_time": start_time,
        "fit_end_time": train_end_time,
        "instruments": MARKET,
        "infer_processors": [{"class": "HighFreqNorm", "module_path": "highfreq_processor", "kwargs": {}}],
    }
    DATA_HANDLER_CONFIG1 = {
        "start_time": start_time,
        "end_time": end_time,
        "instruments": MARKET,
    }

    task = {
        "dataset": {
            "class": "DatasetH",
            "module_path": "qlib.data.dataset",
            "kwargs": {
                "handler": {
                    "class": "HighFreqHandler",
                    "module_path": "highfreq_handler",
                    "kwargs": DATA_HANDLER_CONFIG0,
                },
                "segments": {
                    "train": (start_time, train_end_time),
                    "test": (
                        test_start_time,
                        end_time,
                    ),
                },
            },
        },
        "dataset_backtest": {
            "class": "DatasetH",
            "module_path": "qlib.data.dataset",
            "kwargs": {
                "handler": {
                    "class": "HighFreqBacktestHandler",
                    "module_path": "highfreq_handler",
                    "kwargs": DATA_HANDLER_CONFIG1,
                },
                "segments": {
                    "train": (start_time, train_end_time),
                    "test": (
                        test_start_time,
                        end_time,
                    ),
                },
            },
        },
    }

    def _init_qlib(self):
        """Initialize qlib with the high-frequency config, downloading the 1min data if it is missing."""
        # use yahoo_cn_1min data
        QLIB_INIT_CONFIG = {**HIGH_FREQ_CONFIG, **self.SPEC_CONF}
        provider_uri = QLIB_INIT_CONFIG.get("provider_uri")
        if not exists_qlib_data(provider_uri):
            print(f"Qlib data is not found in {provider_uri}")
            GetData().qlib_data(target_dir=provider_uri, interval="1min", region=REG_CN)
        qlib.init(**QLIB_INIT_CONFIG)

    def _prepare_calender_cache(self):
        """Preload the 1min calendars so that they are already cached."""
        # This relies on the copy-on-write behavior of Linux to avoid calculating the calendar
        # multiple times in subprocesses. It may speed things up, but may not help on Windows and macOS.
        Cal.calendar(freq="1min")
        get_calendar_day(freq="1min")

    def get_data(self):
        """Use the datasets to get high-frequency data and print the train/test splits."""
        self._init_qlib()
        self._prepare_calender_cache()

        dataset = init_instance_by_config(self.task["dataset"])
        xtrain, xtest = dataset.prepare(["train", "test"])
        print(xtrain, xtest)

        dataset_backtest = init_instance_by_config(self.task["dataset_backtest"])
        backtest_train, backtest_test = dataset_backtest.prepare(["train", "test"])
        print(backtest_train, backtest_test)

        return

    def dump_and_load_dataset(self):
        """Dump both datasets to disk, reload them, reinitialize them on a new time range and print the test data."""
        self._init_qlib()
        self._prepare_calender_cache()
        dataset = init_instance_by_config(self.task["dataset"])
        dataset_backtest = init_instance_by_config(self.task["dataset_backtest"])

        ##=============dump dataset=============
        dataset.to_pickle(path="dataset.pkl")
        dataset_backtest.to_pickle(path="dataset_backtest.pkl")

        del dataset, dataset_backtest

        ##=============reload dataset=============
        # NOTE: the pickles loaded here were written by this method just above.
        with open("dataset.pkl", "rb") as file_dataset:
            dataset = pickle.load(file_dataset)

        with open("dataset_backtest.pkl", "rb") as file_dataset_backtest:
            dataset_backtest = pickle.load(file_dataset_backtest)

        self._prepare_calender_cache()

        ##=============reinit dataset=============
        dataset.init(
            handler_kwargs={
                "init_type": DataHandlerLP.IT_LS,
                "start_time": "2021-01-19 00:00:00",
                "end_time": "2021-01-25 16:00:00",
            },
            segment_kwargs={
                "test": (
                    "2021-01-19 00:00:00",
                    "2021-01-25 16:00:00",
                ),
            },
        )
        dataset_backtest.init(
            handler_kwargs={
                "start_time": "2021-01-19 00:00:00",
                "end_time": "2021-01-25 16:00:00",
            },
            segment_kwargs={
                "test": (
                    "2021-01-19 00:00:00",
                    "2021-01-25 16:00:00",
                ),
            },
        )

        ##=============get data=============
        xtest = dataset.prepare(["test"])
        backtest_test = dataset_backtest.prepare(["test"])

        print(xtest, backtest_test)
        return

    def get_high_freq_data(self, data_path):
        """Export the normalized features and the backtest data as one pickle per instrument.

        Parameters
        ----------
        data_path : str
            output directory; features are written to ``<data_path>/normed_feature/<instrument>.pkl``
            and backtest data to ``<data_path>/backtest/<instrument>.pkl.backtest``.
        """
        self._init_qlib()
        self._prepare_calender_cache()
        import os

        dataset = init_instance_by_config(self.task["dataset"])
        xtrain, xtest = dataset.prepare(["train", "test"])
        normed_feature = pd.concat([xtrain, xtest]).sort_index()
        dic = dict(tuple(normed_feature.groupby("instrument")))
        feature_path = os.path.join(data_path, "normed_feature/")
        # exist_ok avoids the check-then-create race of `if not exists: makedirs`.
        os.makedirs(feature_path, exist_ok=True)
        for k, v in dic.items():
            v.to_pickle(os.path.join(feature_path, f"{k}.pkl"))

        dataset_backtest = init_instance_by_config(self.task["dataset_backtest"])
        backtest_train, backtest_test = dataset_backtest.prepare(["train", "test"])
        backtest = pd.concat([backtest_train, backtest_test]).sort_index()
        # Append the calendar date, taken from the second index level, as an extra index level.
        backtest["date"] = backtest.index.map(lambda x: x[1].date())
        backtest.set_index("date", append=True, drop=True, inplace=True)
        dic = dict(tuple(backtest.groupby("instrument")))
        backtest_path = os.path.join(data_path, "backtest/")
        os.makedirs(backtest_path, exist_ok=True)
        for k, v in dic.items():
            v.to_pickle(os.path.join(backtest_path, f"{k}.pkl.backtest"))
if __name__ == "__main__":
    # The fire-based CLI is disabled; the script always runs the high-frequency export.
    # fire.Fire(HighfreqWorkflow)
    data_path = "../data/"
    workflow = HighfreqWorkflow()
    workflow.get_high_freq_data(data_path)

View File

@@ -69,9 +69,9 @@ def handler(signum, frame):
os.system("kill -9 %d" % os.getpid())
signal.signal(signal.SIGTSTP, handler)
signal.signal(signal.SIGINT, handler)
# function to calculate the mean and std of a list in the results dictionary
def cal_mean_std(results) -> dict:
mean_std = dict()

104
examples/trade/README.md Normal file
View File

@@ -0,0 +1,104 @@
# Universal Trading for Order Execution with Oracle Policy Distillation
This is the experiment code for our AAAI 2021 paper "[Universal Trading for Order Execution with Oracle Policy Distillation](https://arxiv.org/abs/2103.10860)", including the implementations of all the compared methods in the paper and a general reinforcement learning framework for order execution in quantitative finance.
## Abstract
As a fundamental problem in algorithmic trading, order execution aims at fulfilling a specific trading order, either liquidation or acquirement, for a given instrument. Towards effective execution strategy, recent years have witnessed the shift from the analytical view with model-based market assumptions to model-free perspective, i.e., reinforcement learning, due to its nature of sequential decision optimization. However, the noisy and yet imperfect market information that can be leveraged by the policy has made it quite challenging to build up sample efficient reinforcement learning methods to achieve effective order execution. In this paper, we propose a novel universal trading policy optimization framework to bridge the gap between the noisy yet imperfect market states and the optimal action sequences for order execution. Particularly, this framework leverages a policy distillation method that can better guide the learning of the common policy towards practically optimal execution by an oracle teacher with perfect information to approximate the optimal trading strategy. The extensive experiments have shown significant improvements of our method over various strong baselines, with reasonable trading actions.
## Environment Dependencies
### Dependencies
```
gym==0.17.3
torch==1.6.0
numba==0.51.2
numpy==1.19.1
pandas==1.1.3
tqdm==4.50.2
tianshou==0.3.0.post1
env==0.1.0
PyYAML==5.4.1
redis==3.5.3
```
### Environment Variable
`EXP_PATH` Absolute path to your config folder, we give folder `exp` as an example.
`OUTPUT_DIR` Absolute path to your log folder.
## Data Processing
For feature processing, we take the Yahoo dataset as an example; it can be processed with the `qlib/examples/highfreq/workflow.py` file. If you need to change your data storage path, change the `data_path` in `workflow.py`, and then run the following.
```
python workflow.py
```
For order generation, if you have changed the `data_path` in `workflow.py`, change the `data_path` in `order_gen.py` accordingly, and then run the following.
```
python order_gen.py
```
## Training and backtest
### Config file
A config file is needed to start our project; we provide `PPO`, `OPDS` and `OPD` as examples in the folder `exp/example`. If you want to use our given configs, make sure the `data_path` you set before matches the config file.
### Baseline method
To run a method, you can do the following.
```
python main.py --config={config_path}
```
Here `{config_path}` is the path of your `config.yml` relative to `EXP_PATH`.
If you need to run our given method such as PPO method, you can do the following.
```
python main.py --config=example/PPO/config.yml
```
### OPD method
The OPD method is a multi-step method. First, run OPDT, which serves as the teacher in the OPD method.
```
python main.py --config=example/OPDT/config.yml
```
After training, find the `policy_best` file in your OPDT log file and copy it to `trade` file for backtest. Also you can change `policy_path` in the `example/OPDT_b/config.yml` to your `policy_best` file. Then run the backtest method.
```
python main.py --config=example/OPDT_b/config.yml
```
Then process the features from the teacher. Remember to change `log_path` if you have changed `log_dir` in `OPDT_b/config.yml`.
```
python teacher_feature.py
```
and finally start our OPD method.
```
python main.py --config=example/OPD/config.yml
```
## Citation
You are more than welcome to cite our paper:
```
@inproceedings{fang2021universal,
title={Universal Trading for Order Execution with Oracle Policy Distillation},
author={Fang, Yuchen and Ren, Kan and Liu, Weiqing and Zhou, Dong and Zhang, Weinan and Bian, Jiang and Yu, Yong and Liu, Tie-Yan},
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
volume={35},
number={1},
pages={107--115},
year={2021}
}
```

View File

@@ -0,0 +1,10 @@
# from rl4execution import env, trainer, exploration
# __all__ = [
# "env",
# "data",
# "utils",
# "policy",
# "trainer",
# "exploration",
# ]

View File

@@ -0,0 +1,4 @@
from .base import *
from .action_rl import *
from .action_rule import *
from .action_rl import *

View File

@@ -0,0 +1,27 @@
import numpy as np
from gym.spaces import Discrete, Box, Tuple, MultiDiscrete
from .base import Base_Action
class Static_Action(Base_Action):
    """Discrete action: the chosen index looks up a multiplier of the target in ``action_map``."""

    def __init__(self, config):
        # Both entries are required keys of the action config.
        self.action_num = config["action_num"]
        self.action_map = config["action_map"]

    def get_space(self):
        """Return a ``Discrete`` space with ``action_num`` choices."""
        return Discrete(self.action_num)

    def get_action(self, action, target, position, **kargs):
        """Return ``target * action_map[action]``, capped at ``position``.

        :param action: index into ``action_map``.
        :param target: value scaled by the looked-up multiplier.
        :param position: upper bound of the returned amount.
        :param **kargs: ignored.
        """
        proposed = target * self.action_map[action]
        return min(proposed, position)

View File

@@ -0,0 +1,46 @@
import numpy as np
from gym.spaces import Discrete, Box, Tuple, MultiDiscrete
from .base import Base_Action
class Rule_Dynamic(Base_Action):
    """Scale an even split of the current position over the remaining steps by the action."""

    def get_space(self):
        """Return the action space: a non-negative float32 scalar."""
        return Box(0, np.inf, shape=(), dtype=np.float32)

    def get_action(self, action, target, position, max_step_num, t, **kargs):
        """Return ``position / (max_step_num - (t + 1)) * action``.

        :param action: multiplier applied to the even split.
        :param target: unused by this rule.
        :param position: amount divided over the remaining steps.
        :param max_step_num: total number of steps.
        :param t: step index; ``t + 1`` steps are counted as already used.
        :param **kargs: ignored.
        """
        steps_left = max_step_num - (t + 1)
        even_share = position / steps_left
        return even_share * action
class Rule_Static(Base_Action):
    """Scale an even split of the target over all steps by the action."""

    def get_space(self):
        """Return the action space: a non-negative float32 scalar."""
        return Box(0, np.inf, shape=(), dtype=np.float32)

    def get_action(self, action, target, position, max_step_num, t, **kargs):
        """Return ``target / max_step_num * action``.

        :param action: multiplier applied to the even split.
        :param target: amount divided over all steps.
        :param position: unused by this rule.
        :param max_step_num: total number of steps.
        :param t: unused by this rule.
        :param **kargs: ignored.
        """
        even_share = target / max_step_num
        return even_share * action

View File

@@ -0,0 +1,20 @@
import numpy as np
from gym.spaces import Discrete, Box, Tuple, MultiDiscrete
class Base_Action(object):
    """Base class of the action helpers; instances are callable and forward to ``get_action``."""

    def __init__(self, config):
        # The base class keeps no state; ``config`` is accepted and ignored.
        pass

    def __call__(self, *args, **kargs):
        """Forward every argument unchanged to :meth:`get_action`."""
        result = self.get_action(*args, **kargs)
        return result

    def get_action(self, action):
        """Identity mapping: hand the given action back untouched.

        :param action: the action to return.
        """
        return action

View File

@@ -0,0 +1,46 @@
import numpy as np
from gym.spaces import Discrete, Box, Tuple, MultiDiscrete
from .base import Base_Action
class Rule_Static_Interval(Base_Action):
    """Scale an even split of the target over all intervals by the action."""

    def get_space(self):
        """Return the action space: a non-negative float32 scalar."""
        return Box(0, np.inf, shape=(), dtype=np.float32)

    def get_action(self, action, target, position, interval_num, interval, **kargs):
        """Return ``target / interval_num * action``.

        :param action: multiplier applied to the even split.
        :param target: amount divided over all intervals.
        :param position: unused by this rule.
        :param interval_num: total number of intervals.
        :param interval: unused by this rule.
        :param **kargs: ignored.
        """
        even_share = target / interval_num
        return even_share * action
class Rule_Dynamic_Interval(Base_Action):
    """Scale an even split of the current position over the remaining intervals by the action."""

    def get_space(self):
        """Return the action space: a non-negative float32 scalar."""
        return Box(0, np.inf, shape=(), dtype=np.float32)

    def get_action(self, action, target, position, interval_num, interval, **kargs):
        """Return ``position / (interval_num - interval) * action``.

        :param action: multiplier applied to the even split.
        :param target: unused by this rule.
        :param position: amount divided over the remaining intervals.
        :param interval_num: total number of intervals.
        :param interval: number of intervals counted as already used.
        :param **kargs: ignored.
        """
        intervals_left = interval_num - interval
        even_share = position / intervals_left
        return even_share * action

View File

@@ -0,0 +1 @@
from .basic import *

View File

@@ -0,0 +1,69 @@
from tianshou.policy import BasePolicy
from tianshou.data import Batch
import numpy as np
import torch
from env import nan_weighted_avg
class TWAP(BasePolicy):
    """The TWAP strategy: a rule-based policy that always outputs the action ``1``."""

    def __init__(self, config):
        super().__init__()
        self.max_step_num = config["max_step_num"]
        self.num_cpus = config["num_cpus"]

    def forward(self, batch: Batch, state=None, **kwargs) -> Batch:
        """Return one constant action ``1`` per entry of ``batch.obs.private``."""
        batch_size = len(batch.obs.private)
        constant_actions = [1 for _ in range(batch_size)]
        return Batch(act=constant_actions, state=state)

    def learn(self, batch, batch_size, repeat):
        """Nothing to learn for a rule-based policy."""
        return None

    def process_fn(self, batch, buffer, indice):
        """No preprocessing; returns ``None``."""
        return None
class VWAP(BasePolicy):
    """The VWAP strategy: the action is the flattened ``prediction`` field of the observation."""

    def __init__(self, config):
        # ``config`` is accepted for a uniform constructor signature but not read here.
        super().__init__()

    def forward(self, batch, state, **kwargs):
        """Stack ``batch.obs.prediction``, flatten it to 1-D and return it as the action."""
        stacked_prediction = np.stack(batch.obs.prediction)
        flat_prediction = stacked_prediction.reshape(-1)
        return Batch(act=flat_prediction, state=state)

    def learn(self, batch, batch_size, repeat):
        """Nothing to learn for a rule-based policy."""
        return None

    def process_fn(self, batch, buffer, indice):
        """No preprocessing; returns ``None``."""
        return None
class AC(VWAP):
    """Almgren-Chriss strategy."""

    def __init__(self, config):
        """Read the horizon and risk weight from ``config``; other constants are fixed.

        :param config: mapping providing ``"max_step_num"`` and ``"lambda"``.
        """
        super().__init__(config)
        self.T = config["max_step_num"]
        self.gamma = 0
        self.tau = 1
        self.lamb = config["lambda"]
        self.eps = 0.0625
        self.alpha = 0.02
        self.eta = 2.5e-6

    def forward(self, batch, state, **kwargs):
        """Compute the action for every observation in the batch.

        With ``k = arccosh(lamb / eta * sig**2 / 2 + 1)`` the action is
        ``(sinh(k * (T - t)) - sinh(k * (T - t - 1))) / sinh(k * T)``.

        :param batch: batch whose ``obs`` provides ``prediction`` and ``private``.
        :param state: passed through unchanged to the returned batch.
        :returns: ``Batch(act=..., state=state)``.
        """
        obs = batch.obs
        sig = np.stack(obs.prediction).reshape(-1)
        # The previous unused ``sell`` local relied on ``np.bool``, an alias that newer
        # NumPy releases no longer provide; it has been removed.
        data = np.stack(obs.private)
        # column 2 of the private observation is used as the time index, shifted by one
        # NOTE(review): presumably the index of the last executed step -- confirm against the observation
        t = data[:, 2]
        t = t + 1
        k_tild = self.lamb / self.eta * sig * sig
        k = np.arccosh(k_tild / 2 + 1)
        act = (np.sinh(k * (self.T - t)) - np.sinh(k * (self.T - t - 1))) / np.sinh(k * self.T)
        return Batch(act=act, state=state)

342
examples/trade/collector.py Normal file
View File

@@ -0,0 +1,342 @@
import gym
import time
import torch
import warnings
import numpy as np
from copy import deepcopy
from numbers import Number
from typing import Any, Dict, List, Union, Optional, Callable
from vecenv import BaseVectorEnv
from tianshou.policy import BasePolicy
from tianshou.data import Batch, ReplayBuffer, ListReplayBuffer, to_numpy
from tianshou.exploration import BaseNoise
from tianshou.env import DummyVectorEnv
from tianshou.data.collector import _batch_set_item
class Collector(object):
    """Run a policy in a vectorised environment and gather the resulting transitions.

    The environment is expected to provide ``reset_sampler()`` and a ``reset`` that
    returns ``(obs, stop_id)``; environments listed in ``stop_id`` are not stepped again.

    :param policy: the policy queried for actions.
    :param env: a ``BaseVectorEnv``; any other environment is wrapped in a ``DummyVectorEnv``.
    :param testing: when true, ``collect`` may be called without ``n_step``/``n_episode``
        and stops once no environment is ready any more.
    :param buffer: optional replay buffer receiving finished episodes.
    :param preprocess_fn: optional callable applied to observations/transitions.
    :param action_noise: optional noise added to the policy's actions.
    :param reward_metric: maps an episode's summed reward to the recorded value.
    """

    def __init__(
        self,
        policy: BasePolicy,
        env: Union[gym.Env, BaseVectorEnv],
        testing=False,
        buffer: Optional[ReplayBuffer] = None,
        preprocess_fn: Optional[Callable[..., Batch]] = None,
        action_noise: Optional[BaseNoise] = None,
        reward_metric: Optional[Callable[[np.ndarray], float]] = np.sum,
    ) -> None:
        super().__init__()
        if not isinstance(env, BaseVectorEnv):
            env = DummyVectorEnv([lambda: env])
        self.env = env
        self.env_num = len(env)
        # environments that are available in step()
        # this means all environments in synchronous simulation
        # but only a subset of environments in asynchronous simulation
        self._ready_env_ids = np.arange(self.env_num)
        # flag to indicate whether this collector works with asynchronous simulation
        self.is_async = env.is_async
        self.testing = testing
        # need cache buffers before storing in the main buffer
        self._cached_buf = [ListReplayBuffer() for _ in range(self.env_num)]
        self.buffer = buffer
        self.policy = policy
        self.preprocess_fn = preprocess_fn
        self.process_fn = policy.process_fn
        # NOTE(review): ``self._action_space`` is not assigned here (the assignment from
        # ``env.action_space`` is disabled), yet ``collect(random=True)`` reads it.
        self._action_noise = action_noise
        self._rew_metric = reward_metric or Collector._default_rew_metric
        # NOTE: ``reset()`` is not called here; it creates ``self.data`` and the
        # ``collect_*`` counters that ``collect()`` relies on, so call it first.

    @staticmethod
    def _default_rew_metric(x: Union[Number, np.number]) -> Union[Number, np.number]:
        """Return ``x`` unchanged after asserting that it is a single scalar."""
        # this internal function is designed for single-agent RL
        # for multi-agent RL, a reward_metric must be provided
        assert np.asanyarray(x).size == 1, "Please specify the reward_metric " "since the reward is not a scalar."
        return x

    def reset(self) -> None:
        """Reset all related variables in the collector."""
        # use empty Batch for ``state`` so that ``self.data`` supports slicing
        # convert empty Batch to None when passing data to policy
        self.data = Batch(state={}, obs={}, act={}, rew={}, done={}, info={}, obs_next={}, policy={})
        self.reset_env()
        self.reset_buffer()
        self.reset_stat()
        if self._action_noise is not None:
            self._action_noise.reset()

    def reset_stat(self) -> None:
        """Reset the statistic variables."""
        self.collect_time, self.collect_step, self.collect_episode = 0.0, 0, 0

    def reset_buffer(self) -> None:
        """Reset the main data buffer."""
        if self.buffer is not None:
            self.buffer.reset()

    def get_env_num(self) -> int:
        """Return the number of environments handled by this collector."""
        return self.env_num

    def reset_env(self) -> None:
        """Reset all of the environment(s)' states and the cache buffers."""
        self._ready_env_ids = np.arange(self.env_num)
        self.env.reset_sampler()
        obs, stop_id = self.env.reset()
        if self.preprocess_fn:
            obs = self.preprocess_fn(obs=obs).get("obs", obs)
        self.data.obs = obs
        for b in self._cached_buf:
            b.reset()
        # environments reported in ``stop_id`` are excluded from stepping
        self._ready_env_ids = np.array([x for x in self._ready_env_ids if x not in stop_id])

    def _reset_state(self, id: Union[int, List[int]]) -> None:
        """Reset the hidden state: self.data.state[id]."""
        state = self.data.state  # it is a reference
        if isinstance(state, torch.Tensor):
            state[id].zero_()
        elif isinstance(state, np.ndarray):
            # builtin ``object`` replaces the removed ``np.object`` alias
            state[id] = None if state.dtype == object else 0
        elif isinstance(state, Batch):
            state.empty_(id)

    def collect(
        self,
        n_step: Optional[int] = None,
        n_episode: Optional[Union[int, List[int]]] = None,
        random: bool = False,
        render: Optional[float] = None,
        log_fn=None,
        no_grad: bool = True,
    ) -> Dict[str, float]:
        """Collect a specified number of step or episode.

        .. note::
            One and only one collection number specification is permitted,
            either ``n_step`` or ``n_episode`` (not enforced when the collector
            was created with ``testing=True``).

        :param n_step: how many steps you want to collect. (Default value = None)
        :param n_episode: how many episodes you want to collect. If it is an
            int, it means to collect at least ``n_episode`` episodes; if it is
            a list, it means to collect exactly ``n_episode[i]`` episodes in
            the i-th environment. (Default value = None)
        :param random: whether to sample actions from ``self._action_space``
            instead of querying the policy. (Default value = False)
        :param render: the sleep time between rendering consecutive frames;
            None or 0 disables rendering. (Default value = None)
        :param log_fn: optional callable invoked with the ``info`` returned by
            every environment step. (Default value = None)
        :param no_grad: whether to run ``policy.forward`` under
            ``torch.no_grad()``. (Default value = True)

        :returns: A dict including the following keys

            * ``n/ep`` the collected number of episodes.
            * ``n/st`` the collected number of steps.
            * ``v/st`` the speed of steps per second.
            * ``v/ep`` the speed of episode per second.
            * ``t/st`` the environment stepping time per collected step.
            * ``t/re`` the environment reset time per collected episode.
            * ``t/mo`` the policy time per collected step.
            * ``rew`` the mean reward over collected episodes.
            * ``rew_std`` the standard deviation of the episode rewards.
            * ``len`` the mean length over collected episodes.
        """
        # both message parts live inside one parenthesised expression so the
        # f-string is really part of the assertion message
        assert (
            (n_step is not None and n_episode is None and n_step > 0)
            or (n_step is None and n_episode is not None and np.sum(n_episode) > 0)
            or self.testing
        ), (
            "Only one of n_step or n_episode is allowed in Collector.collect, "
            f"got n_step = {n_step}, n_episode = {n_episode}."
        )
        start_time = time.time()
        step_count = 0
        step_time = 0.0
        reset_time = 0.0
        model_time = 0.0
        # episode of each environment
        episode_count = np.zeros(self.env_num)
        # If n_episode is a list, and some envs have collected the required
        # number of episodes, these envs will be recorded in this list, and
        # they will not be stepped.
        finished_env_ids = []
        rewards = []
        whole_data = Batch()
        if isinstance(n_episode, list):
            assert len(n_episode) == self.get_env_num()
            finished_env_ids = [i for i in self._ready_env_ids if n_episode[i] <= 0]
            self._ready_env_ids = np.array([x for x in self._ready_env_ids if x not in finished_env_ids])
        while True:
            if step_count >= 100000 and episode_count.sum() == 0:
                warnings.warn(
                    "There are already many steps in an episode. "
                    "You should add a time limitation to your environment!",
                    Warning,
                )

            is_async = self.is_async or len(finished_env_ids) > 0
            if is_async:
                # self.data are the data for all environments in async
                # simulation or some envs have finished,
                # **only a subset of data are disposed**,
                # so we store the whole data in ``whole_data``, let self.data
                # to be the data available in ready environments, and finally
                # set these back into all the data
                whole_data = self.data
                self.data = self.data[self._ready_env_ids]

            # restore the state and the input data
            last_state = self.data.state
            if isinstance(last_state, Batch) and last_state.is_empty():
                last_state = None
            self.data.update(state=Batch(), obs_next=Batch(), policy=Batch())

            # calculate the next action
            start = time.time()
            if random:
                # NOTE(review): ``_action_space`` is not set by ``__init__``; this branch
                # raises AttributeError unless the attribute was assigned elsewhere.
                spaces = self._action_space
                result = Batch(act=[spaces[i].sample() for i in self._ready_env_ids])
            elif no_grad:
                with torch.no_grad():  # faster than retain_grad version
                    result = self.policy(self.data, last_state)
            else:
                result = self.policy(self.data, last_state)
            model_time += time.time() - start

            state = result.get("state", Batch())
            # convert None to Batch(), since None is reserved for 0-init
            if state is None:
                state = Batch()
            self.data.update(state=state, policy=result.get("policy", Batch()))
            # save hidden state to policy._state, in order to save into buffer
            if not (isinstance(state, Batch) and state.is_empty()):
                self.data.policy._state = self.data.state

            self.data.act = to_numpy(result.act)
            if self._action_noise is not None:
                assert isinstance(self.data.act, np.ndarray)
                self.data.act += self._action_noise(self.data.act.shape)

            # step in env
            start = time.time()
            if not is_async:
                obs_next, rew, done, info = self.env.step(self.data.act)
                if log_fn:
                    log_fn(info)
            else:
                # store computed actions, states, etc
                _batch_set_item(whole_data, self._ready_env_ids, self.data, self.env_num)
                # fetch finished data
                obs_next, rew, done, info = self.env.step(self.data.act, id=self._ready_env_ids)
                self._ready_env_ids = np.array([i["env_id"] for i in info])
                # get the stepped data
                self.data = whole_data[self._ready_env_ids]
                if log_fn:
                    log_fn(info)
            step_time += time.time() - start

            # move data to self.data; the per-env info is replaced by empty dicts
            self.data.update(obs_next=obs_next, rew=rew, done=done, info=[{} for _ in info])

            if render:
                self.env.render()
                time.sleep(render)

            # add data into the buffer
            if self.preprocess_fn:
                result = self.preprocess_fn(**self.data)  # type: ignore
                self.data.update(result)

            for j, i in enumerate(self._ready_env_ids):
                # j is the index in current ready_env_ids
                # i is the index in all environments
                if self.buffer is None:
                    # users do not want to store data, so we store
                    # small fake data here to make the code clean
                    self._cached_buf[i].add(obs=0, act=0, rew=rew[j], done=0)
                else:
                    self._cached_buf[i].add(**self.data[j])

                if done[j]:
                    if not (isinstance(n_episode, list) and episode_count[i] >= n_episode[i]):
                        episode_count[i] += 1
                        rewards.append(self._rew_metric(np.sum(self._cached_buf[i].rew, axis=0)))
                        step_count += len(self._cached_buf[i])
                        if self.buffer is not None:
                            self.buffer.update(self._cached_buf[i])
                        if isinstance(n_episode, list) and episode_count[i] >= n_episode[i]:
                            # env i has collected enough data, it has finished
                            finished_env_ids.append(i)
                    self._cached_buf[i].reset()
                    self._reset_state(j)

            obs_next = self.data.obs_next
            start = time.time()
            if sum(done):
                env_ind_local = np.where(done)[0].tolist()
                env_ind_global = self._ready_env_ids[env_ind_local]
                obs_reset, stop_id = self.env.reset(env_ind_global)
                # environments reported in ``stop_id`` are retired for this collection
                for i in stop_id:
                    finished_env_ids.append(i)
                if len(env_ind_local) > 0:
                    if self.preprocess_fn:
                        obs_reset = self.preprocess_fn(obs=obs_reset).get("obs", obs_reset)
                    obs_next[env_ind_local] = obs_reset
            reset_time += time.time() - start
            self.data.obs = obs_next

            if is_async:
                # set data back
                whole_data = deepcopy(whole_data)  # avoid reference in ListBuf
                _batch_set_item(whole_data, self._ready_env_ids, self.data, self.env_num)
                # let self.data be the data in all environments again
                self.data = whole_data

            self._ready_env_ids = np.array([x for x in self._ready_env_ids if x not in finished_env_ids])

            if n_step:
                if step_count >= n_step:
                    break
            else:
                if isinstance(n_episode, int) and episode_count.sum() >= n_episode:
                    break
                if isinstance(n_episode, list) and (episode_count >= n_episode).all():
                    break
            # in testing mode, stop as soon as no environment is left to step
            if len(self._ready_env_ids) == 0 and self.testing:
                break

        # finished envs are ready, and can be used for the next collection
        self._ready_env_ids = np.array(self._ready_env_ids.tolist() + finished_env_ids)

        # generate the statistics
        episode_count = sum(episode_count)
        duration = max(time.time() - start_time, 1e-9)
        self.collect_step += step_count
        self.collect_episode += episode_count
        self.collect_time += duration
        # denominators are clamped to 1 so that a collection which recorded no
        # step or no episode does not divide by zero
        safe_steps = max(step_count, 1)
        safe_episodes = max(episode_count, 1)
        return {
            "n/ep": episode_count,
            "n/st": step_count,
            "v/st": step_count / duration,
            "v/ep": episode_count / duration,
            "t/st": step_time / safe_steps,
            "t/re": reset_time / safe_episodes,
            "t/mo": model_time / safe_steps,
            "rew": np.mean(rewards),
            "rew_std": np.std(rewards),
            "len": step_count / safe_episodes,
        }

1
examples/trade/env/__init__.py vendored Normal file
View File

@@ -0,0 +1 @@
from .env_rl import *

481
examples/trade/env/env_rl.py vendored Normal file
View File

@@ -0,0 +1,481 @@
import gym
gym.logger.set_level(40)
import numpy as np
import pandas as pd
import pickle as pkl
import datetime
import random
import os
import json
import time
import tianshou as ts
import copy
from multiprocessing import Process, Pipe, Queue
from typing import List, Tuple, Union, Optional, Callable, Any
from tianshou.env.utils import CloudpickleWrapper
from scipy.stats import pearsonr
from sklearn.metrics import roc_auc_score
import sys
sys.path.append("..")
from util import merge_dicts, nan_weighted_avg, robust_auc
import reward
import observation
import action
ZERO = 1e-7
class StockEnv(gym.Env):
"""Single-asset trading environment driven by per-step ``$vwap0`` / ``$volume0`` data.

The observation, action and reward components are looked up by name in the
``observation``, ``action`` and ``reward`` modules and built from ``config``.
"""
def __init__(self, config):
"""Build the environment from ``config``.

Keys read: ``max_step_num``, ``limit``, ``time_interval``, ``interval_num``,
``features``, ``obs`` (``name``/``config``), ``action`` (``name``/``config``) and
``reward`` (name -> config); optional keys are ``offset`` (default 0),
``last_reward`` (default None) and ``log`` (default True).
"""
self.max_step_num = config["max_step_num"]
self.limit = config["limit"]
self.time_interval = config["time_interval"]
self.interval_num = config["interval_num"]
self.offset = config["offset"] if "offset" in config else 0
if "last_reward" in config:
self.last_reward = config["last_reward"]
else:
self.last_reward = None
if "log" in config:
self.log = config["log"]
else:
self.log = True
# loader_conf = config['loader']['config']
# the observation config is extended in place with settings shared by the environment
obs_conf = config["obs"]["config"]
obs_conf["features"] = config["features"]
obs_conf["time_interval"] = self.time_interval
obs_conf["max_step_num"] = self.max_step_num
self.obs = getattr(observation, config["obs"]["name"])(obs_conf)
self.action_func = getattr(action, config["action"]["name"])(config["action"]["config"])
self.reward_func_list = []
self.reward_log_dict = {}
self.reward_coef = []
for name, conf in config["reward"].items():
# NOTE: pop() removes "coefficient" from the caller's reward config
self.reward_coef.append(conf.pop("coefficient"))
self.reward_func_list.append(getattr(reward, name)(conf))
self.reward_log_dict[name] = 0.0
self.observation_space = self.obs.get_space()
self.action_space = self.action_func.get_space()
def toggle_log(self, log):
"""Enable or disable the per-step trade log."""
self.log = log
def reset(self, sample):
"""Start a new episode from ``sample`` and return the initial observation.

:param sample: 8-tuple ``(ins, date, raw_df_values, raw_df_columns, raw_df_index,
    feature_dfs, target, is_buy)``; it is only unpacked when it is not None.
"""
for key in self.reward_log_dict.keys():
self.reward_log_dict[key] = 0.0
if not sample is None:
(
self.ins,
self.date,
self.raw_df_values,
self.raw_df_columns,
self.raw_df_index,
self.feature_dfs,
self.target,
self.is_buy,
) = sample
# rebuild the raw DataFrame from its transported parts, then drop the parts
self.raw_df = pd.DataFrame(index=self.raw_df_index, data=self.raw_df_values, columns=self.raw_df_columns,)
del self.raw_df_values, self.raw_df_columns, self.raw_df_index
# NOTE(review): nothing happens between the two time.time() calls, so load_time is ~0
start_time = time.time()
self.load_time = time.time() - start_time
# day-level benchmark over the window [offset, offset + max_step_num)
# presumably the volume-weighted average of "$vwap0" -- depends on nan_weighted_avg, TODO confirm
self.day_vwap = nan_weighted_avg(
self.raw_df["$vwap0"].values[self.offset : self.offset + self.max_step_num],
self.raw_df["$volume0"].values[self.offset : self.offset + self.max_step_num],
)
# NOTE(review): the bare except swallows the failed assertion, so execution continues
# with a nan/inf day_vwap; the dump path below is machine-specific.
try:
assert not (np.isnan(self.day_vwap) or np.isinf(self.day_vwap))
except:
print(self.raw_df)
print(self.ins)
print(self.day_vwap)
self.raw_df.to_pickle("/nfs_data1/kanren/error_df.pkl")
# plain (nan-ignoring) mean of "$vwap0" over the same window
self.day_twap = np.nanmean(self.raw_df["$vwap0"].values[self.offset : self.offset + self.max_step_num])
# t is the row index of the last executed step; it starts one before the window
self.t = -1 + self.offset
self.interval = 0
self.position = self.target
self.eps_start = time.time()
self.state = self.obs(
self.raw_df,
self.feature_dfs,
self.t,
self.interval,
self.position,
self.target,
self.is_buy,
self.max_step_num,
self.interval_num,
)
if self.log:
# three-level index: instrument, raw_df index value, date
index_array = [
np.array([self.ins] * self.max_step_num),
self.raw_df.index.to_numpy()[self.offset : self.offset + self.max_step_num],
np.array([self.date] * self.max_step_num),
]
# NOTE: step() addresses these columns by position (0, 2 and 4), so their order matters
self.traded_log = pd.DataFrame(
data={
"$v_t": np.nan,
"$max_vol_t": (self.raw_df["$volume0"] * self.limit).values[
self.offset : self.offset + self.max_step_num
],
"$traded_t": np.nan,
"$vwap_t": self.raw_df["$vwap0"].values[self.offset : self.offset + self.max_step_num],
"action": np.nan,
},
index=index_array,
)
# v_t: The amount of shares the agent hopes to trade
# max_vol_t: The max amount of shares that can be traded
# traded_t: The amount of shares that is actually traded
# action: the action of agent, may have various meanings in different settings.
self.done = False
# this_valid: total tradable volume, i.e. limit times the summed "$volume0" (unbounded if limit > 1)
if self.limit > 1:
self.this_valid = np.inf
else:
self.this_valid = np.nansum(self.raw_df["$volume0"].values) * self.limit
self.this_cash = 0
self.step_time = []
self.action_log = [np.nan] * self.interval_num
self.reset_time = time.time() - start_time
self.real_eps_time = self.reset_time
self.total_reward = 0
self.total_instant_rew = 0
self.last_rew = 0
return self.state
def step(self, action):
"""Execute one decision interval and return ``(state, reward, done, info)``.

:param action: the agent's action; it is converted to a volume by ``self.action_func``.
"""
start_time = time.time()
self.action_log[self.interval] = action
# volume to trade during this interval, as decided by the action helper
volume_t = self.action_func(
action,
self.target,
self.position,
max_step_num=self.max_step_num,
t=self.t - self.offset,
interval=self.interval,
interval_num=self.interval_num,
)
self.interval += 1
reward = 0.0
time_left = self.max_step_num - self.t - 1 + self.offset
for i in range(self.time_interval):
# spread the interval volume evenly over its (remaining) steps
v_t = volume_t / min(self.time_interval, time_left)
self.t += 1
# on the last step of the window, try to trade whatever is left
if self.t == self.max_step_num - 1 + self.offset:
v_t = self.position
if self.log:
log_index = self.t - self.offset
# column 0 is "$v_t", column 4 is "action"
self.traded_log.iat[log_index, 0] = v_t
self.traded_log.iat[log_index, 4] = action
vwap_t, vol_t = self.raw_df.iloc[self.t][["$vwap0", "$volume0"]]
max_vol_t = self.limit * vol_t
if self.limit >= 1:
max_vol_t = np.inf
# cap the traded amount by the remaining position and the volume limit
if v_t > min(self.position, max_vol_t):
if self.position <= max_vol_t:
v_t = self.position
else:
v_t = max_vol_t
self.position -= v_t
self.this_cash += vwap_t * v_t
if self.log:
# column 2 is "$traded_t"
self.traded_log.iat[log_index, 2] = v_t
# price advantage in units of 1e-4 relative to day_vwap / day_twap; the sign depends on is_buy
if self.is_buy:
performance_raise = (1 - vwap_t / self.day_vwap) * 10000
PA_t = (1 - vwap_t / self.day_twap) * 10000
else:
performance_raise = (vwap_t / self.day_vwap - 1) * 10000
PA_t = (vwap_t / self.day_twap - 1) * 10000
# NOTE: this loop reuses the name ``i`` of the enclosing range() loop
for i, reward_func in enumerate(self.reward_func_list):
if reward_func.isinstant:
tmp_r = reward_func(performance_raise, v_t, self.target, PA_t)
reward += tmp_r * self.reward_coef[i]
self.reward_log_dict[type(reward_func).__name__] += tmp_r
if self.t == self.max_step_num - 1 + self.offset:
break
# the episode ends when the position is (numerically) exhausted or all intervals are used
if self.position < ZERO:
self.done = True
if self.interval == self.interval_num:
self.done = True
self.step_time.append(time.time() - start_time)
self.real_eps_time += time.time() - start_time
if self.done:
this_traded = self.target - self.position
# average execution price; falls back to day_vwap when nothing was traded
this_vwap = (self.this_cash / this_traded) if this_traded > ZERO else self.day_vwap
valid = min(self.target, self.this_valid)
# fulfilment ratio in percent, relative to the smaller of target and tradable volume
this_ffr = (this_traded / valid) if valid > ZERO else 1.0
if abs(this_ffr - 1.0) < ZERO:
this_ffr = 1.0
this_ffr *= 100
this_vv_ratio = this_vwap / self.day_vwap
vwap = self.raw_df["$vwap0"].values[self.offset : self.max_step_num + self.offset]
this_tt_ratio = this_vwap / np.nanmean(vwap)
if self.is_buy:
performance_raise = (1 - this_vv_ratio) * 10000
PA = (1 - this_tt_ratio) * 10000
else:
performance_raise = (this_vv_ratio - 1) * 10000
PA = (this_tt_ratio - 1) * 10000
# non-instant rewards are only evaluated once the episode has ended
for i, reward_func in enumerate(self.reward_func_list):
if not reward_func.isinstant:
tmp_r = reward_func(performance_raise, this_ffr, this_tt_ratio, self.is_buy)
reward += tmp_r * self.reward_coef[i]
self.reward_log_dict[type(reward_func).__name__] += tmp_r
self.state = self.obs(
self.raw_df,
self.feature_dfs,
self.t,
self.interval,
self.position,
self.target,
self.is_buy,
self.max_step_num,
self.interval_num,
action,
)
if self.log:
# one-row episode summary indexed by (instrument, date)
res = pd.DataFrame(
{
"target": self.target,
"sell": not self.is_buy,
"vwap": this_vwap,
"this_vv_ratio": this_vv_ratio,
"this_ffr": this_ffr,
},
index=[[self.ins], [self.date]],
)
money = self.target * self.day_vwap
# the info keys carry a "_buy" or "_sell" suffix depending on the order direction
if self.is_buy:
info = {
"money": money,
"money_buy": money,
"action": self.action_log,
"ffr": this_ffr,
"obs0_PR": performance_raise,
"ffr_buy": this_ffr,
"PR_buy": performance_raise,
"PA": PA,
"PA_buy": PA,
"vwap": this_vwap,
}
else:
info = {
"money": money,
"money_sell": money,
"action": self.action_log,
"ffr": this_ffr,
"obs0_PR": performance_raise,
"ffr_sell": this_ffr,
"PR_sell": performance_raise,
"PA": PA,
"PA_sell": PA,
"vwap": this_vwap,
}
info = merge_dicts(info, self.reward_log_dict)
if self.log:
info["df"] = self.traded_log
info["res"] = res
# the feature frames of the finished episode are released here
del self.feature_dfs
return self.state, reward, self.done, info
else:
self.state = self.obs(
self.raw_df,
self.feature_dfs,
self.t,
self.interval,
self.position,
self.target,
self.is_buy,
self.max_step_num,
self.interval_num,
action,
)
return self.state, reward, self.done, {}
class StockEnv_Acc(StockEnv):
    """StockEnv variant whose ``step`` executes one action over a window of minutes.

    Each call to :meth:`step` turns the action into a volume, spreads that volume
    evenly over up to ``self.time_interval`` minutes, and advances ``self.t`` by
    the number of minutes consumed.
    """

    def step(self, action):
        """Execute ``action`` for the next window and return the gym-style 4-tuple.

        :param action: the agent's action for this interval; it is passed to
            ``self.action_func`` to obtain the volume to trade.
        :returns: ``(state, reward, done, info)``. ``info`` is an empty dict until
            the episode is done, then a dict of episode statistics (plus the
            ``"df"`` / ``"res"`` logs when ``self.log`` is set).
        """
        start_time = time.time()
        self.action_log[self.interval] = action
        volume_t = self.action_func(
            action,
            self.target,
            self.position,
            max_step_num=self.max_step_num,
            t=self.t - self.offset,
            interval=self.interval,
            interval_num=self.interval_num,
        )
        self.interval += 1
        reward = 0.0
        # Minutes remaining in the episode, capped at one interval's length.
        time_left = self.max_step_num - self.t - 1 + self.offset
        time_left = min(self.time_interval, time_left)
        # Spread the requested volume evenly over the window.
        # NOTE(review): time_left == 0 would divide by zero here — confirm callers never step past the end.
        v_t = np.repeat(volume_t / time_left, time_left)
        minutes = np.arange(self.t + 1, self.t + time_left + 1)
        if self.log:
            log_index = minutes - self.offset
            # Columns 0 and 4 of traded_log receive the planned volume and the action
            # (column meaning presumably fixed where traded_log is created — verify there).
            self.traded_log.iloc[log_index, 0] = v_t
            self.traded_log.iloc[log_index, 4] = action
        vwap_t = self.raw_df.iloc[minutes]["$vwap0"].values
        vol_t = self.raw_df.iloc[minutes]["$volume0"].values
        # A limit below 1 caps each minute at that fraction of the market volume;
        # any other value disables the cap.
        max_vol_t = self.limit * vol_t if self.limit < 1 else np.inf
        v_t = np.minimum(v_t, max_vol_t)
        if self.t + time_left == self.max_step_num - 1 + self.offset:
            # Last window of the episode: push whatever is still unfilled into the
            # final minute, then re-apply the volume cap.
            left = self.position - v_t.sum()
            v_t[-1] += left
            v_t = np.minimum(v_t, max_vol_t)
        this_money = (v_t * vwap_t).sum()
        this_vol = v_t.sum()
        # nan_to_num turns the 0/0 NaN (nothing traded) into 0.
        this_vwap = np.nan_to_num(this_money / this_vol)
        self.t += time_left
        self.position -= this_vol
        self.this_cash += this_money
        if self.log:
            # Column 2 of traded_log receives the volume after capping.
            self.traded_log.iloc[log_index, 2] = v_t
        # Price advantage of this window versus the day's VWAP / TWAP, scaled by 1e4;
        # the sign is flipped so that a positive value is favourable for both sides.
        if self.is_buy:
            performance_raise = (1 - this_vwap / self.day_vwap) * 10000
            PA_t = (1 - this_vwap / self.day_twap) * 10000
        else:
            performance_raise = (this_vwap / self.day_vwap - 1) * 10000
            PA_t = (this_vwap / self.day_twap - 1) * 10000
        # Only "instant" reward functions contribute on every step.
        for i, reward_func in enumerate(self.reward_func_list):
            if reward_func.isinstant:
                tmp_r = reward_func(performance_raise, v_t, self.target, PA_t)
                reward += tmp_r * self.reward_coef[i]
                self.reward_log_dict[type(reward_func).__name__] += tmp_r
        # The episode ends when the position is (numerically) exhausted or all
        # intervals have been used.
        if self.position < ZERO:
            self.done = True
        if self.interval == self.interval_num:
            self.done = True
        self.step_time.append(time.time() - start_time)
        self.real_eps_time += time.time() - start_time
        if self.done:
            # Whole-episode statistics.
            this_traded = self.target - self.position
            this_vwap = (self.this_cash / this_traded) if this_traded > ZERO else self.day_vwap
            valid = min(self.target, self.this_valid)
            # Fulfilment ratio in percent, snapped to exactly 100 when within ZERO of 1.
            this_ffr = (this_traded / valid) if valid > ZERO else 1.0
            if abs(this_ffr - 1.0) < ZERO:
                this_ffr = 1.0
            this_ffr *= 100
            this_vv_ratio = this_vwap / self.day_vwap
            vwap = self.raw_df["$vwap0"].values[self.offset : self.max_step_num + self.offset]
            this_tt_ratio = this_vwap / np.nanmean(vwap)
            if self.is_buy:
                performance_raise = (1 - this_vv_ratio) * 10000
                PA = (1 - this_tt_ratio) * 10000
            else:
                performance_raise = (this_vv_ratio - 1) * 10000
                PA = (this_tt_ratio - 1) * 10000
            # Non-instant reward functions are evaluated once, at episode end.
            for i, reward_func in enumerate(self.reward_func_list):
                if not reward_func.isinstant:
                    tmp_r = reward_func(performance_raise, this_ffr, this_tt_ratio, self.is_buy)
                    reward += tmp_r * self.reward_coef[i]
                    self.reward_log_dict[type(reward_func).__name__] += tmp_r
            self.state = self.obs(
                self.raw_df,
                self.feature_dfs,
                self.t,
                self.interval,
                self.position,
                self.target,
                self.is_buy,
                self.max_step_num,
                self.interval_num,
                action,
            )
            if self.log:
                res = pd.DataFrame(
                    {
                        "target": self.target,
                        "sell": not self.is_buy,
                        "vwap": this_vwap,
                        "this_vv_ratio": this_vv_ratio,
                        "this_ffr": this_ffr,
                    },
                    index=[[self.ins], [self.date]],
                )
            money = self.target * self.day_vwap
            # The direction-specific keys (*_buy / *_sell) duplicate the generic ones so
            # that downstream loggers can aggregate per side.
            if self.is_buy:
                info = {
                    "money": money,
                    "money_buy": money,
                    "action": self.action_log,
                    "ffr": this_ffr,
                    "obs0_PR": performance_raise,
                    "ffr_buy": this_ffr,
                    "PR_buy": performance_raise,
                    "PA": PA,
                    "PA_buy": PA,
                    "vwap": this_vwap,
                }
            else:
                info = {
                    "money": money,
                    "money_sell": money,
                    "action": self.action_log,
                    "ffr": this_ffr,
                    "obs0_PR": performance_raise,
                    "ffr_sell": this_ffr,
                    "PR_sell": performance_raise,
                    "PA": PA,
                    "PA_sell": PA,
                    "vwap": this_vwap,
                }
            info = merge_dicts(info, self.reward_log_dict)
            if self.log:
                info["df"] = self.traded_log
                info["res"] = res
            # Drop the per-episode feature frames once the episode is over.
            del self.feature_dfs
            return self.state, reward, self.done, info
        else:
            self.state = self.obs(
                self.raw_df,
                self.feature_dfs,
                self.t,
                self.interval,
                self.position,
                self.target,
                self.is_buy,
                self.max_step_num,
                self.interval_num,
                action,
            )
            return self.state, reward, self.done, {}

351
examples/trade/executor.py Normal file
View File

@@ -0,0 +1,351 @@
import env
from vecenv import *
import sampler
import logger
import json
import os
import agent
import network
import policy
import random
import tianshou as ts
import tqdm
from tianshou.utils import tqdm_config, MovAvg
from torch.utils.tensorboard import SummaryWriter
from collector import *
import numpy as np
from util import merge_dicts
def get_best_gpu(force=None):
    """Return the index of the GPU with the most free memory.

    :param force: if not None, returned as-is without querying ``nvidia-smi``.
    :returns: ``force`` when given, otherwise the row index (as ``int``) of the
        largest ``memory.free`` value reported by ``nvidia-smi``.
    """
    if force is not None:
        return force
    pipe = os.popen("nvidia-smi --query-gpu=memory.free --format=csv")
    report = pipe.read()
    rows = report.replace("MiB", "").replace("memory.free", "").split("\n")
    pipe.close()
    # Row 0 is the CSV header and the last row is the empty string after the
    # trailing newline; everything in between is one GPU per row.
    free_memory = [int(row) for row in rows[1:-1]]
    best = int(np.argmax(free_memory))
    print("the best GPU is ", best, " with free memories of ", rows[best + 1])
    return best
def setup_seed(seed):
    """
    Seed PyTorch (CPU and all CUDA devices), NumPy and ``random``, and switch
    cuDNN to deterministic mode.

    :param seed: the seed handed to every generator.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for seed_fn in seeders:
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True
class BaseExecutor(object):
    """Base class for executors: builds the policy and defines the train/eval interface."""

    def __init__(
        self,
        log_dir,
        resources,
        env_conf,
        optim=None,
        policy_conf=None,
        network_conf=None,
        policy_path=None,
        seed=None,
    ):
        """A base class for executor

        :param log_dir: The directory to write all the logs.
        :type log_dir: string
        :param resources: A dict which describes available computational resources.
            ``resources["device"]`` is rewritten in place to ``"cuda:<index>"`` when it is ``"cuda"``.
        :type resources: dict
        :param env_conf: Configurations for the environments.
        :type env_conf: dict
        :param optim: Optimization configuration, defaults to None
        :type optim: dict, optional
        :param policy_conf: Configurations for the RL algorithm, defaults to None
        :type policy_conf: dict, optional
        :param network_conf: Configurations for policy network_conf, defaults to None
        :type network_conf: dict, optional
        :param policy_path: If is not None, would load the policy from this path, defaults to None
        :type policy_path: string, optional
        :param seed: Random seed, defaults to None
        :type seed: int, optional
        """
        self.log_dir = log_dir
        print(self.log_dir)
        os.makedirs(self.log_dir, exist_ok=True)
        if resources["device"] == "cuda":
            resources["device"] = "cuda:" + str(get_best_gpu())
        self.device = torch.device(resources["device"])
        # ``seed=0`` is a legitimate seed, so test against None rather than truthiness.
        if seed is not None:
            setup_seed(seed)

        assert policy_path is not None or policy_conf is not None, "Policy must be defined"
        if policy_path:
            # NOTE: torch.load unpickles arbitrary objects; only load policies from trusted paths.
            self.policy = torch.load(policy_path, map_location=self.device)
            self.policy.actor.extractor.device = self.device
        elif hasattr(agent, policy_conf["name"]):
            # Rule-based agents are configured with their own config plus the resources.
            policy_conf["config"] = merge_dicts(policy_conf["config"], resources)
            self.policy = getattr(agent, policy_conf["name"])(policy_conf["config"])
        else:
            assert network_conf is not None
            # An optional "extractor" entry picks a different extractor class than
            # the actor/critic family; its constructor arguments are the same.
            if "extractor" in network_conf:
                extractor_name = network_conf["extractor"]["name"]
            else:
                extractor_name = network_conf["name"]
            net = getattr(network, extractor_name + "_Extractor")(device=self.device, **network_conf["config"])
            net.to(self.device)
            # Actor and critic share the same extractor instance.
            actor = getattr(network, network_conf["name"] + "_Actor")(
                extractor=net, device=self.device, **network_conf["config"]
            )
            actor.to(self.device)
            critic = getattr(network, network_conf["name"] + "_Critic")(
                extractor=net, device=self.device, **network_conf["config"]
            )
            critic.to(self.device)
            self.optim = torch.optim.Adam(
                list(actor.parameters()) + list(critic.parameters()),
                lr=optim["lr"],
                weight_decay=optim.get("weight_decay", 0.0),
            )
            self.dist = torch.distributions.Categorical
            # Prefer a tianshou policy of that name and fall back to the local
            # ``policy`` module. Only the lookup is guarded, so errors raised by the
            # policy constructor itself are no longer swallowed.
            try:
                policy_cls = getattr(ts.policy, policy_conf["name"])
            except AttributeError:
                policy_cls = getattr(policy, policy_conf["name"])
            self.policy = policy_cls(actor, critic, self.optim, self.dist, **policy_conf["config"])
        self.writer = SummaryWriter(self.log_dir)

    def train(
        self,
        max_epoch,
        step_per_epoch,
        repeat_per_collect,
        collect_per_step,
        batch_size,
        iteration=0,
        global_step=0,
        early_stopping=5,
        *args,
        **kargs,
    ):
        """Run the whole training process.

        :param max_epoch: The total number of epoch.
        :param step_per_epoch: The times of bp in one epoch.
        :param collect_per_step: Number of episodes to collect before one bp.
        :param repeat_per_collect: Times of bps after every round of experience collecting.
        :param batch_size: Batch size when bp.
        :param iteration: The iteration when starting the training, used when fine tuning. (Default value = 0)
        :param global_step: The number of steps when starting the training, used when fine tuning. (Default value = 0)
        :param early_stopping: If the test reward does not reach a new high in `early_stopping` iterations, the training would stop. (Default value = 5)
        :returns: The result on test set.
        """
        raise NotImplementedError

    def train_round(self, repeat_per_collect, collect_per_step, batch_size, *args, **kargs):
        """Do a round of training

        :param collect_per_step: Number of episodes to collect before one bp.
        :param repeat_per_collect: Times of bps after every round of experience collecting.
        :param batch_size: Batch size when bp.
        """
        raise NotImplementedError

    def eval(self, order_dir, save_res=False, logdir=None, *args, **kargs):
        """Evaluate the policy on orders in order_dir

        :param order_dir: the orders to be evaluated on.
        :param save_res: whether the result of evaluation be saved to self.logdir/res.json (Default value = False)
        :param logdir: the place to save the .log and .pkl log files to. If None, don't save logfiles. (Default value = None)
        :returns: The result of evaluation.
        """
        raise NotImplementedError
class Executor(BaseExecutor):
    """Executor that trains and evaluates a policy on vectorized order-execution environments."""

    def __init__(
        self,
        log_dir,
        resources,
        env_conf,
        train_paths,
        valid_paths,
        test_paths,
        io_conf,
        optim=None,
        policy_conf=None,
        network_conf=None,
        policy_path=None,
        seed=None,
        share_memory=False,
        buffer_size=200000,
        q_learning=False,
        *args,
        **kargs,
    ):
        """Build the policy, the vectorized environment, the collectors, samplers and loggers.

        :param log_dir: The directory to write all the logs.
        :type log_dir: string
        :param resources: A dict which describes available computational resources.
        :type resources: dict
        :param env_conf: Configurations for the environments.
        :type env_conf: dict
        :param train_paths: The paths of training datasets including orders, backtest files and features.
            A ``"features"`` entry is added to this dict in place.
        :type train_paths: dict
        :param valid_paths: The paths of validation datasets including orders, backtest files and features.
        :type valid_paths: dict
        :param test_paths: The paths of test datasets including orders, backtest files and features.
            A ``"features"`` entry is added to this dict in place.
        :type test_paths: dict
        :param io_conf: Configuration for sampler and loggers.
        :type io_conf: dict
        :param share_memory: Whether to use shared memory vecenv, defaults to False
        :type share_memory: bool, optional
        :param buffer_size: The size of replay buffer, defaults to 200000
        :type buffer_size: int, optional
        :param q_learning: Whether the policy is updated in the Q-learning style
            (buffer kept across rounds), defaults to False
        :type q_learning: bool, optional
        """
        super().__init__(log_dir, resources, env_conf, optim, policy_conf, network_conf, policy_path, seed)
        single_env = getattr(env, env_conf["name"])
        env_conf = merge_dicts(env_conf, train_paths)
        env_conf["log"] = True
        print("CPU_COUNT:", resources["num_cpus"])
        # One environment factory per CPU; the vector-env flavour only differs in
        # how observations are transported between processes.
        vec_env_cls = ShmemVectorEnv if share_memory else SubprocVectorEnv
        self.env = vec_env_cls([lambda: single_env(env_conf) for _ in range(resources["num_cpus"])])
        self.test_collector = Collector(policy=self.policy, env=self.env, testing=True, reward_metric=np.sum)
        self.train_collector = Collector(
            self.policy, self.env, buffer=ts.data.ReplayBuffer(buffer_size), reward_metric=np.sum,
        )
        self.train_paths = train_paths
        self.test_paths = test_paths
        self.valid_paths = valid_paths
        # The sampler configs are the path dicts themselves (not copies), extended
        # with the feature description of the environment.
        train_sampler_conf = train_paths
        train_sampler_conf["features"] = env_conf["features"]
        test_sampler_conf = test_paths
        test_sampler_conf["features"] = env_conf["features"]
        self.train_sampler = getattr(sampler, io_conf["train_sampler"])(train_sampler_conf)
        self.test_sampler = getattr(sampler, io_conf["test_sampler"])(test_sampler_conf)
        self.train_logger = logger.InfoLogger()
        # The test logger is stored as a class; ``eval`` instantiates it per run.
        self.test_logger = getattr(logger, io_conf["test_logger"])
        self.q_learning = q_learning

    def train(
        self,
        max_epoch,
        step_per_epoch,
        repeat_per_collect,
        collect_per_step,
        batch_size,
        iteration=0,
        global_step=0,
        early_stopping=5,
        train_step_min=0,
        log_valid=True,
        *args,
        **kargs,
    ):
        """Train with per-epoch validation and early stopping, then evaluate on the test set.

        :param max_epoch: The total number of epochs.
        :param step_per_epoch: Number of training rounds per epoch.
        :param repeat_per_collect: Times of bps after every round of experience collecting.
        :param collect_per_step: Number of episodes to collect before one bp.
        :param batch_size: Batch size when bp.
        :param iteration: The iteration when starting the training. (Default value = 0)
        :param global_step: The number of steps when starting the training. (Default value = 0)
        :param early_stopping: Stop after this many epochs without a new best validation reward. (Default value = 5)
        :param train_step_min: The early-stopping counter only advances once ``global_step``
            has reached this value. (Default value = 0)
        :param log_valid: Whether validation runs write their logs to ``{log_dir}/valid/{iteration}/``.
        :returns: The result on the test set, evaluated with the best validation weights.
        """
        from copy import deepcopy

        best_epoch, best_reward = -1, -1
        best_state = None
        early_stop_round = 0
        stat = {}
        for epoch in range(1, 1 + max_epoch):
            with tqdm.tqdm(total=step_per_epoch, desc=f"Epoch #{epoch}", **tqdm_config) as t:
                while t.n < t.total:
                    result, losses = self.train_round(repeat_per_collect, collect_per_step, batch_size, iteration)
                    global_step += result["n/st"]
                    iteration += 1
                    for k in result.keys():
                        self.writer.add_scalar("Train/" + k, result[k], global_step=global_step)
                    for k in losses.keys():
                        if stat.get(k) is None:
                            stat[k] = MovAvg()
                        stat[k].add(losses[k])
                        self.writer.add_scalar("Train/" + k, stat[k].get(), global_step=global_step)
                    t.update(1)
                if t.n <= t.total:
                    t.update()
            result = self.eval(
                self.valid_paths["order_dir"], logdir=f"{self.log_dir}/valid/{iteration}/" if log_valid else None,
            )
            for k in result.keys():
                self.writer.add_scalar("Valid/" + k, result[k], global_step=global_step)
            if best_epoch == -1 or best_reward < result["rew"]:
                best_reward = result["rew"]
                best_epoch = epoch
                # state_dict() returns references to the live parameter tensors, so
                # it must be deep-copied; otherwise later optimizer steps overwrite
                # the "best" snapshot and the final test uses the last weights.
                best_state = deepcopy(self.policy.state_dict())
                early_stop_round = 0
                torch.save(self.policy, f"{self.log_dir}/policy_best")
            elif global_step >= train_step_min:
                early_stop_round += 1
            torch.save(self.policy, f"{self.log_dir}/policy_{epoch}")
            print(
                f'Epoch #{epoch}: test_reward: {result["rew"]:.4f}, '
                f"best_reward: {best_reward:.4f} in #{best_epoch}"
            )
            if early_stop_round >= early_stopping:
                print("Early stopped")
                break
        print("Testing...")
        # With max_epoch == 0 there is no snapshot; test the policy as it is.
        if best_state is not None:
            self.policy.load_state_dict(best_state)
        result = self.eval(self.test_paths["order_dir"], logdir=f"{self.log_dir}/test/", save_res=True)
        for k in result.keys():
            self.writer.add_scalar("Test/" + k, result[k], global_step=global_step)
        return result

    def train_round(self, repeat_per_collect, collect_per_step, batch_size, *args, **kargs):
        """Collect ``collect_per_step`` episodes with the training sampler and update the policy once.

        :param repeat_per_collect: Times of bps after every round of experience collecting.
        :param collect_per_step: Number of episodes to collect before one bp.
        :param batch_size: Batch size when bp.
        :returns: ``(result, losses)`` — the collector statistics merged with the
            training logger summary, and the losses returned by ``policy.update``.
        """
        self.policy.train()
        self.env.toggle_log(False)
        self.env.sampler = self.train_sampler
        # On-policy training starts every round from an empty buffer; the
        # Q-learning mode keeps the buffer across rounds.
        if not self.q_learning:
            self.train_collector.reset()
        result = self.train_collector.collect(n_episode=collect_per_step, log_fn=self.train_logger)
        result = merge_dicts(result, self.train_logger.summary())
        if not self.q_learning:
            losses = self.policy.update(
                0, self.train_collector.buffer, batch_size=batch_size, repeat=repeat_per_collect,
            )
        else:
            losses = self.policy.update(batch_size, self.train_collector.buffer,)
        return result, losses

    def eval(self, order_dir, save_res=False, logdir=None, *args, **kargs):
        """Evaluate the policy on the orders in ``order_dir``.

        :param order_dir: the orders to be evaluated on.
        :param save_res: whether the result is written to ``{self.log_dir}/res.json``. (Default value = False)
        :param logdir: where the test logger writes its files. If None, the training
            logger is reused and no log files are written. (Default value = None)
        :returns: The collector statistics merged with the logger summary.
        """
        print(f"start evaluating on {order_dir}")
        self.policy.eval()
        self.env.toggle_log(True)
        self.test_sampler.reset(order_dir)
        self.env.sampler = self.test_sampler
        self.test_collector.reset()
        if logdir is not None:
            os.makedirs(logdir, exist_ok=True)
            eval_logger = self.test_logger(logdir, order_dir)
            eval_logger.reset()
        else:
            eval_logger = self.train_logger
        result = self.test_collector.collect(log_fn=eval_logger)
        result = merge_dicts(result, eval_logger.summary())
        if save_res:
            with open(self.log_dir + "/res.json", "w") as f:
                json.dump(result, f, sort_keys=True, indent=4)
        print(f"finish evaluating on {order_dir}")
        return result

View File

@@ -0,0 +1,76 @@
seed: 42
task: train
log_dir: example/OPD
buffer_size: 80000
io_conf:
test_sampler: TestSampler
train_sampler: Sampler
test_logger: DFLogger
resources:
num_cpus: 24
num_gpus: 1
device: cuda
train_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/train/
valid_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/valid/
test_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/test/
env_conf:
name: StockEnv_Acc
max_step_num: 237
limit: 10
time_interval: 30
interval_num: 8
features:
- name: raw
type: range
loc: ../data/normed_feature/
size: 180
- name: teacher_action
type: interval
size: 1
loc: ../data/feature/teacher/
obs:
name: RuleTeacher
config: {}
action:
name: Static_Action
config:
action_num: 5
action_map: [0, 0.25, 0.5, 0.75, 1]
reward:
VP_Penalty_small_vec:
penalty: 100
coefficient: 1
policy_conf:
name: PPO_sup
config:
discount_factor: 1.
max_grad_norm: 100.
reward_normalization: False
eps_clip: 0.3
value_clip: True
vf_coef: 1.
gae_lambda: 1.
vf_clip_para: 0.3
sup_coef: 0.01
network_conf:
name: OPD
config:
hidden_size: 64
out_shape: 5
fc_size: 32
cnn_shape: [30, 6]
optim:
lr: 1e-4
batch_size: 1024
max_epoch: 30
step_per_epoch: 20
collect_per_step: 10000
repeat_per_collect: 5
early_stopping: 5
weight_decay: 0.

View File

@@ -0,0 +1,71 @@
seed: 42
task: train
log_dir: example/OPDS
buffer_size: 80000
io_conf:
test_sampler: TestSampler
train_sampler: Sampler
test_logger: DFLogger
resources:
num_cpus: 24
num_gpus: 1
device: cuda
train_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/train/
valid_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/valid/
test_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/test/
env_conf:
name: StockEnv_Acc
max_step_num: 237
limit: 10
time_interval: 30
interval_num: 8
features:
- name: raw
type: range
loc: ../data/normed_feature/
size: 180
obs:
name: TeacherObs
config: {}
action:
name: Static_Action
config:
action_num: 5
action_map: [0, 0.25, 0.5, 0.75, 1]
reward:
VP_Penalty_small_vec:
penalty: 100
coefficient: 1
policy_conf:
name: PPO
config:
discount_factor: 1.
max_grad_norm: 100.
reward_normalization: False
eps_clip: 0.3
value_clip: True
vf_coef: 1.
gae_lambda: 1.
vf_clip_para: 0.3
network_conf:
name: PPO
config:
hidden_size: 64
out_shape: 5
fc_size: 32
cnn_shape: [30, 6]
optim:
lr: 1e-4
batch_size: 1024
max_epoch: 30
step_per_epoch: 20
collect_per_step: 10000
repeat_per_collect: 5
early_stopping: 5
weight_decay: 0.

View File

@@ -0,0 +1,71 @@
seed: 42
task: train
log_dir: example/OPDT
buffer_size: 80000
io_conf:
test_sampler: TestSampler
train_sampler: Sampler
test_logger: DFLogger
resources:
num_cpus: 24
num_gpus: 1
device: cuda
train_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/train/
valid_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/valid/
test_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/test/
env_conf:
name: StockEnv_Acc
max_step_num: 237
limit: 10
time_interval: 30
interval_num: 8
features:
- name: raw
type: range
loc: ../data/normed_feature/
size: 180
obs:
name: TeacherObs
config: {}
action:
name: Static_Action
config:
action_num: 5
action_map: [0, 0.25, 0.5, 0.75, 1]
reward:
VP_Penalty_small_vec:
penalty: 100
coefficient: 1
policy_conf:
name: PPO
config:
discount_factor: 1.
max_grad_norm: 100.
reward_normalization: False
eps_clip: 0.3
value_clip: True
vf_coef: 1.
gae_lambda: 1.
vf_clip_para: 0.3
network_conf:
name: Teacher
config:
hidden_size: 64
out_shape: 5
fc_size: 32
cnn_shape: [30, 6]
optim:
lr: 1e-4
batch_size: 1024
max_epoch: 30
step_per_epoch: 20
collect_per_step: 10000
repeat_per_collect: 5
early_stopping: 5
weight_decay: 0.

View File

@@ -0,0 +1,76 @@
seed: 42
task: eval
log_dir: example/OPDT_b
buffer_size: 80000
io_conf:
test_sampler: TestSampler
train_sampler: Sampler
test_logger: DFLogger
resources:
num_cpus: 24
num_gpus: 1
device: cuda
train_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/train/
valid_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/valid/
test_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/all/
env_conf:
name: StockEnv_Acc
max_step_num: 237
limit: 10
time_interval: 30
interval_num: 8
features:
- name: raw
type: range
loc: ../data/normed_feature/
size: 180
obs:
name: TeacherObs
config: {}
action:
name: Static_Action
config:
action_num: 5
action_map: [0, 0.25, 0.5, 0.75, 1]
reward:
VP_Penalty_small_vec:
penalty: 100
coefficient: 1
policy_path: policy_best
policy_conf:
name: PPO
config:
discount_factor: 1.
max_grad_norm: 100.
reward_normalization: False
eps_clip: 0.3
value_clip: True
vf_coef: 1.
gae_lambda: 1.
vf_clip_para: 0.3
network_conf:
name: Teacher
config:
hidden_size: 64
out_shape: 5
fc_size: 32
cnn_shape: [30, 6]
optim:
lr: 1e-4
batch_size: 1024
max_epoch: 30
step_per_epoch: 20
collect_per_step: 10000
repeat_per_collect: 5
early_stopping: 5
weight_decay: 0.
search:
optim.weight_decay:
type: choice
value: [0.]

View File

@@ -0,0 +1,70 @@
seed: 42
task: train
log_dir: example/PPO
buffer_size: 80000
io_conf:
test_sampler: TestSampler
train_sampler: Sampler
test_logger: DFLogger
resources:
num_cpus: 24
num_gpus: 1
device: cuda
train_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/train/
valid_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/valid/
test_paths:
raw_dir: ../data/backtest/
order_dir: ../data/order/test/
env_conf:
name: StockEnv_Acc
max_step_num: 237
limit: 10
time_interval: 30
interval_num: 8
features:
- name: raw
type: range
loc: ../data/normed_feature/
size: 180
obs:
name: TeacherObs
config: {}
action:
name: Static_Action
config:
action_num: 5
action_map: [0, 0.25, 0.5, 0.75, 1]
reward:
PPO_Reward:
coefficient: 1
policy_conf:
name: PPO
config:
discount_factor: 1.
max_grad_norm: 100.
reward_normalization: False
eps_clip: 0.3
value_clip: True
vf_coef: 1.
gae_lambda: 1.
vf_clip_para: 0.3
network_conf:
name: PPO
config:
hidden_size: 64
out_shape: 5
fc_size: 32
cnn_shape: [30, 6]
optim:
lr: 1e-4
batch_size: 1024
max_epoch: 30
step_per_epoch: 20
collect_per_step: 10000
repeat_per_collect: 5
early_stopping: 5
weight_decay: 0.

View File

@@ -0,0 +1 @@
from .single_logger import *

View File

@@ -0,0 +1,231 @@
import pandas as pd
import numpy as np
import os
from multiprocessing import Queue, Process
import time
def GLR(values):
    """
    Calculate the gain-loss ratio ``-mean(v | v > 0) / mean(v | v < 0)``.

    Zeros (and NaNs) belong to neither side. When there is no positive or no
    negative value the ratio is undefined and NaN is returned, without going
    through NumPy's "mean of empty slice" warning.

    :param values: iterable of numbers.
    :returns: the ratio as a float, or NaN when either side is empty.
    """
    pos = [v for v in values if v > 0]
    neg = [v for v in values if v < 0]
    if not pos or not neg:
        return float("nan")
    return -np.mean(pos) / np.mean(neg)
class DFLogger(object):
    """The logger for single-asset backtest.

    Would save .pkl and .log in log_dir. Infos are pushed onto a queue and
    consumed by a worker process started in :meth:`reset`; :meth:`summary`
    stops the worker and returns the aggregated statistics.
    """

    def __init__(self, log_dir, order_dir, writer=None):
        """
        :param log_dir: directory the per-instrument ``.log`` / ``.pkl`` files are written to.
        :param order_dir: directory holding the ``<instrument>.pkl.target`` order files.
        :param writer: unused.
        """
        self.order_dir = order_dir + "/"
        self.log_dir = log_dir + "/"
        # makedirs (unlike mkdir) also works when parent directories are missing.
        os.makedirs(log_dir, exist_ok=True)
        self.queue = Queue(100000)
        self.raw_log_dir = self.log_dir

    @staticmethod
    def _summarize(stat_cache):
        """Build the summary dict (mean/std, money-weighted averages, GLR) from the cached stats."""
        summary = {}
        for k, v in stat_cache.items():
            if not k.startswith("money"):
                summary[k + "_std"] = np.nanstd(v)
                summary[k + "_mean"] = np.nanmean(v)
        weighted_groups = (
            (["PR_sell", "ffr_sell", "PA_sell"], "money_sell"),
            (["PR_buy", "ffr_buy", "PA_buy"], "money_buy"),
            (["obs0_PR", "ffr", "PA"], "money"),
        )
        for keys, weight_key in weighted_groups:
            # Best effort: a group is skipped when its stats are missing (e.g. no
            # sell orders) or when the weights are unusable (zero sum, bad shape).
            try:
                for k in keys:
                    summary["weighted_" + k] = np.average(stat_cache[k], weights=stat_cache[weight_key])
            except (KeyError, ZeroDivisionError, TypeError, ValueError):
                pass
        for src, dst in (("PA", "GLR"), ("PA_sell", "GLR_sell"), ("PA_buy", "GLR_buy")):
            if src in stat_cache:
                summary[dst] = GLR(stat_cache[src])
        return summary

    @staticmethod
    def _worker(log_dir, order_dir, queue):
        """Consume infos from ``queue`` until ``"stop"`` arrives, then put the summary back.

        Per-instrument frames are buffered and written to ``<log_dir><ins>.log`` /
        ``.pkl`` once as many infos have arrived as the order file has non-zero amounts.
        """
        df_cache = {}
        stat_cache = {}
        os.makedirs(log_dir, exist_ok=True)
        while True:
            info = queue.get(block=True)
            if info == "stop":
                queue.put(DFLogger._summarize(stat_cache))
                break
            elif len(info) == 0:
                continue
            else:
                df = info.pop("df")
                res = info.pop("res")
                ins = df.index[0][0]
                if ins not in df_cache:
                    # (trade logs, result rows, expected number of orders)
                    df_cache[ins] = (
                        [],
                        [],
                        (pd.read_pickle(order_dir + ins + ".pkl.target")["amount"] != 0).sum(),
                    )
                df_cache[ins][0].append(df)
                df_cache[ins][1].append(res)
                if len(df_cache[ins][0]) == df_cache[ins][2]:
                    pd.concat(df_cache[ins][0]).to_pickle(log_dir + ins + ".log")
                    pd.concat(df_cache[ins][1]).to_pickle(log_dir + ins + ".pkl")
                    del df_cache[ins]
                for k, v in info.items():
                    if k not in stat_cache:
                        stat_cache[k] = []
                    # Sized values (arrays, lists) are flattened into the cache.
                    if hasattr(v, "__len__"):
                        stat_cache[k] += list(v)
                    else:
                        stat_cache[k].append(v)

    def reset(self):
        """Drain the queue and start a fresh worker process."""
        while not self.queue.empty():
            self.queue.get()
        assert self.queue.empty()
        self.child = Process(target=self._worker, args=(self.log_dir, self.order_dir, self.queue), daemon=True,)
        self.child.start()

    def set_step(self, step):
        """Redirect the output to ``<raw_log_dir><step>/`` and restart the worker."""
        self.log_dir = f"{self.raw_log_dir}{step}/"
        self.reset()

    def __call__(self, infos):
        """Strip ``env_id`` from every info (in place) and store the infos."""
        for info in infos:
            if "env_id" in info:
                info.pop("env_id")
        self.update(infos)

    def update(self, infos):
        """store values in info into the logger"""
        for info in infos:
            self.queue.put(info, block=True)

    def summary(self):
        """:return: The mean and std of values in infos stored in logger"""
        self.queue.put("stop", block=True)
        self.child.join()
        self.child.close()
        # The worker has consumed everything and left exactly its summary behind.
        assert self.queue.qsize() == 1
        return self.queue.get()
class InfoLogger(DFLogger):
    """Logger that only aggregates statistics and writes no files.

    The worker process is started once in ``__init__`` and stays alive across
    summaries; each ``"stop"`` message makes it emit a summary and start over
    with an empty cache.
    """

    def __init__(self, *args):
        self.stat_cache = {}
        self.queue = Queue(10000)
        # ``_worker`` is static; its first argument (``logdir``) is unused.
        self.child = Process(target=self._worker, args=(None, self.queue), daemon=True)
        self.child.start()

    @staticmethod
    def _summarize(stat_cache):
        """Build the summary dict (mean/std, money-weighted averages, GLR) from the cached stats."""
        summary = {}
        for k, v in stat_cache.items():
            if not k.startswith("money"):
                summary[k + "_std"] = np.nanstd(v)
                summary[k + "_mean"] = np.nanmean(v)
        weighted_groups = (
            (["PR_sell", "ffr_sell", "PA_sell"], "money_sell"),
            (["PR_buy", "ffr_buy", "PA_buy"], "money_buy"),
            (["obs0_PR", "ffr", "PA"], "money"),
        )
        for keys, weight_key in weighted_groups:
            # Best effort: a group is skipped when its stats are missing or the
            # weights are unusable (zero sum, bad shape).
            try:
                for k in keys:
                    summary["weighted_" + k] = np.average(stat_cache[k], weights=stat_cache[weight_key])
            except (KeyError, ZeroDivisionError, TypeError, ValueError):
                pass
        # Guarding the lookups keeps the worker alive when nothing was logged;
        # a dead worker would leave ``summary()`` polling forever.
        for src, dst in (("PA", "GLR"), ("PA_sell", "GLR_sell"), ("PA_buy", "GLR_buy")):
            if src in stat_cache:
                summary[dst] = GLR(stat_cache[src])
        return summary

    @staticmethod
    def _worker(logdir, queue):
        """Aggregate infos from ``queue`` forever; answer every ``"stop"`` with a summary.

        :param logdir: unused.
        :param queue: queue shared with the parent, used in both directions.
        """
        stat_cache = {}
        while True:
            info = queue.get(block=True)
            if info == "stop":
                queue.put(InfoLogger._summarize(stat_cache))
                stat_cache = {}
                # The summary travels over the same queue this loop reads from;
                # presumably the pause gives the parent time to take it off before
                # the worker resumes reading.
                time.sleep(5)
                continue
            if len(info) == 0:
                continue
            for k, v in info.items():
                if k == "res" or k == "df":
                    continue
                if k not in stat_cache:
                    stat_cache[k] = []
                if hasattr(v, "__len__"):
                    stat_cache[k] += list(v)
                else:
                    stat_cache[k].append(v)

    def _update(self, info):
        """Accumulate one info dict into ``self.stat_cache`` (in-process counterpart of the worker loop)."""
        if len(info) == 0:
            return
        for k, v in info.items():
            # Same filtering as the worker: the per-episode frames are not statistics.
            if k == "res" or k == "df":
                continue
            if k not in self.stat_cache:
                self.stat_cache[k] = []
            if hasattr(v, "__len__"):
                self.stat_cache[k] += list(v)
            else:
                self.stat_cache[k].append(v)

    def summary(self):
        """Wait until the worker has consumed all infos, request a summary and return it."""
        while not self.queue.empty():
            time.sleep(1)
        self.queue.put("stop")
        time.sleep(1)
        # Poll until the worker has swapped the "stop" message for its summary.
        while self.queue.qsize() != 1:
            time.sleep(1)
        assert self.queue.qsize() == 1
        return self.queue.get()

    def set_step(self, step):
        """No-op: this logger writes no files, so there is nothing to redirect."""
        return

135
examples/trade/main.py Normal file
View File

@@ -0,0 +1,135 @@
import re
import os
import argparse
import yaml
from executor import Executor
import warnings
import redis
import subprocess
warnings.filterwarnings("ignore")
from util import merge_dicts
# ``loader`` is the ``yaml.FullLoader`` class itself (an alias, not a copy), so the
# resolver registered below applies to every later use of ``yaml.FullLoader`` too.
loader = yaml.FullLoader
# Register an additional implicit resolver for floats. Its second alternative
# accepts digits followed directly by an exponent (no decimal point), and the
# exponent sign is optional in the first two alternatives, so scalars written
# like ``1e-4`` resolve to floats.
# NOTE(review): presumably added because the stock resolver leaves such scalars as strings — confirm.
loader.add_implicit_resolver(
    "tag:yaml.org,2002:float",
    re.compile(
        """^(?:
[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
|[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
|\\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
|[-+]?\\.(?:inf|Inf|INF)
|\\.(?:nan|NaN|NAN))$""",
        re.X,
    ),
    # Only scalars starting with one of these characters are tested against the pattern.
    list("-+0123456789."),
)
def get_full_config(config, dir_name):
    """Resolve the chain of ``base`` configs and merge it into ``config``.

    While ``config`` has a ``"base"`` key, that entry is removed, interpreted as
    a path relative to ``dir_name`` (then relative to the directory of the base
    file just loaded), loaded as YAML and merged with ``config`` through
    ``merge_dicts(base_config, config)``.

    :param config: the config dict; its ``"base"`` key is popped in place.
    :param dir_name: directory that the first ``base`` path is relative to.
    :returns: the merged config, or ``config`` itself when it has no ``"base"``.
    """
    while "base" in config:
        base_path = os.path.normpath(os.path.join(dir_name, config.pop("base")))
        dir_name = os.path.dirname(base_path)
        with open(base_path, "r") as f:
            # Parse the file contents; passing the path string to yaml.load would
            # parse the path itself as a YAML document.
            base_config = yaml.load(f, Loader=yaml.FullLoader)
        config = merge_dicts(base_config, config)
    return config
def run(config):
    """Run one experiment described by ``config``.

    The config is dumped to ``<log_dir>/config.yml``, an ``Executor`` is built
    from it, and the task named by ``config["task"]`` is executed.

    :param config: full experiment configuration; also used as keyword arguments of ``Executor``.
    :returns: the result of ``Executor.train`` or ``Executor.eval``.
    :raises NotImplementedError: if the task is neither ``"train"`` nor ``"eval"``.
    """
    out_dir = config["log_dir"]
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(out_dir + "/config.yml", "w") as dump_file:
        yaml.dump(config, dump_file)

    executor = Executor(**config)
    if config["task"] == "train":
        return executor.train(**config["optim"])
    if config["task"] == "eval":
        order_dir = config["test_paths"]["order_dir"]
        test_log_dir = config["log_dir"] + "/test/"
        return executor.eval(order_dir, save_res=True, logdir=test_log_dir)
    raise NotImplementedError
# Entry point. ``--config`` is a path relative to $EXP_PATH and selects one of three modes:
#   * a directory plus ``--index``: run the ``index``-th document of ``<dir>/configs.yml``;
#   * a directory without ``--index``: act as a scheduler that takes trial indices
#     from a redis list and runs each one in a subprocess of this script;
#   * a single ``.yml`` file: run that config directly.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", type=str)
    parser.add_argument("-n", "--index", type=int, default=None)
    args = parser.parse_args()
    print(os.cpu_count())
    EXP_PATH = os.environ["EXP_PATH"]
    config_path = os.path.normpath(os.path.join(EXP_PATH, args.config))
    # The experiment name (also the redis key) is the config path relative to EXP_PATH.
    EXP_NAME = os.path.relpath(config_path, EXP_PATH)
    if os.path.isdir(config_path):
        if not args.index is None:
            # Worker mode: run a single trial of the multi-document configs.yml.
            with open(config_path + "/configs.yml") as f:
                config_list = list(yaml.load_all(f, Loader=loader))
            config = config_list[args.index]
            # $PT_OUTPUT_DIR replaces the log dir entirely; otherwise the configured
            # log dir is placed under $OUTPUT_DIR (default "../log").
            if "PT_OUTPUT_DIR" in os.environ:
                config["log_dir"] = os.environ["PT_OUTPUT_DIR"]
            else:
                log_prefix = os.environ["OUTPUT_DIR"] if "OUTPUT_DIR" in os.environ else "../log"
                config["log_dir"] = os.path.join(log_prefix, config["log_dir"])
            config = get_full_config(config, config_path)
            run(config)
        else:
            # Scheduler mode: the redis list EXP_NAME holds the pending trial indices
            # and the key "<EXP_NAME>_<i>" holds the status of trial i.
            redis_server = redis.Redis(
                host=os.environ["REDIS_SERVER"],
                port=os.environ["REDIS_PORT"],
                db=0,
                charset="utf-8",
                decode_responses=True,
            )
            with open(config_path + "/configs.yml") as f:
                config_list = list(yaml.load_all(f, Loader=loader))
            config_num = len(config_list)
            if not redis_server.exists(EXP_NAME):
                # No pending list for this experiment: enqueue every trial.
                for i in range(config_num):
                    redis_server.rpush(EXP_NAME, i)
                    redis_server.set(f"{EXP_NAME}_{i}", "Pending")
            else:
                if redis_server.llen(EXP_NAME) == 0:
                    # Re-enqueue only the trials that have no status or have failed.
                    for i in range(config_num):
                        if (
                            not redis_server.exists(f"{EXP_NAME}_{i}")
                            or redis_server.get(f"{EXP_NAME}_{i}") == "Failed"
                        ):
                            redis_server.rpush(EXP_NAME, i)
                            redis_server.set(f"{EXP_NAME}_{i}", "Pending")
            print(f"Starting..., {redis_server.llen(EXP_NAME)} trails to run")
            while True:
                index = redis_server.lpop(EXP_NAME)
                if index is None:
                    print("All done")
                    break
                index = int(index)
                redis_server.set(f"{EXP_NAME}_{index}", "Running")
                print(f"Trail_{index} is running")
                try:
                    # Each trial runs in its own process of this script (worker mode above);
                    # "main.py" is resolved relative to the current working directory.
                    res = subprocess.run(["python", "main.py", "--config", args.config, "--index", str(index),],)
                except KeyboardInterrupt:
                    # An interrupt marks the current trial as failed and stops the scheduler.
                    redis_server.set(f"{EXP_NAME}_{index}", "Failed")
                    print(f"Trail_{index} has failed, {redis_server.llen(EXP_NAME)} trails to run")
                    break
                if res.returncode == 0:
                    redis_server.set(f"{EXP_NAME}_{index}", "Finished")
                    print(f"Finish running one trail, {redis_server.llen(EXP_NAME)} trails to run")
                else:
                    redis_server.set(f"{EXP_NAME}_{index}", "Failed")
                    print(f"Trail_{index} has failed, {redis_server.llen(EXP_NAME)} trails to run")
    elif os.path.isfile(config_path):
        # Single-file mode.
        assert config_path.endswith(".yml"), "Config file should be an yaml file"
        # Strip the ".yml" suffix.
        EXP_NAME = EXP_NAME[:-4]
        with open(config_path, "r") as f:
            config = yaml.load(f, Loader=loader)
        config = get_full_config(config, os.path.dirname(config_path))
        log_prefix = os.environ["OUTPUT_DIR"] if "OUTPUT_DIR" in os.environ else "../log"
        config["log_dir"] = os.path.join(log_prefix, config["log_dir"])
        run(config)
    else:
        print("The config path should be a relative path from EXP_PATH")

View File

@@ -0,0 +1,5 @@
from .ppo import *
from .qmodel import *
from .teacher import *
from .util import *
from .opd import *

View File

@@ -0,0 +1,74 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from copy import deepcopy
import sys
from tianshou.data import to_torch
class OPD_Extractor(nn.Module):
    """Feature extractor for observations whose first column carries the teacher action.

    After that column is split off, the observation is read as ``6 * 240`` raw
    values, followed by a run of value pairs, followed by one trailing index
    column that selects which recurrent output step is used.
    """

    def __init__(self, device="cpu", **kargs):
        """
        :param device: device the input is moved to in ``forward``.
        :param kargs: must contain ``hidden_size``, ``fc_size`` and ``cnn_shape``
            (``cnn_shape[0]`` is the window length, ``cnn_shape[1]`` the channel count).
        """
        super().__init__()
        self.device = device
        hidden_size = kargs["hidden_size"]
        fc_size = kargs["fc_size"]  # required key; the value itself is not used here
        self.cnn_shape = kargs["cnn_shape"]
        window, channels = self.cnn_shape[0], self.cnn_shape[1]

        self.rnn = nn.GRU(64, hidden_size, batch_first=True)
        self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
        self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU())
        self.cnn = nn.Sequential(nn.Conv1d(channels, 3, 3), nn.ReLU())
        # A kernel of size 3 without padding shortens the window by 2.
        self.raw_fc = nn.Sequential(nn.Linear((window - 2) * 3, 64), nn.ReLU())
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 32),
            nn.ReLU(),
        )

    def forward(self, inp):
        """Return ``(feature, teacher_action / 2)`` for a batch of flat observations."""
        obs = to_torch(inp, dtype=torch.float32, device=self.device)
        teacher_action, body = obs[:, 0], obs[:, 1:]
        step_idx = body[:, -1].to(torch.long)
        n_rows = body.shape[0]
        raw_width = 6 * 240

        # Prepend one all-zero window of 30 x 6 values, then cut the raw part into
        # windows of 30 steps with 6 channels each (channels first for Conv1d).
        zero_window = torch.zeros_like(body[:, : 6 * 30])
        windows = torch.cat((zero_window, body[:, :raw_width]), dim=-1)
        windows = windows.reshape(-1, 30, 6).transpose(1, 2)
        pairs = body[:, raw_width:-1].reshape(n_rows, -1, 2)

        window_feat = self.raw_fc(self.cnn(windows).view(n_rows, 9, -1))
        pair_feat = self.dnn(pairs)
        pair_seq = self.rnn2(pair_feat)[0]
        window_seq = self.rnn(window_feat)[0]

        # Keep, for every row, the recurrent output at that row's step index.
        window_last = window_seq[torch.arange(window_seq.size(0)), step_idx]
        pair_last = pair_seq[torch.arange(pair_seq.size(0)), step_idx]
        feature = self.fc(torch.cat((window_last, pair_last), dim=-1))
        return feature, teacher_action / 2
class OPD_Actor(nn.Module):
    """Policy head: softmax over ``out_shape`` actions on top of a shared extractor."""

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        super().__init__()
        self.extractor = extractor
        self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1))
        self.device = device

    def forward(self, obs, state=None, info={}):
        """Return ``(action_probabilities, state)``.

        The teacher action produced by the extractor is stored on
        ``self.teacher_action`` as a side effect; ``info`` is not used.
        """
        extracted = self.extractor(obs)
        feature, self.teacher_action = extracted
        return self.layer_out(feature), state
class OPD_Critic(nn.Module):
    """Value head: one scalar per observation on top of a shared extractor."""

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        # ``out_shape`` is accepted for signature parity with the actor; unused here.
        super().__init__()
        self.extractor = extractor
        self.value_out = nn.Linear(32, 1)
        self.device = device

    def forward(self, obs, state=None, info={}):
        """Return the value estimate with the trailing unit axis squeezed away.

        The extractor's teacher action is stored on ``self.teacher_action``.
        """
        extracted = self.extractor(obs)
        feature, self.teacher_action = extracted
        value = self.value_out(feature)
        return value.squeeze(dim=-1)

View File

@@ -0,0 +1,79 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from copy import deepcopy
import sys
from tianshou.data import to_torch
class PPO_Extractor(nn.Module):
    """Feature extractor for the PPO networks.

    ``forward`` reads the first ``6 * 240`` columns as the raw series, the
    last column as the step index, and the 18 columns before it as 2-value
    pairs. NaN checks run after every stage.
    """

    def __init__(self, device="cpu", **kargs):
        """Build the sub-networks.

        :param device: device the observation is moved to in ``forward``.
        :param kargs: must provide ``hidden_size``, ``fc_size`` and ``cnn_shape``.
        """
        super().__init__()
        self.device = device
        hidden_size = kargs["hidden_size"]
        # Looked up but never used; kept so a missing key still raises KeyError.
        fc_size = kargs["fc_size"]
        self.cnn_shape = kargs["cnn_shape"]

        self.rnn = nn.GRU(64, hidden_size, batch_first=True)
        self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
        self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU())
        self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU())
        self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU())
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 32),
            nn.ReLU(),
        )

    def forward(self, inp):
        """Compute the 32-wide feature, cache it on ``self.feature`` and return it."""
        obs = to_torch(inp, dtype=torch.float32, device=self.device)
        last_step = obs[:, -1].to(torch.long)
        n_rows = obs.shape[0]

        # Prepend 6 * 30 zeros so the raw series splits into 9 chunks of 30 x 6.
        padding = torch.zeros_like(obs[:, : 6 * 30])
        market = torch.cat((padding, obs[:, : 6 * 240]), dim=-1)
        market = market.reshape(-1, 30, 6).transpose(1, 2)
        private = obs[:, -19:-1].reshape(n_rows, -1, 2)

        conv_out = self.cnn(market).view(n_rows, 9, -1)
        assert not torch.isnan(conv_out).any()
        market_seq = self.raw_fc(conv_out)
        assert not torch.isnan(market_seq).any()
        private_seq = self.dnn(private)
        assert not torch.isnan(private_seq).any()
        private_out = self.rnn2(private_seq)[0]
        assert not torch.isnan(private_out).any()
        market_out = self.rnn(market_seq)[0]
        assert not torch.isnan(market_out).any()

        # Keep only the recurrent output at each row's own step index.
        market_last = market_out[torch.arange(market_out.size(0)), last_step]
        private_last = private_out[torch.arange(private_out.size(0)), last_step]

        self.feature = self.fc(torch.cat((market_last, private_last), dim=-1))
        return self.feature
class PPO_Actor(nn.Module):
    """Policy head: softmax over ``out_shape`` actions on top of a shared extractor."""

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        super().__init__()
        self.extractor = extractor
        self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1))
        self.device = device

    def forward(self, obs, state=None, info={}):
        """Return ``(action_probabilities, state)``.

        The extracted feature is cached on ``self.feature`` and must be finite;
        ``info`` is not used.
        """
        self.feature = self.extractor(obs)
        has_nan = torch.isnan(self.feature).any()
        has_inf = torch.isinf(self.feature).any()
        assert not (has_nan | has_inf), f"{self.feature}"
        probs = self.layer_out(self.feature)
        return probs, state
class PPO_Critic(nn.Module):
    """Value head: one scalar per observation on top of a shared extractor."""

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        # ``out_shape`` is accepted for signature parity with the actor; unused here.
        super().__init__()
        self.extractor = extractor
        self.value_out = nn.Linear(32, 1)
        self.device = device

    def forward(self, obs, state=None, info={}):
        """Cache the extracted feature on ``self.feature`` and return the value estimate."""
        self.feature = self.extractor(obs)
        value = self.value_out(self.feature)
        return value.squeeze(dim=-1)

View File

@@ -0,0 +1,52 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from copy import deepcopy
import sys
from tianshou.data import to_torch
class RNNQModel(nn.Module):
    """Q-network producing ``out_shape`` values per observation.

    ``forward`` discards the first 182 observation columns, then reads
    ``6 * 240`` raw columns, a run of 2-value pairs, and a final step-index
    column.
    """

    def __init__(self, device="cpu", out_shape=10, **kargs):
        """Build the sub-networks.

        :param device: device the observation is moved to in ``forward``.
        :param out_shape: width of the final linear layer.
        :param kargs: must provide ``hidden_size``, ``fc_size`` and ``cnn_shape``.
        """
        super().__init__()
        self.device = device
        hidden_size = kargs["hidden_size"]
        # Looked up but never used; kept so a missing key still raises KeyError.
        fc_size = kargs["fc_size"]
        self.cnn_shape = kargs["cnn_shape"]

        self.rnn = nn.GRU(64, hidden_size, batch_first=True)
        self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
        self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU())
        self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU())
        self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU())
        head = [
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 32),
            nn.ReLU(),
            nn.Linear(32, out_shape),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, obs, state=None, info={}):
        """Return ``(q_values, state)``; ``info`` is not used."""
        full = to_torch(obs, dtype=torch.float32, device=self.device)
        trimmed = full[:, 182:]
        last_step = trimmed[:, -1].to(torch.long)
        n_rows = trimmed.shape[0]

        # Prepend 6 * 30 zeros so the raw series splits into 9 chunks of 30 x 6.
        padding = torch.zeros_like(trimmed[:, : 6 * 30])
        market = torch.cat((padding, trimmed[:, : 6 * 240]), dim=-1)
        market = market.reshape(-1, 30, 6).transpose(1, 2)
        private = trimmed[:, 6 * 240 : -1].reshape(n_rows, -1, 2)

        market_seq = self.raw_fc(self.cnn(market).view(n_rows, 9, -1))
        private_seq = self.dnn(private)
        private_out, _ = self.rnn2(private_seq)
        market_out, _ = self.rnn(market_seq)

        # Keep only the recurrent output at each row's own step index.
        market_last = market_out[torch.arange(market_out.size(0)), last_step]
        private_last = private_out[torch.arange(private_out.size(0)), last_step]

        q_values = self.fc(torch.cat((market_last, private_last), dim=-1))
        return q_values, state

View File

@@ -0,0 +1,69 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from copy import deepcopy
import sys
from tianshou.data import to_torch
class Teacher_Extractor(nn.Module):
    """Feature extractor for the teacher networks.

    ``forward`` reads the first ``6 * 240`` columns as the public part of the
    state, the last column as the step index, and everything in between as
    the private part (2-value pairs).
    """

    def __init__(self, device="cpu", feature_size=180, **kargs):
        """Build the sub-networks.

        :param device: device the observation is moved to in ``forward``.
        :param feature_size: accepted but not used by this class.
        :param kargs: must provide ``hidden_size``, ``fc_size`` and ``cnn_shape``.
        """
        super().__init__()
        self.device = device
        hidden_size = kargs["hidden_size"]
        # Looked up but never used; kept so a missing key still raises KeyError.
        fc_size = kargs["fc_size"]
        self.cnn_shape = kargs["cnn_shape"]

        self.rnn = nn.GRU(64, hidden_size, batch_first=True)
        self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
        self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU())
        self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU())
        self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU())
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 32),
            nn.ReLU(),
        )

    def forward(self, inp):
        """Compute the 32-wide feature, cache it on ``self.feature`` and return it."""
        obs = to_torch(inp, dtype=torch.float32, device=self.device)
        last_step = obs[:, -1].to(torch.long)
        n_rows = obs.shape[0]

        public = obs[:, : 6 * 240].reshape(-1, 30, 6).transpose(1, 2)  # public part of state
        private = obs[:, 6 * 240 : -1].reshape(n_rows, -1, 2)  # private part of state

        public_seq = self.raw_fc(self.cnn(public).view(n_rows, 8, -1))
        private_seq = self.dnn(private)
        private_out, _ = self.rnn2(private_seq)
        public_out, _ = self.rnn(public_seq)

        # The public branch always uses its final step; the private branch is
        # read at each row's own step index.
        public_last = public_out[:, -1, :]
        private_last = private_out[torch.arange(private_out.size(0)), last_step]

        self.feature = self.fc(torch.cat((public_last, private_last), dim=-1))
        return self.feature
class Teacher_Actor(nn.Module):
    """Policy head: softmax over ``out_shape`` actions on top of a shared extractor."""

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        super().__init__()
        self.extractor = extractor
        self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1))
        self.device = device

    def forward(self, obs, state=None, info={}):
        """Return ``(action_probabilities, state)``; the feature is cached on ``self.feature``."""
        self.feature = self.extractor(obs)
        probs = self.layer_out(self.feature)
        return probs, state
class Teacher_Critic(nn.Module):
    """Value head: one scalar per observation on top of a shared extractor."""

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        # ``out_shape`` is accepted for signature parity with the actor; unused here.
        super().__init__()
        self.extractor = extractor
        self.value_out = nn.Linear(32, 1)
        self.device = device

    def forward(self, obs, state=None, info={}):
        """Cache the extracted feature on ``self.feature`` and return the value estimate."""
        self.feature = self.extractor(obs)
        value = self.value_out(self.feature)
        return value.squeeze(-1)

View File

@@ -0,0 +1,191 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from copy import deepcopy
import sys
from tianshou.data import to_torch
class Attention(nn.Module):
    """Additive attention that pools a sequence of values with a single key.

    Each step's score comes from a small MLP applied to ``[key, value_step]``;
    the softmax-weighted sum of the values is then projected to ``out_dim``.
    """

    def __init__(self, in_dim, out_dim):
        """
        :param in_dim: width of both ``key`` and each step of ``value``.
        :param out_dim: width of the returned vector.
        """
        super().__init__()
        self.get_w = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.ReLU(), nn.Linear(in_dim, 1))
        self.fc = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU())

    def forward(self, value, key):
        """Pool ``value`` (B, l, in_dim) using ``key`` (B, in_dim); returns (B, out_dim)."""
        length = value.shape[1]
        key = key.unsqueeze(dim=1).repeat([1, length, 1])
        # Squeeze only the trailing score axis. A bare ``squeeze()`` would also
        # drop a length-1 sequence axis, making the softmax below normalise
        # across the batch instead of across the steps.
        weight = self.get_w(torch.cat((key, value), dim=-1)).squeeze(-1)  # B * l
        weight = weight.softmax(dim=-1).unsqueeze(dim=-1)  # B * l * 1
        pooled = (value * weight).sum(dim=1)
        return self.fc(pooled)
class MaskAttention(nn.Module):
    """Additive attention over a sequence, with steps after ``seq_len`` masked out."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.get_w = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.ReLU(), nn.Linear(in_dim, 1))
        self.fc = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU())

    def forward(self, value, key, seq_len, maxlen=9):
        """Pool ``value`` with ``key``, attending only to the first ``seq_len + 1`` steps.

        :param seq_len: per-row index of the last valid step, shape (batch,).
        :param maxlen: width of the generated mask.
        """
        steps = value.shape[1]
        tiled_key = key.unsqueeze(dim=1).repeat([1, steps, 1])  # (batch, 9, 64)
        scores = self.get_w(torch.cat((tiled_key, value), dim=-1)).squeeze(-1)  # (batch, 9)
        valid = sequence_mask(seq_len + 1, maxlen=maxlen, device=value.device)
        # Masked steps get -inf so the softmax assigns them zero weight.
        scores[~valid] = float("-inf")
        attn = scores.softmax(dim=-1).unsqueeze(dim=-1)
        pooled = (value * attn).sum(dim=1)
        return self.fc(pooled)
class TFMaskAttention(nn.Module):
    """Masked additive attention whose step mask is tiled three times along the sequence axis."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.get_w = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.ReLU(), nn.Linear(in_dim, 1))
        self.fc = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU())

    def forward(self, value, key, seq_len, maxlen=9):
        """Pool ``value`` with ``key`` under a ``(batch, maxlen * 3)`` validity mask."""
        steps = value.shape[1]
        tiled_key = key.unsqueeze(dim=1).repeat([1, steps, 1])
        scores = self.get_w(torch.cat((tiled_key, value), dim=-1)).squeeze(-1)
        valid = sequence_mask(seq_len + 1, maxlen=maxlen, device=value.device)
        valid = valid.repeat(1, 3)  # (batch, 9*3)
        # Masked steps get -inf so the softmax assigns them zero weight.
        scores[~valid] = float("-inf")
        attn = scores.softmax(dim=-1).unsqueeze(dim=-1)
        pooled = (value * attn).sum(dim=1)
        return self.fc(pooled)
class NNAttention(nn.Module):
    """Unscaled dot-product attention with learned linear maps for Q, K and V."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.q_net = nn.Linear(in_dim, out_dim)
        self.k_net = nn.Linear(in_dim, out_dim)
        self.v_net = nn.Linear(in_dim, out_dim)

    def forward(self, Q, K, V):
        """Attend from ``Q`` (B, j, in_dim) over ``K``/``V`` (B, l, in_dim); returns (B, j, out_dim)."""
        queries = self.q_net(Q)
        keys = self.k_net(K)
        values = self.v_net(V)
        # scores[b, j, l] = <queries[b, j], keys[b, l]>
        scores = torch.matmul(queries, keys.transpose(1, 2)).to(Q.device)
        probs = torch.softmax(scores, dim=-1)
        return torch.matmul(probs, values)
class Reshape(nn.Module):
    """Module wrapper around ``Tensor.view`` with a fixed target shape."""

    def __init__(self, *args):
        super().__init__()
        # Stored as the tuple of positional arguments and handed to ``view`` as-is.
        self.shape = args

    def forward(self, x):
        """Return ``x`` viewed with the shape given at construction."""
        target_shape = self.shape
        return x.view(target_shape)
class DARNN(nn.Module):
    """Two-LSTM model attending from one series over another.

    The input is viewed as ``(B, 480, 6)``. The first ``today_length`` steps
    (prefixed with one all-zero step) go through ``self.rnn``; steps
    ``240 .. 240 + prev_length`` go through ``self.prev_rnn``. The first
    LSTM's outputs attend over the second's, and a sigmoid head is read at
    every ``interval``-th step.
    """

    def __init__(self, device="cpu", **kargs):
        """
        :param device: accepted for interface parity; not used by this class.
        :param kargs: must provide ``emb_dim``, ``hidden_size``, ``num_layers``,
            ``is_bidir``, ``dropout``, ``seq_len`` and ``interval``.
        """
        super().__init__()
        self.emb_dim = kargs["emb_dim"]
        self.hidden_size = kargs["hidden_size"]
        self.num_layers = kargs["num_layers"]
        self.is_bidir = kargs["is_bidir"]
        self.dropout = kargs["dropout"]
        self.seq_len = kargs["seq_len"]
        self.interval = kargs["interval"]
        self.today_length = 238
        self.prev_length = 240
        self.input_length = 480
        self.input_size = 6
        # A bidirectional LSTM emits 2 * hidden_size features per step. The
        # attention input and output-layer widths must follow that, otherwise
        # is_bidir=True fails with a shape mismatch in forward().
        rnn_out_dim = self.hidden_size * (2 if self.is_bidir else 1)
        self.rnn = nn.LSTM(
            input_size=self.input_size + self.emb_dim,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
            bidirectional=self.is_bidir,
            dropout=self.dropout,
        )
        self.prev_rnn = nn.LSTM(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
            bidirectional=self.is_bidir,
            dropout=self.dropout,
        )
        # forward() concatenates the LSTM output (rnn_out_dim) with the
        # attention output (hidden_size); for is_bidir=False this is the
        # original 2 * hidden_size.
        self.fc_out = nn.Linear(in_features=rnn_out_dim + self.hidden_size, out_features=1)
        self.attention = NNAttention(rnn_out_dim, self.hidden_size)
        self.act_out = nn.Sigmoid()
        if self.emb_dim != 0:
            self.pos_emb = nn.Embedding(self.input_length, self.emb_dim)

    def forward(self, inputs):
        """Return ``(sigmoid_scores, features)`` sampled every ``interval`` steps.

        ``sigmoid_scores`` is ``(B, seq_len)`` and ``features`` keeps the
        concatenated LSTM/attention vector for the same steps.
        """
        inputs = inputs.view(-1, self.input_length, self.input_size)  # [B, T, F]
        today_input = inputs[:, : self.today_length, :]
        # Prefix one all-zero step, giving today_length + 1 steps.
        today_input = torch.cat((torch.zeros_like(today_input[:, :1, :]), today_input), dim=1)
        prev_input = inputs[:, 240 : 240 + self.prev_length, :]
        if self.emb_dim != 0:
            embedding = self.pos_emb(torch.arange(end=self.today_length + 1, device=inputs.device))
            embedding = embedding.repeat([today_input.size()[0], 1, 1])
            today_input = torch.cat((today_input, embedding), dim=-1)
        prev_outs, _ = self.prev_rnn(prev_input)
        today_outs, _ = self.rnn(today_input)
        outs = self.attention(today_outs, prev_outs, prev_outs)
        outs = torch.cat((today_outs, outs), dim=-1)
        outs = outs[:, range(0, self.seq_len * self.interval, self.interval), :]
        return self.act_out(self.fc_out(outs).squeeze(-1)), outs
class Transpose(nn.Module):
    """Module wrapper around ``Tensor.transpose`` for a fixed pair of axes."""

    def __init__(self, dim1=0, dim2=1):
        super().__init__()
        self.dim1, self.dim2 = dim1, dim2

    def forward(self, x):
        """Return ``x`` with axes ``dim1`` and ``dim2`` swapped."""
        first, second = self.dim1, self.dim2
        return x.transpose(first, second)
class SelfAttention(nn.Module):
    """``nn.MultiheadAttention`` applied with the same tensor as query, key and value."""

    def __init__(self, *args, **kargs):
        # All arguments are forwarded unchanged to nn.MultiheadAttention.
        super().__init__()
        self.attention = nn.MultiheadAttention(*args, **kargs)

    def forward(self, x):
        """Return only the attended output, dropping the attention weights."""
        attended, _weights = self.attention(x, x, x)
        return attended
def onehot_enc(y, len):
    """One-hot encode a 1-D tensor of class indices.

    :param y: integer (int64) tensor of shape ``(N,)`` with values in ``[0, len)``.
    :param len: number of classes, i.e. the width of the encoding. The name
        shadows the builtin but is kept for keyword-argument compatibility.
    :return: float tensor of shape ``(N, len)`` on the same device as ``y``.
    """
    index = y.unsqueeze(-1)
    y_onehot = torch.zeros(index.shape[0], len, device=index.device)
    # In-place scatter_: the out-of-place ``scatter`` returns a new tensor, so
    # discarding its result left the encoding all zeros.
    y_onehot.scatter_(1, index, 1)
    return y_onehot
def sequence_mask(lengths, maxlen=None, dtype=torch.bool, device=None):
    """Build a ``(batch, maxlen)`` mask whose row ``b`` is true for the first ``lengths[b]`` positions.

    :param lengths: 1-D tensor (or sequence) of valid lengths.
    :param maxlen: mask width; defaults to the largest entry of ``lengths``.
    :param dtype: dtype of the returned mask.
    :param device: device for the mask; defaults to the device of ``lengths``.
    :return: tensor of shape ``(len(lengths), maxlen)`` with the requested dtype.
    """
    lengths = torch.as_tensor(lengths, device=device)
    if maxlen is None:
        maxlen = lengths.max()
    # A plain int is required for arange; lengths.max() yields a 0-d tensor.
    maxlen = int(maxlen)
    # 1-based position index: position p is valid when p <= length.
    positions = torch.arange(1, maxlen + 1, device=lengths.device).unsqueeze(0)
    mask = positions <= lengths.unsqueeze(1)
    # ``Tensor.type`` is not in-place; the converted tensor must be returned.
    return mask.type(dtype)

View File

@@ -0,0 +1,3 @@
from .ppo_obs import *
from .teacher_obs import *
from .obs_rule import *

View File

@@ -0,0 +1,136 @@
import pandas as pd
import numpy as np
from gym.spaces import Discrete, Box, Tuple, MultiDiscrete
import math
import json
class BaseObs:
    """Base class for observation builders; holds the observation space."""

    def __init__(self, config):
        # ``config`` is accepted for subclasses; the base class ignores it.
        self._observation_space = None

    def get_space(self):
        """Return the observation space (``None`` unless a subclass sets it)."""
        space = self._observation_space
        return space

    def get_obs(self, t):
        """Placeholder for subclasses; the base implementation returns ``None``."""
        return None
class RuleObs(BaseObs):
    """The observation for minute-level rule-based agents, which consists of prediction, private state and direction information."""

    def __init__(self, config):
        """
        :param config: mapping with ``features`` (list of dicts that each carry
            ``size`` and ``type``), ``time_interval`` and ``max_step_num``.
        """
        self.features = config["features"]
        self.time_interval = config["time_interval"]
        self.max_step_num = config["max_step_num"]
        feature_size = sum(feature["size"] for feature in self.features)
        self._observation_space = Tuple(
            (
                Box(-np.inf, np.inf, shape=(feature_size,), dtype=np.float32),
                Box(-np.inf, np.inf, shape=(4,), dtype=np.float32),
                Discrete(2),
            )
        )

    def __call__(self, *args, **kargs):
        """Shorthand for :meth:`get_obs`."""
        return self.get_obs(*args, **kargs)

    def get_feature_res(self, df_list, time, interval, whole_day=False, interval_num=8):
        """
        This method would extract the needed feature from the feature dataframe based on the feature name
        and the description in feature config.

        :param df_list: The dataframes of features, the order is consistent with the feature list.
        :param time: The index of current minute of the day (starting from -1).
        :param interval: The index of interval or decision making.
        :param whole_day: if True, this method would return the concatenate of all dataframe.(Default value = False)
        :param interval_num: with ``whole_day``, flattened features shorter than this are zero-padded to it.
        :return: 1-D ``numpy`` array of the selected feature values.
        """
        if whole_day:
            # The original wrapped this in try / bare-except whose handler
            # repeated the identical statement; the retry could never succeed
            # where the first attempt failed, so it is dropped.
            prediction = [df_list[i].reshape(-1) for i in range(len(df_list))]
            for i, p in enumerate(prediction):
                if len(p) < interval_num:
                    prediction[i] = np.concatenate((p, np.zeros(interval_num - len(p))), axis=-1)
            return np.concatenate(prediction)

        predictions = []
        for i, feature in enumerate(self.features):
            df = df_list[i]
            size = feature["size"]
            kind = feature["type"]
            if kind == "inday":
                if time == -1:
                    predictions += [0.0] * size
                else:
                    predictions += df[size * time : size * (time + 1)].reshape(-1).tolist()
            elif kind == "daily":
                predictions += df.reshape(-1)[:size].tolist()
            elif kind == "range":
                if time == -1:
                    predictions += [0.0] * size
                else:
                    predictions += df[time : size + time].reshape(-1).tolist()
            elif kind == "interval":
                chunk = df[interval * size : (interval + 1) * size].reshape(-1)
                # An incomplete slice (past the end of the data) is zero-filled.
                predictions += chunk.tolist() if len(chunk) == size else [0.0] * size
            elif kind == "step":
                chunk = df[size * (time + 1) : size * (time + 2)].reshape(-1)
                predictions += chunk.tolist() if len(chunk) == size else [0.0] * size
            # Any other feature type contributes nothing, as before.
        return np.array(predictions)

    def get_obs(self, raw_df, feature_dfs, t, interval, position, target, is_buy, *args, **kargs):
        """Return a dict with ``prediction``, ``private`` and ``is_buy`` entries.

        ``private`` is ``[position, target, t, self.max_step_num]``; ``raw_df``
        and any extra arguments are ignored.
        """
        private_state = np.array([position, target, t, self.max_step_num])
        prediction_state = self.get_feature_res(feature_dfs, t, interval)
        return {
            "prediction": prediction_state,
            "private": private_state,
            "is_buy": int(is_buy),
        }
class RuleInterval(RuleObs):
    """
    Observation for interval-level rule-based strategies.

    Holds the interval prediction, the private state, the trade direction and
    the action passed in by the caller.
    """

    def get_obs(
        self,
        raw_df,
        feature_dfs,
        t,
        interval,
        position,
        target,
        is_buy,
        max_step_num,
        interval_num,
        action=1.0,
        *args,
        **kargs
    ):
        """Return a dict with ``prediction``, ``private``, ``is_buy`` and ``action``.

        ``private`` is ``[position, target, interval - 1, interval_num]``.
        """
        private_state = np.array([position, target, interval - 1, interval_num])
        observation = {
            "prediction": self.get_feature_res(feature_dfs, t, interval),
            "private": private_state,
            "is_buy": int(is_buy),
            "action": action,
        }
        return observation

View File

@@ -0,0 +1,28 @@
import pandas as pd
import numpy as np
from gym.spaces import Discrete, Box, Tuple, MultiDiscrete
import math
import json
from .obs_rule import RuleObs
class PPOObs(RuleObs):
    """The observation defined in IJCAI 2020. The action of previous state is included in private state"""

    def get_obs(
        self, raw_df, feature_dfs, t, interval, position, target, is_buy, max_step_num, interval_num, action=0,
    ):
        """Return the flat vector ``[public state | private history | interval]``.

        The private history accumulates one ``(position / target,
        (t + 1) / max_step_num, action)`` triple per call, is reset when
        ``t == -1``, and is zero-padded to ``interval_num + 1`` triples.
        """
        if t == -1:
            self.private_states = []
        public_state = self.get_feature_res(feature_dfs, t, interval, whole_day=True)
        self.private_states.append(np.array([position / target, (t + 1) / max_step_num, action]))

        history = np.concatenate(self.private_states)
        missing_steps = interval_num + 1 - len(self.private_states)
        padded_history = np.concatenate((history, [0.0] * 3 * missing_steps))

        return np.concatenate((public_state, padded_history, np.array([interval])))

View File

@@ -0,0 +1,55 @@
import pandas as pd
import numpy as np
from gym.spaces import Discrete, Box, Tuple, MultiDiscrete
import math
import json
from .obs_rule import RuleObs
class TeacherObs(RuleObs):
    """
    The observation used for the OPD method.

    Consists of the public state (raw feature), the private-state history and
    the interval index.
    """

    def get_obs(
        self, raw_df, feature_dfs, t, interval, position, target, is_buy, max_step_num, interval_num, *args, **kargs,
    ):
        """Return the flat vector ``[public state | private history | interval]``.

        The private history accumulates one ``(position / target,
        (t + 1) / max_step_num)`` pair per call, is reset when ``t == -1``, and
        is zero-padded to ``interval_num + 1`` pairs. Both parts are asserted
        to be finite.
        """
        if t == -1:
            self.private_states = []
        public_state = self.get_feature_res(feature_dfs, t, interval, whole_day=True)
        private_state = np.array([position / target, (t + 1) / max_step_num])
        self.private_states.append(private_state)

        history = np.concatenate(self.private_states)
        missing_steps = interval_num + 1 - len(self.private_states)
        list_private_state = np.concatenate((history, [0.0] * 2 * missing_steps))

        private_bad = np.isnan(list_private_state).any() | np.isinf(list_private_state).any()
        assert not private_bad, f"{private_state}, {target}"
        public_bad = np.isnan(public_state).any() | np.isinf(public_state).any()
        assert not public_bad, f"{public_state}"

        return np.concatenate((public_state, list_private_state, np.array([interval])))
class RuleTeacher(RuleObs):
    """Observation that prepends the teacher's action to the raw public state."""

    def get_obs(
        self, raw_df, feature_dfs, t, interval, position, target, is_buy, max_step_num, interval_num, *args, **kargs,
    ):
        """Return ``[teacher action | public state | private history | interval]``.

        The public state is the first ``6 * 240`` values of the first feature
        array. The teacher action is the tail of the per-step feature vector,
        as long as the second configured feature's ``size``.
        """
        if t == -1:
            self.private_states = []
        public_state = feature_dfs[0].reshape(-1)[: 6 * 240]
        current_private = np.array([position / target, (t + 1) / max_step_num])
        action_width = self.features[1]["size"]
        teacher_action = self.get_feature_res(feature_dfs, t, interval)[-action_width:]
        self.private_states.append(current_private)

        history = np.concatenate(self.private_states)
        missing_steps = interval_num + 1 - len(self.private_states)
        padded_history = np.concatenate((history, [0.0] * 2 * missing_steps))

        return np.concatenate((teacher_action, public_state, padded_history, np.array([interval])))

View File

@@ -0,0 +1,62 @@
import numpy as np
import pandas as pd
import os
import time
import datetime
from joblib import Parallel, delayed
# Root of the data tree; the per-instrument backtest pickles are read from its
# ``backtest/`` sub-directory.
data_path = "../data/"
in_dir = os.path.join(data_path, "backtest/")

# ---- create order folders ----
def generate_order(df, start, end):
    """Build one order row per (date, instrument) from intraday data.

    For every ``date`` group, rows at positions ``start .. end - 1`` are kept
    and averaged per (index level 2, index level 0). The order amount is that
    mean ``$volume0`` scaled by a log-normal factor.

    :param df: frame whose index carries a ``date`` level and which has
        ``$volume0`` and ``$vwap0`` columns.
    :param start: first row position (inclusive) taken from each date group.
    :param end: last row position (exclusive) taken from each date group.
    :return: DataFrame with ``amount`` and ``order_type`` columns.
    """
    # The original also computed a rolling-mean ``div`` series here that was
    # never used; the dead computation is removed.
    df = df.groupby("date").take(range(start, end)).droplevel(level=0)
    order = df.groupby(level=(2, 0)).mean().dropna()
    order = pd.DataFrame(order)
    # NOTE(review): lognormal() is called once, so every row of this frame
    # shares the same random scale factor -- confirm this is intended.
    order["amount"] = np.random.lognormal(-3.28, 1.14) * order["$volume0"]
    order["order_type"] = 0
    order = order.drop(columns=["$volume0", "$vwap0"])
    return order
def w_order(f, start, end):
    """Generate orders for one backtest pickle and write the date-based splits.

    Rows dated before 2020-12-01 go to the train folder, rows in December 2020
    to the valid folder, rows from 2021-01-01 on to the test folder, and all
    rows to the "all" folder. Empty splits are not written. The output file
    name is ``f`` without its last 9 characters plus ``.target``.

    :param f: file name inside ``in_dir``.
    :param start: first row position per day, forwarded to ``generate_order``.
    :param end: end row position per day, forwarded to ``generate_order``.
    :return: always ``0``.
    """
    order = generate_order(pd.read_pickle(in_dir + f), start, end)

    order_train = order[order.index.get_level_values(0) < "2020-12-01"]
    holdout = order[order.index.get_level_values(0) >= "2020-12-01"]
    order_valid = holdout[holdout.index.get_level_values(0) < "2021-01-01"]
    order_test = holdout[holdout.index.get_level_values(0) >= "2021-01-01"]

    stem = f[:-9]
    destinations = (
        (order_train, train_path),
        (order_valid, valid_path),
        (order_test, test_path),
        (order, all_path),
    )
    for subset, out_dir in destinations:
        if len(subset) > 0:
            subset.to_pickle(out_dir + stem + ".target")
    return 0
# Output folders for the train / valid / test / all order splits.
train_path = os.path.join(data_path, "order/train/")
valid_path = os.path.join(data_path, "order/valid/")
test_path = os.path.join(data_path, "order/test/")
all_path = os.path.join(data_path, "order/all/")
for _order_dir in (train_path, valid_path, test_path, all_path):
    if not os.path.exists(_order_dir):
        os.makedirs(_order_dir)

# Process every file of the backtest folder in parallel, using row positions
# 0..238 of each day, then print the sum of the (all-zero) return codes.
res = Parallel(n_jobs=64)(delayed(w_order)(f, 0, 239) for f in os.listdir(in_dir))
print(sum(res))

View File

@@ -0,0 +1,2 @@
from .ppo_supervision import *
from .ppo import *

View File

@@ -0,0 +1,255 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from typing import Dict, List, Tuple, Union, Optional
from tianshou.policy import PGPolicy
from tianshou.data import Batch, ReplayBuffer
from tianshou.data import to_torch
from numba import njit
import sys
sys.path.append("..")
from util import to_numpy, to_torch_as
def _episodic_return(
v_s_: np.ndarray, rew: np.ndarray, done: np.ndarray, gamma: float, gae_lambda: float,
) -> np.ndarray:
"""Numba speedup: 4.1s -> 0.057s."""
returns = np.roll(v_s_, 1)
m = (1.0 - done) * gamma
delta = rew + v_s_ * m - returns
m *= gae_lambda
gae = 0.0
for i in range(len(rew) - 1, -1, -1):
gae_new = delta[i] + m[i] * gae
gae = gae_new
returns[i] += gae
return returns
class PPO(PGPolicy):
    """PPO policy with a KL penalty and an optional teacher-feature supervision loss.

    When ``teacher`` is given, the checkpoint is loaded and the squared
    distance between the teacher actor's feature and this actor's feature is
    added to the loss, weighted by ``sup_coef``.
    """

    def __init__(
        self,
        actor: torch.nn.Module,
        critic: torch.nn.Module,
        optim: torch.optim.Optimizer,
        dist_fn: torch.distributions.Distribution,
        teacher=None,
        discount_factor: float = 0.99,
        max_grad_norm: Optional[float] = None,
        eps_clip: float = 0.2,
        vf_clip_para=10.0,
        vf_coef: float = 0.5,
        kl_coef=0.5,
        kl_target=0.01,
        ent_coef: float = 0.01,
        sup_coef=0.1,
        action_range: Optional[Tuple[float, float]] = None,
        gae_lambda: float = 0.95,
        dual_clip: Optional[float] = None,
        value_clip: bool = True,
        reward_normalization: bool = True,
        **kwargs
    ) -> None:
        """
        :param actor: policy network; must expose ``device`` and, when a teacher
            is used, cache its feature on ``feature``.
        :param critic: value network.
        :param optim: optimizer over the actor and critic parameters.
        :param dist_fn: callable building the action distribution from logits.
        :param teacher: path of a saved teacher policy, or ``None`` to disable supervision.
        :param max_grad_norm: gradient-norm clip; ``None`` disables clipping.
        :param eps_clip: PPO ratio clipping range.
        :param vf_clip_para: clipping range for the value update.
        :param vf_coef: weight of the value loss.
        :param kl_coef: initial weight of the KL penalty (adapted in ``learn``).
        :param kl_target: KL level the penalty weight is adapted towards.
        :param ent_coef: weight of the entropy bonus.
        :param sup_coef: weight of the teacher supervision loss.
        :param action_range: optional ``(low, high)`` clamp applied to actions.
        :param gae_lambda: GAE lambda, in [0, 1].
        :param dual_clip: dual-clip PPO parameter (> 1) or ``None``.
        :param value_clip: whether to use the clipped value loss.
        :param reward_normalization: normalise rewards, returns and advantages.
        """
        super().__init__(None, None, dist_fn, discount_factor, **kwargs)
        self._max_grad_norm = max_grad_norm
        self._eps_clip = eps_clip
        self._vf_clip_para = vf_clip_para
        self._w_vf = vf_coef
        self._w_ent = ent_coef
        self._range = action_range
        self.actor = actor
        self.critic = critic
        self.optim = optim
        self.sup_coef = sup_coef
        self.kl_target = kl_target
        self.kl_coef = kl_coef
        self._batch = 64
        assert 0 <= gae_lambda <= 1, "GAE lambda should be in [0, 1]."
        self._lambda = gae_lambda
        assert dual_clip is None or dual_clip > 1, "Dual-clip PPO parameter should greater than 1."
        self._dual_clip = dual_clip
        self._value_clip = value_clip
        self._rew_norm = reward_normalization
        if teacher is not None:
            # NOTE: torch.load unpickles arbitrary objects; only load trusted checkpoints.
            self.teacher = torch.load(teacher, map_location=torch.device("cpu"))
            self.teacher.to(self.actor.device)
            self.teacher.actor.extractor.device = self.actor.device
        else:
            self.teacher = None

    @staticmethod
    def compute_episodic_return(
        batch: Batch,
        v_s_: Optional[Union[np.ndarray, torch.Tensor]] = None,
        gamma: float = 0.99,
        gae_lambda: float = 0.95,
        rew_norm: bool = False,
    ) -> Batch:
        """Compute returns over given full-length episodes.

        Implementation of Generalized Advantage Estimator (arXiv:1506.02438).

        :param batch: a data batch which contains several full-episode data
            chronologically.
        :type batch: :class:`~tianshou.data.Batch`
        :param v_s_: the value function of all next states :math:`V(s')`.
        :type v_s_: numpy.ndarray
        :param float gamma: the discount factor, should be in [0, 1], defaults
            to 0.99.
        :param float gae_lambda: the parameter for Generalized Advantage
            Estimation, should be in [0, 1], defaults to 0.95.
        :param bool rew_norm: normalize the reward to Normal(0, 1), defaults
            to False.
        :return: a Batch. The result will be stored in batch.returns as a numpy
            array with shape (bsz, ).
        """
        rew = batch.rew
        v_s_ = np.zeros_like(rew) if v_s_ is None else to_numpy(v_s_.flatten())
        assert not np.isnan(v_s_).any()
        assert not np.isnan(rew).any()
        assert not np.isnan(batch.done).any()
        returns = _episodic_return(v_s_, rew, batch.done, gamma, gae_lambda)
        assert not np.isnan(returns).any()
        if rew_norm and not np.isclose(returns.std(), 0.0, 1e-2):
            returns = (returns - returns.mean()) / returns.std()
        assert not np.isnan(returns).any()
        batch.returns = returns
        return batch

    def process_fn(self, batch: Batch, buffer: ReplayBuffer, indice: np.ndarray) -> Batch:
        """Optionally normalise rewards, then attach GAE returns to ``batch``."""
        if self._rew_norm:
            mean, std = batch.rew.mean(), batch.rew.std()
            if not np.isclose(std, 0):
                batch.rew = (batch.rew - mean) / std
        assert not np.isnan(batch.rew).any()
        if self._lambda in [0, 1]:
            return self.compute_episodic_return(batch, None, gamma=self._gamma, gae_lambda=self._lambda)
        # Next-state values are evaluated in mini-batches without gradients.
        v_ = []
        with torch.no_grad():
            for b in batch.split(self._batch, shuffle=False):
                v_.append(self.critic(b.obs_next))
        v_ = to_numpy(torch.cat(v_, dim=0))
        assert not np.isnan(v_).any()
        return self.compute_episodic_return(batch, v_, gamma=self._gamma, gae_lambda=self._lambda)

    def forward(self, batch: Batch, state: Optional[Union[dict, Batch, np.ndarray]] = None, **kwargs) -> Batch:
        """Compute action over the given batch data.

        Samples from the distribution in training mode and takes the argmax of
        the logits otherwise.
        """
        logits, h = self.actor(batch.obs, state=state, info=batch.info)
        if isinstance(logits, tuple):
            dist = self.dist_fn(*logits)
        else:
            dist = self.dist_fn(logits)
        if self.training:
            try:
                act = dist.sample()
            except Exception:
                # Dump the offending logits for debugging, then retry once so
                # a persistent failure propagates to the caller.
                print(logits)
                act = dist.sample()
        else:
            act = torch.argmax(logits, dim=1)
        if self._range:
            act = act.clamp(self._range[0], self._range[1])
        return Batch(logits=logits, act=act, state=h, dist=dist)

    def learn(self, batch: Batch, batch_size: int, repeat: int, **kwargs) -> Dict[str, List[float]]:
        """Run ``repeat`` epochs of mini-batch PPO updates on ``batch``.

        :return: dict of per-update loss lists keyed ``loss/total_loss``,
            ``loss/policy``, ``loss/vf``, ``loss/entropy``, ``loss/kl`` and,
            with a teacher, ``loss/supervision``.
        """
        self._batch = batch_size
        use_teacher = self.teacher is not None
        losses, clip_losses, vf_losses, ent_losses, kl_losses = [], [], [], [], []
        if use_teacher:
            supervision_losses = []
        v = []
        old_log_prob = []
        feature = []
        old_logits = []
        # Snapshot values, log-probs and logits of the pre-update policy.
        with torch.no_grad():
            for b in batch.split(batch_size, shuffle=False):
                v.append(self.critic(b.obs))
                b_ = self(b)
                dist = b_.dist
                logits = b_.logits
                old_log_prob.append(dist.log_prob(to_torch_as(b.act, v[0])))
                old_logits.append(logits)
        if use_teacher:
            with torch.no_grad():
                for b in batch.split(batch_size, shuffle=False):
                    self.teacher(b)
                    feature.append(self.teacher.actor.feature)
            batch.old_feature = torch.cat(feature, dim=0)
        batch.old_logits = torch.cat(old_logits, dim=0)
        batch.v = torch.cat(v, dim=0)  # old value
        batch.act = to_torch_as(batch.act, v[0])
        batch.logp_old = torch.cat(old_log_prob, dim=0)
        batch.returns = to_torch_as(batch.returns, v[0]).reshape(batch.v.shape)
        if self._rew_norm:
            mean, std = batch.returns.mean(), batch.returns.std()
            if not np.isclose(std.item(), 0):
                batch.returns = (batch.returns - mean) / std
        batch.adv = batch.returns - batch.v
        if self._rew_norm:
            mean, std = batch.adv.mean(), batch.adv.std()
            if not np.isclose(std.item(), 0):
                batch.adv = (batch.adv - mean) / std
        for _ in range(repeat):
            for b in batch.split(batch_size):
                dist = self(b).dist
                value = self.critic(b.obs)
                if use_teacher:
                    # Feature cached by the actor during the self(b) call above.
                    feature = self.actor.feature
                ratio = (dist.log_prob(b.act) - b.logp_old).exp().float()
                surr1 = ratio * b.adv
                surr2 = ratio.clamp(1.0 - self._eps_clip, 1.0 + self._eps_clip) * b.adv
                if self._dual_clip:
                    clip_loss = -torch.max(torch.min(surr1, surr2), self._dual_clip * b.adv).mean()
                else:
                    clip_loss = -torch.min(surr1, surr2).mean()
                clip_losses.append(clip_loss.item())
                if self._value_clip:
                    v_clip = b.v + (value - b.v).clamp(-self._vf_clip_para, self._vf_clip_para)
                    vf1 = (b.returns - value).pow(2)
                    vf2 = (b.returns - v_clip).pow(2)
                    vf_loss = torch.max(vf1, vf2).mean()
                else:
                    vf_loss = (b.returns - value).pow(2).mean()
                if use_teacher:
                    supervision_loss = (b.old_feature - feature).pow(2).mean()
                    supervision_losses.append(supervision_loss.item())
                kl = torch.distributions.kl.kl_divergence(self.dist_fn(b.old_logits), dist)
                kl_loss = kl.mean()
                kl_losses.append(kl_loss.item())
                vf_losses.append(vf_loss.item())
                e_loss = dist.entropy().mean()
                ent_losses.append(e_loss.item())
                loss = clip_loss + self._w_vf * vf_loss - self._w_ent * e_loss + self.kl_coef * kl_loss
                if use_teacher:
                    loss += self.sup_coef * supervision_loss
                losses.append(loss.item())
                self.optim.zero_grad()
                loss.backward()
                # clip_grad_norm_ converts max_norm with float(), so the default
                # max_grad_norm=None must skip clipping instead of raising.
                if self._max_grad_norm is not None:
                    nn.utils.clip_grad_norm_(
                        list(self.actor.parameters()) + list(self.critic.parameters()), self._max_grad_norm,
                    )
                self.optim.step()
        # Adapt the KL penalty weight towards kl_target.
        # NOTE(review): the nesting of this step was reconstructed from a
        # flattened listing -- confirm it runs once per learn() call.
        cur_kl = np.mean(kl_losses)
        if cur_kl > 2.0 * self.kl_target:
            self.kl_coef *= 1.5
        elif cur_kl < 0.5 * self.kl_target:
            self.kl_coef *= 0.5
        res = {
            "loss/total_loss": losses,
            "loss/policy": clip_losses,
            "loss/vf": vf_losses,
            "loss/entropy": ent_losses,
            "loss/kl": kl_losses,
        }
        if use_teacher:
            res["loss/supervision"] = supervision_losses
        return res


# Alias under which this policy is also exposed.
Student_new = PPO

View File

@@ -0,0 +1,187 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from typing import Dict, List, Tuple, Union, Optional
from tianshou.policy import PGPolicy
from tianshou.data import Batch, ReplayBuffer
from tianshou.data import to_torch
from numba import njit
import sys
sys.path.append("..")
from util import to_numpy, to_torch_as
from .ppo import _episodic_return
# PPO policy whose training loss adds two terms to the clipped surrogate,
# value and entropy terms: a KL penalty against the pre-update policy and an
# NLL supervision term against the actor's `teacher_action`.
class PPO_sup(PGPolicy):
"""The PPO policy with a log-likelihood supervision loss"""
# Stores the networks, optimizer and loss weights on the instance.
# vf_coef / ent_coef / kl_coef / sup_coef weight the value, entropy, KL and
# supervision terms that `learn` sums into the total loss.
def __init__(
self,
actor: torch.nn.Module,
critic: torch.nn.Module,
optim: torch.optim.Optimizer,
dist_fn: torch.distributions.Distribution,
discount_factor: float = 0.99,
max_grad_norm: Optional[float] = None,
eps_clip: float = 0.2,
vf_clip_para=10.0,
vf_coef: float = 0.5,
kl_coef=0.5,
kl_target=0.01,
ent_coef: float = 0.01,
sup_coef=0.1,
action_range: Optional[Tuple[float, float]] = None,
gae_lambda: float = 0.95,
dual_clip: Optional[float] = None,
value_clip: bool = True,
reward_normalization: bool = True,
**kwargs
) -> None:
# The parent constructor receives None for its first two positional
# arguments; actor/critic/optim are kept on this instance instead.
super().__init__(None, None, dist_fn, discount_factor, **kwargs)
self._max_grad_norm = max_grad_norm
self._eps_clip = eps_clip
self._vf_clip_para = vf_clip_para
self._w_vf = vf_coef
self._w_ent = ent_coef
self._range = action_range
self.actor = actor
self.critic = critic
self.optim = optim
self.sup_coef = sup_coef
self.kl_target = kl_target
self.kl_coef = kl_coef
# Chunk size used by process_fn when evaluating the critic; learn()
# overwrites it with its own batch_size.
self._batch = 64
assert 0 <= gae_lambda <= 1, "GAE lambda should be in [0, 1]."
self._lambda = gae_lambda
assert dual_clip is None or dual_clip > 1, "Dual-clip PPO parameter should greater than 1."
self._dual_clip = dual_clip
self._value_clip = value_clip
self._rew_norm = reward_normalization
# Optionally standardises batch.rew, then delegates to
# compute_episodic_return. The critic is evaluated on obs_next only when
# gae_lambda is strictly between 0 and 1; otherwise None is passed as values.
def process_fn(self, batch: Batch, buffer: ReplayBuffer, indice: np.ndarray) -> Batch:
if self._rew_norm:
mean, std = batch.rew.mean(), batch.rew.std()
# Skip the division when the rewards are (numerically) constant.
if not np.isclose(std, 0):
batch.rew = (batch.rew - mean) / std
if self._lambda in [0, 1]:
return self.compute_episodic_return(batch, None, gamma=self._gamma, gae_lambda=self._lambda)
else:
v_ = []
with torch.no_grad():
for b in batch.split(self._batch, shuffle=False):
v_.append(self.critic(b.obs_next))
v_ = to_numpy(torch.cat(v_, dim=0))
return self.compute_episodic_return(batch, v_, gamma=self._gamma, gae_lambda=self._lambda)
# Runs the actor on batch.obs and returns a Batch holding the logits, the
# chosen action, the actor's returned state and the action distribution.
def forward(self, batch: Batch, state: Optional[Union[dict, Batch, np.ndarray]] = None, **kwargs) -> Batch:
logits, h = self.actor(batch.obs, state=state, info=batch.info)
if isinstance(logits, tuple):
dist = self.dist_fn(*logits)
else:
dist = self.dist_fn(logits)
# Training mode samples from the distribution; otherwise the action is the
# argmax of the logits along dim 1.
# NOTE(review): the argmax branch presumably requires logits to be a single
# tensor, while the branch above also accepts a tuple - confirm.
if self.training:
act = dist.sample()
else:
act = torch.argmax(logits, dim=1)
if self._range:
act = act.clamp(self._range[0], self._range[1])
return Batch(logits=logits, act=act, state=h, dist=dist)
# Performs `repeat` passes of minibatch updates over `batch` and returns a
# dict mapping loss names to the list of per-minibatch values.
def learn(self, batch: Batch, batch_size: int, repeat: int, **kwargs) -> Dict[str, List[float]]:
self._batch = batch_size
losses, clip_losses, vf_losses, ent_losses, kl_losses, supervision_losses = (
[],
[],
[],
[],
[],
[],
)
v = []
old_log_prob = []
teacher_action = []
old_logits = []
# First pass, without gradients: cache the critic values, log-probs, logits
# and teacher actions produced by the pre-update networks.
with torch.no_grad():
for b in batch.split(batch_size, shuffle=False):
v.append(self.critic(b.obs))
b_ = self(b)
dist = b_.dist
logits = b_.logits
old_log_prob.append(dist.log_prob(to_torch_as(b.act, v[0])))
old_logits.append(logits)
# teacher_action is read from the actor right after the forward call -
# presumably the actor sets it as a side effect of forward; confirm.
teacher_action.append(self.actor.teacher_action)
batch.teacher_action = torch.cat(teacher_action, dim=0).to(torch.long)
batch.old_logits = torch.cat(old_logits, dim=0)
batch.v = torch.cat(v, dim=0) # old value
batch.act = to_torch_as(batch.act, v[0])
batch.logp_old = torch.cat(old_log_prob, dim=0)
batch.returns = to_torch_as(batch.returns, v[0]).reshape(batch.v.shape)
# With reward normalisation on, both the returns and the advantages are
# standardised (each only when its std is not numerically zero).
if self._rew_norm:
mean, std = batch.returns.mean(), batch.returns.std()
if not np.isclose(std.item(), 0):
batch.returns = (batch.returns - mean) / std
batch.adv = batch.returns - batch.v
if self._rew_norm:
mean, std = batch.adv.mean(), batch.adv.std()
if not np.isclose(std.item(), 0):
batch.adv = (batch.adv - mean) / std
for _ in range(repeat):
for b in batch.split(batch_size):
res = self(b)
logits = res.logits
dist = res.dist
value = self.critic(b.obs)
# Probability ratio between the current and the cached policy.
ratio = (dist.log_prob(b.act) - b.logp_old).exp().float()
surr1 = ratio * b.adv
surr2 = ratio.clamp(1.0 - self._eps_clip, 1.0 + self._eps_clip) * b.adv
if self._dual_clip:
clip_loss = -torch.max(torch.min(surr1, surr2), self._dual_clip * b.adv).mean()
else:
clip_loss = -torch.min(surr1, surr2).mean()
clip_losses.append(clip_loss.item())
# Value loss: with value clipping, the new value may move at most
# vf_clip_para away from the cached one and the larger error is used.
if self._value_clip:
v_clip = b.v + (value - b.v).clamp(-self._vf_clip_para, self._vf_clip_para)
vf1 = (b.returns - value).pow(2)
vf2 = (b.returns - v_clip).pow(2)
vf_loss = torch.max(vf1, vf2).mean()
else:
vf_loss = (b.returns - value).pow(2).mean()
# NOTE(review): logits.log() is fed to nll_loss, so `logits` are presumably
# probabilities rather than raw scores - confirm against the actor.
supervision_loss = F.nll_loss(logits.log(), b.teacher_action)
supervision_losses.append(supervision_loss.item())
# KL divergence from the cached (pre-update) distribution to the current one.
kl = torch.distributions.kl.kl_divergence(self.dist_fn(b.old_logits), dist)
kl_loss = kl.mean()
kl_losses.append(kl_loss.item())
vf_losses.append(vf_loss.item())
e_loss = dist.entropy().mean()
ent_losses.append(e_loss.item())
loss = clip_loss + self._w_vf * vf_loss - self._w_ent * e_loss + self.kl_coef * kl_loss
loss += self.sup_coef * supervision_loss
losses.append(loss.item())
self.optim.zero_grad()
loss.backward()
# NOTE(review): max_grad_norm defaults to None and is passed straight to
# clip_grad_norm_ - confirm that callers always supply a number.
nn.utils.clip_grad_norm_(
list(self.actor.parameters()) + list(self.critic.parameters()), self._max_grad_norm,
)
self.optim.step()
# Optional hook: invoked only when the actor defines `callback`.
if hasattr(self.actor, "callback"):
self.actor.callback()
# Adapt the KL penalty weight from the mean of every KL value collected so
# far in this call: x1.5 when above twice the target, x0.5 when below half.
cur_kl = np.mean(kl_losses)
if cur_kl > 2.0 * self.kl_target:
self.kl_coef *= 1.5
elif cur_kl < 0.5 * self.kl_target:
self.kl_coef *= 0.5
res = {
"loss/total_loss": losses,
"loss/policy": clip_losses,
"loss/vf": vf_losses,
"loss/entropy": ent_losses,
"loss/kl": kl_losses,
"loss/supervision": supervision_losses,
}
return res

View File

@@ -0,0 +1,10 @@
gym==0.17.3
torch==1.6.0
numba==0.51.2
numpy==1.19.1
pandas==1.1.3
tqdm==4.50.2
tianshou==0.3.0.post1
env==0.1.0
PyYAML==5.4.1
redis==3.5.3

View File

@@ -0,0 +1,4 @@
from .base import *
from .pa_penalty import *
from .ppo_reward import *
from .vp_penalty import *

View File

@@ -0,0 +1,38 @@
import numpy as np
class Abs_Reward(object):
    """The abstract class for Reward.

    Subclasses override :meth:`get_reward` with their own signature and
    :meth:`isinstant` to declare when the reward is emitted. Calling an
    instance forwards every argument to :meth:`get_reward`.
    """

    def __init__(self, config):
        """Accept ``config`` for signature parity with subclasses; it is not used here."""
        return

    def get_reward(self, *args, **kargs):
        """Return the default reward.

        Arguments are accepted and ignored so that ``__call__``, which forwards
        everything it receives, also works on this base implementation instead
        of raising ``TypeError``.

        :return: reward, always ``0`` in the base class.
        """
        reward = 0
        return reward

    def __call__(self, *args, **kargs):
        """Forward all arguments to :meth:`get_reward` and return its result."""
        return self.get_reward(*args, **kargs)

    def isinstant(self):
        """
        :return: Whether the reward should be given at every timestep or only at the end of this episode.
        :raises NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError
class Instant_Reward(Abs_Reward):
    """Base class for rewards whose ``isinstant()`` is ``True``."""

    def __init__(self, config):
        """Copy the ``ffr_ratio`` and ``vvr_ratio`` entries of ``config`` onto the instance."""
        for key in ("ffr_ratio", "vvr_ratio"):
            setattr(self, key, config[key])

    def isinstant(self):
        """:return: ``True``."""
        return True
class EndEpisode_Reward(Abs_Reward):
    """Base class for rewards whose ``isinstant()`` is ``False``."""

    def __init__(self, config):
        """Copy the ``ffr_ratio`` and ``vvr_ratio`` entries of ``config`` onto the instance."""
        for key in ("ffr_ratio", "vvr_ratio"):
            setattr(self, key, config[key])

    def isinstant(self):
        """:return: ``False``."""
        return False

View File

@@ -0,0 +1,14 @@
import numpy as np
from .base import Instant_Reward
class PA_Penalty(Instant_Reward):
    """Reward: (PA_t * v_t / target - penalty * (v_t / target)^2) / 100"""

    def __init__(self, config):
        """Read the quadratic penalty weight from ``config["penalty"]``."""
        self.penalty = config["penalty"]

    def get_reward(self, performance_raise, v_t, target, PA_t, *args):
        """
        :param performance_raise: Accepted for signature compatibility; not used.
        :param v_t: The traded volume.
        :param target: Target volume.
        :param PA_t: Value multiplied with the traded fraction ``v_t / target``.
        :return: The gain minus the quadratic volume penalty, divided by 100.
        """
        gain = PA_t * v_t / target
        cost = self.penalty * (v_t / target) ** 2
        return (gain - cost) / 100

View File

@@ -0,0 +1,22 @@
import numpy as np
from .base import Abs_Reward
class PPO_Reward(Abs_Reward):
    """The reward function defined in IJCAI 2020"""

    def __init__(self, *args):
        """Accept and ignore any constructor arguments."""
        pass

    def isinstant(self):
        """:return: ``False``."""
        return False

    def get_reward(self, performace_raise, ffr, this_tt_ratio, is_buy):
        """Map the ratio onto one of three discrete rewards.

        :param performace_raise: Not used.
        :param ffr: Not used.
        :param this_tt_ratio: The ratio to score; inverted first for buy orders.
        :param is_buy: Whether the order is a buy order.
        :return: ``-1.0`` when the (possibly inverted) ratio is below 1, ``0.0``
            when it is below 1.1, ``1.0`` otherwise.
        """
        ratio = 1 / this_tt_ratio if is_buy else this_tt_ratio
        if ratio < 1:
            return -1.0
        if ratio < 1.1:
            return 0.0
        return 1.0

View File

@@ -0,0 +1,37 @@
import numpy as np
from .base import Instant_Reward
class VP_Penalty_small(Instant_Reward):
    """Reward: (performance_raise * v_t / target - penalty * (v_t / target)^2) / 100"""

    def __init__(self, config):
        """Read the quadratic penalty weight from ``config["penalty"]``."""
        self.penalty = config["penalty"]

    def get_reward(self, performance_raise, v_t, target, *args):
        """
        :param performance_raise: Abs(vv_ratio_t - 1) * 10000.
        :param v_t: The traded volume
        :param target: Target volume
        :return: The penalised reward divided by 100.
        """
        assert target > 0
        gain = performance_raise * v_t / target
        cost = self.penalty * (v_t / target) ** 2
        reward = gain - cost
        assert not np.isnan(reward) and not np.isinf(reward), f"{performance_raise}, {v_t}, {target}"
        return reward / 100
class VP_Penalty_small_vec(VP_Penalty_small):
    """Variant of ``VP_Penalty_small`` that sums over the entries of ``v_t``."""

    def get_reward(self, performance_raise, v_t, target, *args):
        """
        :param performance_raise: Abs(vv_ratio_t - 1) * 10000.
        :param v_t: The traded volume; must provide ``.sum()``.
        :param target: Target volume
        :return: The penalised reward divided by 100.
        """
        assert target > 0
        gain = performance_raise * v_t.sum() / target
        cost = self.penalty * ((v_t / target) ** 2).sum()
        reward = gain - cost
        assert not np.isnan(reward) and not np.isinf(reward), f"{performance_raise}, {v_t}, {target}"
        return reward / 100

View File

@@ -0,0 +1 @@
from .single_sampler import *

View File

@@ -0,0 +1,184 @@
import pandas as pd
import numpy as np
from multiprocessing.context import Process
from multiprocessing import Queue
import os
import sys
sys.path.append("..")
def toArray(data):
    """Convert ``data`` into plain numpy array(s).

    :param data: A numpy array, a list, a pandas DataFrame, or any other object
        accepted by ``np.array``.
    :return: ``data`` itself when it already is an ndarray; a new array for a
        list or other array-like; for a DataFrame, the tuple
        ``(index, values, columns)`` with each part converted to an array.
    :raises NotImplementedError: If a non-list, non-DataFrame object cannot be
        converted by ``np.array``.
    """
    if isinstance(data, np.ndarray):
        return data
    if isinstance(data, list):
        return np.array(data)
    if isinstance(data, pd.DataFrame):
        return toArray(data.index), toArray(data.values), toArray(data.columns)
    try:
        return np.array(data)
    except Exception as err:
        # Only real conversion failures are translated; a bare ``except`` would
        # also have swallowed KeyboardInterrupt / SystemExit and hidden the cause.
        raise NotImplementedError(f"cannot convert {type(data).__name__} to a numpy array") from err
class Sampler:
    """The sampler for training of single-asset RL.

    A daemon child process keeps loading (instrument, date) samples from
    pickled files and pushes them into a bounded queue; :meth:`sample` pops
    them in the parent process.
    """

    def __init__(self, config):
        """
        :param config: Mapping with ``raw_dir`` and ``order_dir`` (directories)
            and ``features`` (an iterable of mappings, each with a ``loc`` directory).
        """
        self.raw_dir = config["raw_dir"] + "/"
        self.order_dir = config["order_dir"] + "/"
        # Order files are read as ``<instrument>.pkl.target``; drop that 11-character suffix.
        self.ins_list = [f[:-11] for f in os.listdir(self.order_dir) if f.endswith("target")]
        self.features = config["features"]
        self.queue = Queue(1000)
        self.child = None
        self.ins = None
        self.raw_df = None
        self.df_list = None
        self.order_df = None

    @staticmethod
    def _worker(order_dir, raw_dir, features, ins_list, queue):
        """Endlessly produce samples into ``queue`` (runs in the child process).

        A random instrument is loaded, its order rows are walked in order, and a
        new random instrument is picked once they are exhausted. Rows whose
        ``amount`` is 0 are skipped. Each queued item is the tuple
        ``(ins, date, raw values, raw columns, raw index, features, target, is_buy)``.
        """
        ins = None
        index = 0
        date_list = []
        while True:
            if ins is None or index == len(date_list):
                # Current instrument exhausted (or first iteration): load another one.
                ins = np.random.choice(ins_list, 1)[0]
                order_df = pd.read_pickle(order_dir + ins + ".pkl.target")
                feature_df_list = [pd.read_pickle(f"{feature['loc']}/{ins}.pkl") for feature in features]
                raw_df = pd.read_pickle(raw_dir + ins + ".pkl.backtest")
                date_list = order_df.index.get_level_values(0).tolist()
                index = 0
            date = date_list[index]
            day_order_df = order_df.iloc[index]
            target = day_order_df["amount"]
            index += 1
            if target == 0:
                continue
            day_raw_df = raw_df.loc[pd.IndexSlice[ins, :, date]]
            is_buy = bool(day_order_df["order_type"])
            day_feature_dfs = np.array([df.loc[ins, date].values for df in feature_df_list])
            day_raw_df_index, day_raw_df_value, day_raw_df_column = toArray(day_raw_df)
            day_feature_dfs_ = toArray(day_feature_dfs)
            queue.put(
                (ins, date, day_raw_df_value, day_raw_df_column, day_raw_df_index, day_feature_dfs_, target, is_buy,),
                block=True,
            )

    def _sample_ins(self):
        """Return one instrument drawn at random from ``self.ins_list``."""
        return np.random.choice(self.ins_list, 1)[0]

    def reset(self):
        """Start the background worker process if it is not running yet."""
        if self.child is None:
            self.child = Process(
                target=self._worker,
                args=(self.order_dir, self.raw_dir, self.features, self.ins_list, self.queue,),
                daemon=True,
            )
            self.child.start()

    def sample(self):
        """Block until the worker has produced a sample, then return it."""
        sample = self.queue.get(block=True)
        return sample

    def stop(self):
        """Terminate the background worker(s); a no-op when none was started.

        ``self.child`` is normally a single process, but an iterable of
        processes is also accepted.
        """
        if self.child is None:
            # Previously this path fell into a bare ``except`` and then tried to
            # iterate ``None``, raising TypeError when stop() preceded reset().
            return
        if hasattr(self.child, "terminate"):
            self.child.terminate()
        else:
            for p in self.child:
                p.terminate()
class TestSampler(Sampler):
    """The sampler for backtest of single-asset strategies."""

    def __init__(self, config):
        """Initialise like ``Sampler`` and start the instrument cursor before the first entry."""
        super().__init__(config)
        self.ins_index = -1

    def _sample_ins(self):
        """Advance the cursor and return the next instrument, or ``None`` once all were returned."""
        self.ins_index += 1
        if self.ins_index < len(self.ins_list):
            return self.ins_list[self.ins_index]
        return None

    @staticmethod
    def _worker(order_dir, raw_dir, features, ins_list, queue):
        """Queue every non-empty order of every instrument once, then 100 ``None`` markers.

        Each queued item is the tuple
        ``(ins, date, raw values, raw columns, raw index, features, target, is_buy)``.
        """
        for ins in ins_list:
            order_df = pd.read_pickle(order_dir + ins + ".pkl.target")
            df_list = [pd.read_pickle(f"{feature['loc']}/{ins}.pkl") for feature in features]
            raw_df = pd.read_pickle(raw_dir + ins + ".pkl.backtest")
            date_list = order_df.index.get_level_values(0).tolist()
            for index, date in enumerate(date_list):
                day_raw_df = raw_df.loc[pd.IndexSlice[ins, :, date]]
                day_order_df = order_df.iloc[index]
                target = day_order_df["amount"]
                if target == 0:
                    continue
                is_buy = bool(day_order_df["order_type"])
                day_feature_dfs = np.array([df.loc[ins, date].values for df in df_list])
                day_raw_df_index, day_raw_df_value, day_raw_df_column = toArray(day_raw_df)
                day_feature_dfs_ = toArray(day_feature_dfs)
                item = (
                    ins,
                    date,
                    day_raw_df_value,
                    day_raw_df_column,
                    day_raw_df_index,
                    day_feature_dfs_,
                    target,
                    is_buy,
                )
                queue.put(item, block=True)
        # End-of-data markers for the consumers.
        for _ in range(100):
            queue.put(None)

    def reset(self, order_dir=None):
        """
        reset the sampler and change self.order_dir if order_dir is not None.
        """
        if order_dir:
            self.order_dir = order_dir
            self.ins_list = [f[:-11] for f in os.listdir(self.order_dir) if f.endswith("target")]
        if self.child is not None:
            # Stop the previous worker and discard whatever it left in the queue.
            self.child.terminate()
            while not self.queue.empty():
                self.queue.get()
        worker_args = (self.order_dir, self.raw_dir, self.features, self.ins_list, self.queue,)
        self.child = Process(target=self._worker, args=worker_args, daemon=True)
        self.child.start()

View File

@@ -0,0 +1,28 @@
import pandas as pd
import os
# Convert every ``*.log`` pickle found under ``$OUTPUT_DIR/example/OPDT_b/test/``
# into an integer action table saved under ``<data_path>/feature/teacher/``.
data_path = '../data/'
feature_path = os.path.join(data_path, 'feature/teacher/')
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(feature_path, exist_ok=True)

output_dir = os.environ.get('OUTPUT_DIR')
if output_dir is None:
    # os.path.join(None, ...) would otherwise fail with an opaque TypeError.
    raise RuntimeError("The OUTPUT_DIR environment variable must be set before running this script.")
log_file = os.path.join(output_dir, 'example/OPDT_b/test/')
files = os.listdir(log_file)
for f in files:
    if f.endswith(".log"):
        df = pd.read_pickle(os.path.join(log_file, f))
        # Append index level 1 as an extra 'datetime' index level.
        df['datetime'] = df.index.get_level_values(1)
        df.set_index('datetime', append=True, drop=True, inplace=True)
        action = df['action']
        action = action.reset_index(level=1, drop=True)
        # Keep only the time-of-day part of the third index entry.
        action.index = action.index.map(lambda x: (x[0], x[1], x[2].time()))
        # One column per time; keep every 30th column and double the values.
        action = action.unstack().iloc[:, ::30] * 2
        action = action.fillna(0)
        train_action = action.astype("int")
        final = train_action
        # Same stem as the log file, with '.log' replaced by '.pkl'.
        final.to_pickle(os.path.join(feature_path, f[:-4] + '.pkl'))

303
examples/trade/util.py Normal file
View File

@@ -0,0 +1,303 @@
from collections import namedtuple
from torch.nn.utils.rnn import pack_padded_sequence
from tianshou.data import Batch
import numpy as np
import torch
import copy
from typing import Union, Optional
from numbers import Number
def nan_weighted_avg(vals, weights, axis=None):
    """Weighted average in which NaN results are replaced by ``vals[0]``.

    :param vals: The values to be averaged on.
    :param weights: The weights of the weighted average; same shape as ``vals``.
    :param axis: On which axis to calculate the weighted average. (Default value = None)
    :return: ``sum(vals * weights) / sum(weights)`` along ``axis``, with NaN replaced by ``vals[0]``.
    """
    assert vals.shape == weights.shape, AssertionError(f"{vals.shape} & {weights.shape}")
    values, wts = vals.copy(), weights.copy()
    weighted_total = (values * wts).sum(axis=axis)
    weight_total = wts.sum(axis=axis)
    return np.nan_to_num(weighted_total / weight_total, nan=values[0])
def robust_auc(y_true, y_pred):
    """
    Calculate AUC.

    The previous body called ``roc_auc_score``, which this module never
    imports; the resulting NameError was swallowed by a bare ``except`` and
    the function always returned NaN. The binary ROC AUC is now computed with
    numpy through the rank-sum (Mann-Whitney U) identity, using average ranks
    for tied scores.

    :param y_true: Binary labels; the larger of the two distinct values is
        treated as the positive class.
    :param y_pred: Scores, one per label; higher means more likely positive.
    :return: The AUC as a float, or ``np.nan`` when it is undefined (inputs not
        convertible, length mismatch, non-finite scores, or not exactly two
        distinct labels).
    """
    try:
        labels = np.asarray(y_true).ravel()
        scores = np.asarray(y_pred, dtype=float).ravel()
    except (TypeError, ValueError):
        return np.nan
    if labels.shape != scores.shape or not np.all(np.isfinite(scores)):
        return np.nan
    classes = np.unique(labels)
    if len(classes) != 2:
        return np.nan
    positive = labels == classes[1]
    n_pos = int(positive.sum())
    n_neg = positive.size - n_pos
    # 1-based average rank shared by every member of a group of tied scores.
    _, inverse, counts = np.unique(scores, return_inverse=True, return_counts=True)
    avg_rank = np.cumsum(counts) - (counts - 1) / 2.0
    ranks = avg_rank[inverse]
    u_stat = ranks[positive].sum() - n_pos * (n_pos + 1) / 2.0
    return float(u_stat / (n_pos * n_neg))
def merge_dicts(d1, d2):
    """Deep-merge two dicts without modifying ``d1``.

    :param d1: Dict 1.
    :type d1: dict
    :param d2: Dict 2.
    :returns: A new dict that is d1 and d2 deep merged.
    :rtype: dict
    """
    result = copy.deepcopy(d1)
    deep_update(result, d2, new_keys_allowed=True, whitelist=[])
    return result
def deep_update(
    original, new_dict, new_keys_allowed=False, whitelist=None, override_all_if_type_changes=None,
):
    """Recursively update ``original`` in place with the values of ``new_dict``.

    :param original: Dictionary with default values.
    :type original: dict
    :param new_dict: Dictionary with values to be updated.
    :type new_dict: dict
    :param new_keys_allowed: Whether keys missing from ``original`` may be added. (Default value = False)
    :type new_keys_allowed: bool
    :param whitelist: Top-level keys whose sub-dicts may receive new subkeys
        even when ``new_keys_allowed`` is False. (Default value = None)
    :type whitelist: Optional[List[str]]
    :param override_all_if_type_changes: Top-level keys whose dict value is
        replaced wholesale when its ``"type"`` entry changes. (Default value = None)
    :type override_all_if_type_changes: Optional[List[str]]
    :returns: ``original``, after the update.
    :raises Exception: If ``new_dict`` holds an unknown key and new keys are not allowed.
    """
    whitelist = whitelist or []
    override_all_if_type_changes = override_all_if_type_changes or []

    for key, new_value in new_dict.items():
        if not new_keys_allowed and key not in original:
            raise Exception("Unknown config parameter `{}` ".format(key))

        current = original.get(key)
        # Unless both sides are dicts, the new value simply replaces the old one.
        if not (isinstance(current, dict) and isinstance(new_value, dict)):
            original[key] = new_value
            continue

        type_changed = (
            key in override_all_if_type_changes
            and "type" in new_value
            and "type" in current
            and new_value["type"] != current["type"]
        )
        if type_changed:
            original[key] = new_value
        else:
            # Whitelisted keys may gain new subkeys; others inherit the caller's setting.
            allow_new = True if key in whitelist else new_keys_allowed
            deep_update(current, new_value, allow_new)
    return original
def get_seqlen(done_seq):
    """Split a flat sequence of done flags into episode lengths.

    :param done_seq: Iterable of flags; a truthy flag closes the current episode.
    :return: Array of episode lengths; a trailing unfinished episode is included.
    """
    lengths = []
    run = 0
    for done in done_seq:
        run += 1
        if done:
            lengths.append(run)
            run = 0
    if run:
        lengths.append(run)
    return np.array(lengths)
def generate_seq(seqlen, list):
    """Cut a flat array into consecutive segments and zero-pad them to equal length.

    :param seqlen: Length of each consecutive segment.
    :param list: Flat torch tensor or numpy array holding the segments back to back.
    :return: Stacked segments, each padded with zeros up to ``max(seqlen)``;
        a tensor for tensor input, otherwise an ndarray.
    """
    is_tensor = isinstance(list, torch.Tensor)
    longest = np.max(seqlen)
    rows = []
    start = 0
    for length in seqlen:
        segment = list[start : start + length]
        # A leading slice of the input is used only as a shape/dtype template for the padding.
        template = list[: longest - length]
        if is_tensor:
            rows.append(torch.cat((segment, torch.zeros_like(template)), dim=0,))
        else:
            rows.append(np.concatenate((segment, np.zeros_like(template)), axis=0))
        start += length
    if is_tensor:
        return torch.stack(rows, dim=0)
    return np.stack(rows, axis=0)
def sequence_batch(batch):
    """Reshape a flat batch into zero-padded per-episode sequences.

    :param batch: Flat batch whose ``done`` field marks episode ends.
    :return: A new ``Batch`` in which every key except ``policy`` and ``info``
        is padded per episode, plus a ``seqlen`` field with the episode lengths.
    """
    seqlen = get_seqlen(batch.done)
    res = Batch()
    for key in batch.keys():
        if key in ["policy", "info"]:
            res[key] = batch[key]
        else:
            res[key] = generate_seq(seqlen, batch[key])
    res.seqlen = seqlen
    return res
def flatten_seq(seq, seqlen):
    """Concatenate the first ``seqlen[i]`` items of every row ``seq[i]``.

    :param seq: Padded sequences, indexed by row; torch tensor or numpy array.
    :param seqlen: Number of leading items to keep from each row.
    :return: The kept items joined along axis 0; a tensor for tensor input,
        otherwise an ndarray.
    """
    pieces = [seq[row][:length] for row, length in enumerate(seqlen)]
    if isinstance(seq, torch.Tensor):
        return torch.cat(pieces, dim=0)
    return np.concatenate(pieces, axis=0)
def flatten_batch(batch):
    """Flatten every padded field of ``batch`` in place.

    :param batch: Batch carrying a ``seqlen`` field; all keys except
        ``policy``, ``info`` and ``seqlen`` are flattened with ``flatten_seq``.
    :return: The same ``batch`` object.
    """
    untouched = ("policy", "info", "seqlen")
    for key in batch.keys():
        if key not in untouched:
            batch[key] = flatten_seq(batch[key], batch.seqlen)
    return batch
def to_numpy(
    x: Union[Batch, dict, list, tuple, np.ndarray, torch.Tensor]
) -> Union[Batch, dict, list, tuple, np.ndarray, torch.Tensor]:
    """Convert the torch tensors held by ``x`` to numpy arrays.

    :param x: A tensor, a dict or ``Batch`` (both converted in place), a list
        or tuple, or any other object accepted by ``np.asanyarray``.
    :return: The converted object. Dicts and Batches are returned as the same
        instance; a list/tuple becomes whatever tianshou's ``_parse_value``
        builds from it, or a list of converted items when that raises ``TypeError``.
    """
    if isinstance(x, torch.Tensor):
        x = x.detach().cpu().numpy()
    elif isinstance(x, dict):
        for k, v in x.items():
            x[k] = to_numpy(v)
    elif isinstance(x, Batch):
        x.to_numpy()
    elif isinstance(x, (list, tuple)):
        # ``_parse_value`` is a private tianshou helper that this module never
        # imported, so this branch used to raise NameError. It is imported
        # lazily here to keep the fix local to the only code path that needs it.
        from tianshou.data.batch import _parse_value

        try:
            x = to_numpy(_parse_value(x))
        except TypeError:
            x = [to_numpy(e) for e in x]
    else:  # fallback
        x = np.asanyarray(x)
    return x
def to_torch(
    x: Union[Batch, dict, list, tuple, np.ndarray, torch.Tensor],
    dtype: Optional[torch.dtype] = None,
    device: Union[str, int, torch.device] = "cpu",
) -> Union[Batch, dict, list, tuple, np.ndarray, torch.Tensor]:
    """Convert ``x`` (recursively) to torch tensors on ``device``.

    :param x: A tensor, a dict or ``Batch`` (both converted in place), a
        number, a list or tuple, or any other object accepted by ``np.asanyarray``.
    :param dtype: Target dtype; ``None`` keeps the source dtype. (Default value = None)
    :param device: Target device. (Default value = 'cpu')
    :return: The converted object. Dicts and Batches are returned as the same
        instance; a list/tuple becomes whatever tianshou's ``_parse_value``
        builds from it, or a list of converted items when that raises ``TypeError``.
    :raises TypeError: If the fallback array is neither boolean nor numeric.
    """
    if isinstance(x, torch.Tensor):
        if dtype is not None:
            x = x.type(dtype)
        x = x.to(device)
    elif isinstance(x, dict):
        for k, v in x.items():
            x[k] = to_torch(v, dtype, device)
    elif isinstance(x, Batch):
        x.to_torch(dtype, device)
    elif isinstance(x, (np.number, np.bool_, Number)):
        x = to_torch(np.asanyarray(x), dtype, device)
    elif isinstance(x, (list, tuple)):
        # ``_parse_value`` is a private tianshou helper that this module never
        # imported, so this branch used to raise NameError. It is imported
        # lazily here to keep the fix local to the only code path that needs it.
        from tianshou.data.batch import _parse_value

        try:
            x = to_torch(_parse_value(x), dtype, device)
        except TypeError:
            x = [to_torch(e, dtype, device) for e in x]
    else:  # fallback
        x = np.asanyarray(x)
        if issubclass(x.dtype.type, (np.bool_, np.number)):
            x = torch.from_numpy(x).to(device)
            if dtype is not None:
                x = x.type(dtype)
        else:
            raise TypeError(f"object {x} cannot be converted to torch.")
    return x
def to_torch_as(x: Union[torch.Tensor, dict, Batch, np.ndarray], y: torch.Tensor) -> Union[dict, Batch, torch.Tensor]:
    """Convert ``x`` to torch using the dtype and device of ``y``.

    :param x: The object to convert.
    :param y: Reference tensor supplying the target dtype and device.
    :returns: ``to_torch(x, dtype=y.dtype, device=y.device)``.
    """
    assert isinstance(y, torch.Tensor)
    target_dtype, target_device = y.dtype, y.device
    return to_torch(x, dtype=target_dtype, device=target_device)

695
examples/trade/vecenv.py Normal file
View File

@@ -0,0 +1,695 @@
import gym
import time
import ctypes
import numpy as np
from collections import OrderedDict
from multiprocessing.context import Process
from multiprocessing import Array, Pipe, connection, Queue
from typing import Any, List, Tuple, Union, Callable, Optional
from tianshou.env.worker import EnvWorker
from tianshou.env.utils import CloudpickleWrapper
_NP_TO_CT = {
np.bool: ctypes.c_bool,
np.bool_: ctypes.c_bool,
np.uint8: ctypes.c_uint8,
np.uint16: ctypes.c_uint16,
np.uint32: ctypes.c_uint32,
np.uint64: ctypes.c_uint64,
np.int8: ctypes.c_int8,
np.int16: ctypes.c_int16,
np.int32: ctypes.c_int32,
np.int64: ctypes.c_int64,
np.float32: ctypes.c_float,
np.float64: ctypes.c_double,
}
class ShArray:
    """Wrapper of multiprocessing Array."""

    def __init__(self, dtype: np.generic, shape: Tuple[int]) -> None:
        """Allocate a flat shared array able to hold ``prod(shape)`` items of ``dtype``."""
        ctype = _NP_TO_CT[dtype.type]  # type: ignore
        n_items = int(np.prod(shape))
        self.arr = Array(ctype, n_items)
        self.dtype = dtype
        self.shape = shape

    def save(self, ndarray: np.ndarray) -> None:
        """Copy ``ndarray`` into the shared array.

        :param ndarray: The array to store; must be an ``np.ndarray``.
        """
        assert isinstance(ndarray, np.ndarray)
        raw = self.arr.get_obj()
        view = np.frombuffer(raw, dtype=self.dtype).reshape(self.shape)
        np.copyto(view, ndarray)

    def get(self) -> np.ndarray:
        """Return a numpy array built on the shared buffer, reshaped to ``self.shape``."""
        raw = self.arr.get_obj()
        flat = np.frombuffer(raw, dtype=self.dtype)
        return flat.reshape(self.shape)
def _setup_buf(space: gym.Space) -> Union[dict, tuple, ShArray]:
    """Build shared buffers mirroring the structure of ``space``.

    :param space: The space to mirror.
    :return: A dict for a ``Dict`` space, a tuple for a ``Tuple`` space (both
        built recursively), otherwise one ``ShArray`` with the space's dtype and shape.
    """
    if isinstance(space, gym.spaces.Dict):
        assert isinstance(space.spaces, OrderedDict)
        return {name: _setup_buf(sub) for name, sub in space.spaces.items()}
    if isinstance(space, gym.spaces.Tuple):
        assert isinstance(space.spaces, tuple)
        return tuple(_setup_buf(sub) for sub in space.spaces)
    return ShArray(space.dtype, space.shape)
def _worker(
    parent: connection.Connection,
    p: connection.Connection,
    env_fn_wrapper: CloudpickleWrapper,
    obs_bufs: Optional[Union[dict, tuple, ShArray]] = None,
) -> None:
    """Serve environment commands received over a pipe (child-process entry point).

    Understood commands are ``step``, ``reset``, ``close``, ``render``,
    ``seed``, ``getattr`` and ``toggle_log``; any other command closes the pipe
    and raises ``NotImplementedError``.

    :param parent: The parent's end of the pipe; closed immediately.
    :param p: This process's end of the pipe.
    :param env_fn_wrapper: Wrapper whose ``data()`` builds the environment.
    :param obs_bufs: Optional shared buffers; when given, observations are
        written into them and ``None`` is sent through the pipe instead.
        (Default value = None)
    """

    def _encode_obs(obs: Union[dict, tuple, np.ndarray], buffer: Union[dict, tuple, ShArray],) -> None:
        """Recursively store ``obs`` into the matching shared ``buffer``."""
        if isinstance(obs, np.ndarray) and isinstance(buffer, ShArray):
            buffer.save(obs)
            return None
        if isinstance(obs, tuple) and isinstance(buffer, tuple):
            for item, slot in zip(obs, buffer):
                _encode_obs(item, slot)
            return None
        if isinstance(obs, dict) and isinstance(buffer, dict):
            for key in obs:
                _encode_obs(obs[key], buffer[key])
        return None

    def _publish(obs):
        """Store ``obs`` in shared memory when available and return what goes through the pipe."""
        if obs_bufs is None:
            return obs
        _encode_obs(obs, obs_bufs)
        return None

    parent.close()
    env = env_fn_wrapper.data()
    try:
        while True:
            try:
                command, payload = p.recv()
            except EOFError:  # the pipe has been closed
                p.close()
                break
            if command == "step":
                obs, reward, done, info = env.step(payload)
                p.send((_publish(obs), reward, done, info))
            elif command == "reset":
                p.send(_publish(env.reset(payload)))
            elif command == "close":
                p.send(env.close())
                p.close()
                break
            elif command == "render":
                p.send(env.render(**payload) if hasattr(env, "render") else None)
            elif command == "seed":
                p.send(env.seed(payload) if hasattr(env, "seed") else None)
            elif command == "getattr":
                p.send(getattr(env, payload) if hasattr(env, payload) else None)
            elif command == "toggle_log":
                # No reply is sent for this command.
                env.toggle_log(payload)
            else:
                p.close()
                raise NotImplementedError
    except KeyboardInterrupt:
        p.close()
class SubprocEnvWorker(EnvWorker):
    """Subprocess worker used in SubprocVectorEnv and ShmemVectorEnv."""

    def __init__(self, env_fn: Callable[[], gym.Env], share_memory: bool = False) -> None:
        """Spawn a daemon process running ``_worker`` for the environment built by ``env_fn``.

        :param env_fn: Callable returning the environment.
        :param share_memory: Whether observations travel through shared buffers
            instead of the pipe. (Default value = False)
        """
        super().__init__(env_fn)
        self.parent_remote, self.child_remote = Pipe()
        self.share_memory = share_memory
        self.buffer: Optional[Union[dict, tuple, ShArray]] = None
        if self.share_memory:
            # A throwaway environment is built only to read its observation space.
            probe = env_fn()
            obs_space = probe.observation_space
            probe.close()
            del probe
            self.buffer = _setup_buf(obs_space)
        worker_args = (
            self.parent_remote,
            self.child_remote,
            CloudpickleWrapper(env_fn),
            self.buffer,
        )
        self.process = Process(target=_worker, args=worker_args, daemon=True)
        self.process.start()
        self.child_remote.close()

    def __getattr__(self, key: str) -> Any:
        """Ask the child process for attribute ``key`` and return its reply."""
        remote = self.parent_remote
        remote.send(["getattr", key])
        return remote.recv()

    def _decode_obs(self) -> Union[dict, tuple, np.ndarray]:
        """Read the current observation out of the shared buffers."""

        def decode_obs(buffer: Optional[Union[dict, tuple, ShArray]]) -> Union[dict, tuple, np.ndarray]:
            """Recursively turn shared buffers into arrays, keeping the dict/tuple structure."""
            if isinstance(buffer, ShArray):
                return buffer.get()
            if isinstance(buffer, tuple):
                return tuple(decode_obs(item) for item in buffer)
            if isinstance(buffer, dict):
                return {name: decode_obs(item) for name, item in buffer.items()}
            raise NotImplementedError

        return decode_obs(self.buffer)

    def reset(self, sample) -> Any:
        """Send a reset request carrying ``sample``; the reply is not awaited here.

        Collect the resulting observation with :meth:`get_reset_result`.

        :param sample: The payload forwarded to the environment's ``reset``.
        """
        self.parent_remote.send(["reset", sample])

    def get_reset_result(self):
        """Receive the reply to a previously sent reset and return the observation."""
        obs = self.parent_remote.recv()
        return self._decode_obs() if self.share_memory else obs

    @staticmethod
    def wait(  # type: ignore
        workers: List["SubprocEnvWorker"], wait_num: int, timeout: Optional[float] = None,
    ) -> List["SubprocEnvWorker"]:
        """Wait until at least ``wait_num`` workers have a reply ready, or the timeout expires.

        :param workers: The workers to wait on.
        :param wait_num: Number of ready workers after which waiting stops.
        :param timeout: Overall time budget in seconds. (Default value = None)
        :return: The workers whose connection became ready.
        """
        conns = [worker.parent_remote for worker in workers]
        pending = conns
        ready: List[connection.Connection] = []
        remain_time, started = timeout, time.time()
        while len(pending) > 0 and len(ready) < wait_num:
            if timeout:
                remain_time = timeout - (time.time() - started)
                if remain_time <= 0:
                    break
            # connection.wait hangs if the list is empty
            ready.extend(connection.wait(pending, timeout=remain_time))  # type: ignore
            pending = [conn for conn in pending if conn not in ready]
        return [workers[conns.index(conn)] for conn in ready]

    def send_action(self, action: np.ndarray) -> None:
        """Send a step request carrying ``action``; collect the reply with :meth:`get_result`.

        :param action: The action forwarded to the environment's ``step``.
        """
        self.parent_remote.send(["step", action])

    def toggle_log(self, log):
        """Forward ``log`` to the environment's ``toggle_log``; no reply is read."""
        self.parent_remote.send(["toggle_log", log])

    def get_result(self,) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """Receive the reply to a previously sent step as ``(obs, rew, done, info)``."""
        obs, rew, done, info = self.parent_remote.recv()
        if self.share_memory:
            obs = self._decode_obs()
        return obs, rew, done, info

    def seed(self, seed: Optional[int] = None) -> Optional[List[int]]:
        """Send ``seed`` to the environment and return its reply.

        :param seed: The seed to forward. (Default value = None)
        """
        remote = self.parent_remote
        remote.send(["seed", seed])
        return remote.recv()

    def render(self, **kwargs: Any) -> Any:
        """Ask the environment to render with ``kwargs`` and return its reply.

        :param kwargs: Keyword arguments forwarded to the environment's ``render``.
        """
        remote = self.parent_remote
        remote.send(["render", kwargs])
        return remote.recv()

    def close_env(self) -> None:
        """Ask the child to close its environment, then terminate the process."""
        try:
            self.parent_remote.send(["close", None])
            # mp may be deleted so it may raise AttributeError
            self.parent_remote.recv()
            self.process.join()
        except (BrokenPipeError, EOFError, AttributeError):
            pass
        # ensure the subproc is terminated
        self.process.terminate()
class BaseVectorEnv(gym.Env):
"""Base class for vectorized environments wrapper.
Usage:
::
env_num = 8
envs = DummyVectorEnv([lambda: gym.make(task) for _ in range(env_num)])
assert len(envs) == env_num
It accepts a list of environment generators. In other words, an environment
generator ``efn`` of a specific task means that ``efn()`` returns the
environment of the given task, for example, ``gym.make(task)``.
All of the VectorEnv must inherit :class:`~tianshou.env.BaseVectorEnv`.
Here are some other usages:
::
envs.seed(2) # which is equal to the next line
envs.seed([2, 3, 4, 5, 6, 7, 8, 9]) # set specific seed for each env
obs = envs.reset() # reset all environments
obs = envs.reset([0, 5, 7]) # reset 3 specific environments
obs, rew, done, info = envs.step([1] * 8) # step synchronously
envs.render() # render all environments
envs.close() # close all environments
.. warning::
If you use your own environment, please make sure the ``seed`` method
is set up properly, e.g.,
::
def seed(self, seed):
np.random.seed(seed)
Otherwise, the outputs of these envs may be the same with each other.
:param env_fns: a list of callable envs
:param env:
:param worker_fn: a callable worker
:param worker: which contains the i
:param int: wait_num
:param env: step
:param environments: to finish a step is time
:param return: when
:param simulation: in these environments
:param is: disabled
:param float: timeout
:param vectorized: step it only deal with those environments spending time
:param within: timeout
"""
def __init__(
self,
env_fns: List[Callable[[], gym.Env]],
worker_fn: Callable[[Callable[[], gym.Env]], EnvWorker],
sampler=None,
testing: Optional[bool] = False,
wait_num: Optional[int] = None,
timeout: Optional[float] = None,
) -> None:
self._env_fns = env_fns
# A VectorEnv contains a pool of EnvWorkers, which corresponds to
# interact with the given envs (one worker <-> one env).
self.workers = [worker_fn(fn) for fn in env_fns]
self.worker_class = type(self.workers[0])
assert issubclass(self.worker_class, EnvWorker)
assert all([isinstance(w, self.worker_class) for w in self.workers])
self.env_num = len(env_fns)
self.wait_num = wait_num or len(env_fns)
assert 1 <= self.wait_num <= len(env_fns), f"wait_num should be in [1, {len(env_fns)}], but got {wait_num}"
self.timeout = timeout
assert self.timeout is None or self.timeout > 0, f"timeout is {timeout}, it should be positive if provided!"
self.is_async = self.wait_num != len(env_fns) or timeout is not None or testing
self.waiting_conn: List[EnvWorker] = []
# environments in self.ready_id is actually ready
# but environments in self.waiting_id are just waiting when checked,
# and they may be ready now, but this is not known until we check it
# in the step() function
self.waiting_id: List[int] = []
# all environments are ready in the beginning
self.ready_id = list(range(self.env_num))
self.is_closed = False
self.sampler = sampler
self.sample_obs = None
def _assert_is_not_closed(self) -> None:
""" """
assert not self.is_closed, f"Methods of {self.__class__.__name__} cannot be called after " "close."
def __len__(self) -> int:
"""Return len(self), which is the number of environments."""
return self.env_num
def __getattribute__(self, key: str) -> Any:
"""Switch the attribute getter depending on the key.
Any class who inherits ``gym.Env`` will inherit some attributes, like
``action_space``. However, we would like the attribute lookup to go
straight into the worker (in fact, this vector env's action_space is
always None).
"""
if key in [
"metadata",
"reward_range",
"spec",
"action_space",
"observation_space",
]: # reserved keys in gym.Env
return self.__getattr__(key)
else:
return super().__getattribute__(key)
def __getattr__(self, key: str) -> List[Any]:
"""Fetch a list of env attributes.
This function tries to retrieve an attribute from each individual
wrapped environment, if it does not belong to the wrapping vector
environment class.
"""
return [getattr(worker, key) for worker in self.workers]
def _wrap_id(self, id: Optional[Union[int, List[int], np.ndarray]] = None) -> Union[List[int], np.ndarray]:
"""
:param id: Optional[Union[int:
:param List: int]:
:param np: ndarray]]: (Default value = None)
:param id: Optional[Union[int:
:param List[int]:
:param np.ndarray]]: (Default value = None)
:param id: Optional[Union[int:
"""
if id is None:
id = list(range(self.env_num))
elif np.isscalar(id):
id = [id]
return id
def _assert_id(self, id: List[int]) -> None:
"""
:param id: List[int]:
:param id: List[int]:
:param id: List[int]:
"""
for i in id:
assert i not in self.waiting_id, f"Cannot interact with environment {i} which is stepping now."
assert i in self.ready_id, f"Can only interact with ready environments {self.ready_id}."
def reset(self, id: Optional[Union[int, List[int], np.ndarray]] = None) -> "Tuple[np.ndarray, List[int]]":
    """Reset some environments with fresh samples and return their observations.

    If ``id`` is None every environment is reset; otherwise only the given
    environment(s), either an int or a sequence of ints.

    One sample is drawn from ``self.sampler`` per requested environment. When
    the sampler returns ``None`` the environment is *not* reset: its id is
    reported in ``stop_id`` and ``self.sample_obs`` (the first real
    observation seen by this vector env) is used as a placeholder.

    :param id: environment selector, see :meth:`_wrap_id`.
    :returns: ``(obs, stop_id)``. ``obs`` is the ``np.stack`` of one
        observation per requested environment (or an empty list when no
        environment was requested); ``stop_id`` lists the ids for which the
        sampler had no sample.
    """
    self._assert_is_not_closed()
    id = self._wrap_id(id)
    if self.is_async:
        self._assert_id(id)

    # First pass: hand a sample to every worker so the resets can overlap.
    stop_id = []
    for i in id:
        sample = self.sampler.sample()
        if sample is None:
            stop_id.append(i)
        else:
            self.workers[i].reset(sample)

    # Second pass: collect the results in the requested order.
    obs = []
    for i in id:
        if i in stop_id:
            # May still be None if no real observation has been seen yet.
            obs.append(self.sample_obs)
            continue
        this_obs = self.workers[i].get_reset_result()
        if self.sample_obs is None:
            self.sample_obs = this_obs
            # Back-fill placeholders appended before any observation existed.
            obs = [this_obs if prev is None else prev for prev in obs]
        obs.append(this_obs)

    if len(obs) > 0:
        obs = np.stack(obs)
    return obs, stop_id
def toggle_log(self, log):
    """Forward ``toggle_log(log)`` to every worker.

    :param log: value passed unchanged to each worker's ``toggle_log``.
    """
    for env_worker in self.workers:
        env_worker.toggle_log(log)
def reset_sampler(self):
    """Reset the sampler that feeds :meth:`reset` by calling ``self.sampler.reset()``."""
    sampler = self.sampler
    sampler.reset()
def step(self, action: np.ndarray, id: Optional[Union[int, List[int], np.ndarray]] = None) -> List[np.ndarray]:
    """Run one timestep of some environments' dynamics.

    If ``id`` is None all environments are stepped; otherwise only the given
    environment(s), either an int or a sequence of ints. When an episode
    ends, the caller is responsible for calling ``reset(id)``.

    In synchronous mode every selected worker receives its action and the
    call waits for all of them. In asynchronous mode the actions (if any) are
    dispatched, then the call waits until ``self.worker_class.wait`` reports
    at least one finished worker and returns only those results; each
    ``info`` dict carries the originating ``"env_id"``.

    :param action: batch of actions, one per selected environment. In
        asynchronous mode it may be ``None`` to only collect pending results.
    :param id: environment selector, see :meth:`_wrap_id`.
    :returns: ``[batch_obs, batch_rew, batch_done, batch_info]``, each built
        with ``np.stack`` over the collected per-environment results.
    """
    self._assert_is_not_closed()
    id = self._wrap_id(id)

    if self.is_async:
        if action is not None:
            self._assert_id(id)
            assert len(action) == len(id)
            for act, env_id in zip(action, id):
                worker = self.workers[env_id]
                worker.send_action(act)
                self.waiting_conn.append(worker)
                self.waiting_id.append(env_id)
            # Dispatched environments are busy until their result is read.
            self.ready_id = [x for x in self.ready_id if x not in id]

        ready_conns = []
        while not ready_conns:
            ready_conns = self.worker_class.wait(self.waiting_conn, self.wait_num, self.timeout)

        result = []
        for conn in ready_conns:
            # waiting_conn and waiting_id are parallel lists: pop both at once.
            waiting_index = self.waiting_conn.index(conn)
            self.waiting_conn.pop(waiting_index)
            env_id = self.waiting_id.pop(waiting_index)
            obs, rew, done, info = conn.get_result()
            info["env_id"] = env_id
            result.append((obs, rew, done, info))
            self.ready_id.append(env_id)
    else:
        assert len(action) == len(id)
        for pos, env_id in enumerate(id):
            self.workers[env_id].send_action(action[pos])
        result = []
        for env_id in id:
            obs, rew, done, info = self.workers[env_id].get_result()
            info["env_id"] = env_id
            result.append((obs, rew, done, info))

    return list(map(np.stack, zip(*result)))
def seed(self, seed: Optional[Union[int, List[int]]] = None) -> List[Optional[List[int]]]:
    """Seed every worker.

    :param seed: ``None`` passes ``None`` to every worker; an int ``i`` is
        expanded to ``[i, i + 1, i + 2, ...]``, one value per environment;
        any other value is used as the per-worker seed sequence as given.
    :returns: the list of values returned by each worker's ``seed`` call,
        in worker order.
    """
    self._assert_is_not_closed()
    seed_list: Union[List[None], List[int]]
    if seed is None:
        seed_list = [None] * self.env_num
    elif isinstance(seed, int):
        seed_list = [seed + offset for offset in range(self.env_num)]
    else:
        seed_list = seed
    results = []
    for worker, worker_seed in zip(self.workers, seed_list):
        results.append(worker.seed(worker_seed))
    return results
def render(self, **kwargs: Any) -> List[Any]:
    """Render every environment.

    :param kwargs: forwarded unchanged to each worker's ``render``.
    :returns: the per-worker render results, in worker order.
    :raises RuntimeError: in asynchronous mode while some environments are
        still stepping.
    """
    self._assert_is_not_closed()
    still_stepping = self.is_async and len(self.waiting_id) > 0
    if still_stepping:
        raise RuntimeError(f"Environments {self.waiting_id} are still stepping, cannot render them now.")
    frames = []
    for worker in self.workers:
        frames.append(worker.render(**kwargs))
    return frames
def close(self) -> None:
    """Close every worker and mark this vector env as closed.

    Calling it a second time fails the ``_assert_is_not_closed`` guard;
    ``__del__`` calls it on deletion if it has not been called explicitly.
    """
    self._assert_is_not_closed()
    for env_worker in self.workers:
        env_worker.close()
    self.is_closed = True
def __del__(self) -> None:
    """On deletion, call :meth:`close` unless it has already run."""
    if self.is_closed:
        return
    self.close()
class SubprocVectorEnv(BaseVectorEnv):
    """Vectorized environment wrapper based on subprocess.

    Every environment factory is wrapped in a ``SubprocEnvWorker`` created
    with ``share_memory=False``.

    .. seealso::

        Please refer to :class:`~tianshou.env.BaseVectorEnv` for more detailed
        explanation.
    """

    def __init__(
        self,
        env_fns: List[Callable[[], gym.Env]],
        sampler=None,
        testing=False,
        wait_num: Optional[int] = None,
        timeout: Optional[float] = None,
    ) -> None:
        def make_worker(env_fn: Callable[[], gym.Env]) -> SubprocEnvWorker:
            """Build the subprocess worker for one environment factory."""
            return SubprocEnvWorker(env_fn, share_memory=False)

        super().__init__(env_fns, make_worker, sampler, testing, wait_num=wait_num, timeout=timeout)
class ShmemVectorEnv(BaseVectorEnv):
    """Optimized SubprocVectorEnv with shared buffers to exchange observations.

    ShmemVectorEnv has exactly the same API as SubprocVectorEnv; the only
    difference visible here is that its workers are created with
    ``share_memory=True``.

    .. seealso::

        Please refer to :class:`~tianshou.env.SubprocVectorEnv` for more
        detailed explanation.
    """

    def __init__(
        self,
        env_fns: List[Callable[[], gym.Env]],
        sampler=None,
        testing=False,
        wait_num: Optional[int] = None,
        timeout: Optional[float] = None,
    ) -> None:
        def make_worker(env_fn: Callable[[], gym.Env]) -> SubprocEnvWorker:
            """Build the shared-memory subprocess worker for one environment factory."""
            return SubprocEnvWorker(env_fn, share_memory=True)

        super().__init__(env_fns, make_worker, sampler, testing, wait_num=wait_num, timeout=timeout)

View File

@@ -17,7 +17,7 @@ from qlib.contrib.evaluate import (
from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.tests.data import GetData
if __name__ == "__main__":
@@ -25,9 +25,6 @@ if __name__ == "__main__":
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
if not exists_qlib_data(provider_uri):
print(f"Qlib data is not found in {provider_uri}")
sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
from get_data import GetData
GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
qlib.init(provider_uri=provider_uri, region=REG_CN)
@@ -98,6 +95,7 @@ if __name__ == "__main__":
"open_cost": 0.0005,
"close_cost": 0.0015,
"min_cost": 5,
"return_order": True,
},
}
@@ -105,6 +103,11 @@ if __name__ == "__main__":
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
# NOTE: This line is optional
# It demonstrates that the dataset can be used standalone.
example_df = dataset.prepare("train")
print(example_df.head())
# start exp
with R.start(experiment_name="workflow"):
R.log_params(**flatten_dict(task))

View File

@@ -2,92 +2,49 @@
# Licensed under the MIT License.
__version__ = "0.6.1"
__version__ = "0.6.1.99"
import os
import re
import sys
import copy
import yaml
import logging
import platform
import subprocess
from pathlib import Path
from .utils import can_use_cache, init_instance_by_config, get_module_by_module_path
from .workflow.utils import experiment_exit_handler
# init qlib
def init(default_conf="client", **kwargs):
from .config import C, REG_CN, REG_US, QlibConfig
from .data.data import register_all_wrappers
from .log import get_module_logger, set_log_with_config
from .config import C
from .log import get_module_logger
from .data.cache import H
from .workflow import R, QlibRecorder
C.reset()
H.clear()
_logging_config = C.logging_config
if "logging_config" in kwargs:
_logging_config = kwargs["logging_config"]
# set global config
if _logging_config:
set_log_with_config(_logging_config)
# FIXME: this logger ignored the level in config
LOG = get_module_logger("Initialization", level=logging.INFO)
LOG.info(f"default_conf: {default_conf}.")
logger = get_module_logger("Initialization", level=logging.INFO)
C.set_mode(default_conf)
C.set_region(kwargs.get("region", C["region"] if "region" in C else REG_CN))
for k, v in kwargs.items():
C[k] = v
if k not in C:
LOG.warning("Unrecognized config %s" % k)
C.resolve_path()
if not (C["expression_cache"] is None and C["dataset_cache"] is None):
# check redis
if not can_use_cache():
LOG.warning(
f"redis connection failed(host={C['redis_host']} port={C['redis_port']}), cache will not be used!"
)
C["expression_cache"] = None
C["dataset_cache"] = None
C.set(default_conf, **kwargs)
# check path if server/local
if C.get_uri_type() == QlibConfig.LOCAL_URI:
if C.get_uri_type() == C.LOCAL_URI:
if not os.path.exists(C["provider_uri"]):
if C["auto_mount"]:
LOG.error(
logger.error(
f"Invalid provider uri: {C['provider_uri']}, please check if a valid provider uri has been set. This path does not exist."
)
else:
LOG.warning(f"auto_path is False, please make sure {C['mount_path']} is mounted")
elif C.get_uri_type() == QlibConfig.NFS_URI:
logger.warning(f"auto_path is False, please make sure {C['mount_path']} is mounted")
elif C.get_uri_type() == C.NFS_URI:
_mount_nfs_uri(C)
else:
raise NotImplementedError(f"This type of URI is not supported")
LOG.info("qlib successfully initialized based on %s settings." % default_conf)
register_all_wrappers()
LOG.info(f"data_path={C.get_data_path()}")
C.register()
if "flask_server" in C:
LOG.info(f"flask_server={C['flask_server']}, flask_port={C['flask_port']}")
# set up QlibRecorder
exp_manager = init_instance_by_config(C["exp_manager"])
qr = QlibRecorder(exp_manager)
R.register(qr)
# clean up experiment when python program ends
experiment_exit_handler()
logger.info(f"flask_server={C['flask_server']}, flask_port={C['flask_port']}")
logger.info("qlib successfully initialized based on %s settings." % default_conf)
logger.info(f"data_path={C.get_data_path()}")
def _mount_nfs_uri(C):

View File

@@ -11,26 +11,27 @@ Two modes are supported
"""
import copy
from pathlib import Path
import re
import os
import re
import copy
import logging
import multiprocessing
from pathlib import Path
class Config:
def __init__(self, default_conf):
self.__dict__["_default_config"] = default_conf # avoiding conflictions with __getattr__
self.__dict__["_default_config"] = copy.deepcopy(default_conf) # avoiding conflictions with __getattr__
self.reset()
def __getitem__(self, key):
return self.__dict__["_config"][key]
def __getattr__(self, attr):
try:
if attr in self.__dict__["_config"]:
return self.__dict__["_config"][attr]
except KeyError:
return AttributeError(f"No such {attr} in self._config")
raise AttributeError(f"No such {attr} in self._config")
def __setitem__(self, key, value):
self.__dict__["_config"][key] = value
@@ -59,6 +60,9 @@ class Config:
def update(self, *args, **kwargs):
self.__dict__["_config"].update(*args, **kwargs)
def set_conf_from_C(self, config_c):
    """Copy every entry of another config object's ``_config`` mapping into this one.

    :param config_c: object whose ``__dict__["_config"]`` holds the settings
        to merge; they are applied through :meth:`update`.
    """
    source_settings = config_c.__dict__["_config"]
    self.update(**source_settings)
# REGION CONST
REG_CN = "cn"
@@ -86,7 +90,6 @@ _default_config = {
# How many tasks belong to one process. Recommend 1 for high-frequency data and None for daily data.
"maxtasksperchild": None,
"default_disk_cache": 1, # 0:skip/1:use
"disable_disk_cache": False, # disable disk cache; if High-frequency data generally disable_disk_cache=True
"mem_cache_size_limit": 500,
# memory cache expire second, only in used 'DatasetURICache' and 'client D.calendar'
# default 1 hour
@@ -184,9 +187,17 @@ MODE_CONF = {
"timeout": 100,
"logging_level": "INFO",
"region": REG_CN,
## Custom Operator
"custom_ops": [],
},
}
HIGH_FREQ_CONFIG = {
"provider_uri": "~/.qlib/qlib_data/yahoo_cn_1min",
"dataset_cache": None,
"expression_cache": "DiskExpressionCache",
"region": REG_CN,
}
_default_region_config = {
REG_CN: {
@@ -207,6 +218,10 @@ class QlibConfig(Config):
LOCAL_URI = "local"
NFS_URI = "nfs"
def __init__(self, default_conf):
    """Initialise the base config, then mark this instance as not yet registered.

    :param default_conf: default settings, passed on to the parent initialiser.
    """
    super().__init__(default_conf)
    # Set to True by register().
    self._registered = False
def set_mode(self, mode):
# raise KeyError
self.update(MODE_CONF[mode])
@@ -243,6 +258,64 @@ class QlibConfig(Config):
else:
raise NotImplementedError(f"This type of uri is not supported")
def set(self, default_conf="client", **kwargs):
    """Rebuild the configuration from a mode preset plus keyword overrides.

    Steps, in order: reset to defaults, configure logging, apply the
    ``default_conf`` mode preset and the region, apply every keyword
    override (warning about unknown keys), resolve paths, and finally
    disable both caches when caching is requested but ``can_use_cache()``
    reports it is unavailable.

    :param default_conf: key into ``MODE_CONF`` (used via ``set_mode``).
    :param kwargs: individual settings overriding the preset; an optional
        ``logging_config`` entry replaces the default logging configuration.
    """
    from .utils import set_log_with_config, get_module_logger, can_use_cache

    self.reset()

    # An explicit logging_config keyword wins over the default one.
    _logging_config = kwargs.get("logging_config", self.logging_config)
    if _logging_config:
        set_log_with_config(_logging_config)

    # FIXME: this logger ignored the level in config
    logger = get_module_logger("Initialization", level=logging.INFO)
    logger.info(f"default_conf: {default_conf}.")

    self.set_mode(default_conf)
    fallback_region = self["region"] if "region" in self else REG_CN
    self.set_region(kwargs.get("region", fallback_region))

    for key, value in kwargs.items():
        if key not in self:
            logger.warning("Unrecognized config %s" % key)
        self[key] = value

    self.resolve_path()

    cache_requested = self["expression_cache"] is not None or self["dataset_cache"] is not None
    # check redis
    if cache_requested and not can_use_cache():
        logger.warning(
            f"redis connection failed(host={self['redis_host']} port={self['redis_port']}), cache will not be used!"
        )
        self["expression_cache"] = None
        self["dataset_cache"] = None
def register(self):
    """Register the config-dependent global components.

    Registers the operators and data wrappers with this config, builds the
    experiment manager from ``self["exp_manager"]``, installs a
    ``QlibRecorder`` on ``R``, hooks the experiment exit handler, and finally
    flags this config as registered.
    """
    from .utils import init_instance_by_config
    from .data.ops import register_all_ops
    from .data.data import register_all_wrappers
    from .workflow import R, QlibRecorder
    from .workflow.utils import experiment_exit_handler

    register_all_ops(self)
    register_all_wrappers(self)

    # set up QlibRecorder
    recorder = QlibRecorder(init_instance_by_config(self["exp_manager"]))
    R.register(recorder)

    # clean up experiment when python program ends
    experiment_exit_handler()

    self._registered = True
@property
def registered(self):
    """Read-only view of the ``_registered`` flag (set to True by ``register``)."""
    flag = self._registered
    return flag
# global config
C = QlibConfig(_default_config)

View File

@@ -1,9 +1,324 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# -*- coding: utf-8 -*-
from .order import Order
from .account import Account
from .position import Position
from .exchange import Exchange
from .report import Report
from .backtest import backtest as backtest_func, get_date_range
import numpy as np
import inspect
from ...utils import init_instance_by_config
from ...log import get_module_logger
from ...config import C
logger = get_module_logger("backtest caller")
def get_strategy(
    strategy=None,
    topk=50,
    margin=0.5,
    n_drop=5,
    risk_degree=0.95,
    str_type="dropout",
    adjust_dates=None,
):
    """get_strategy

    There are 3 ways to obtain a strategy:

    1) ``strategy`` is None: build one of the built-in top-k strategies
       selected by ``str_type``.
    2) ``strategy`` is a dict or str: build it with ``init_instance_by_config``.
    3) otherwise ``strategy`` is used directly.

    Parameters
    ----------
    strategy : Strategy()
        strategy used in backtest.
    topk : int (Default value: 50)
        top-N stocks to buy.
    margin : int or float(Default value: 0.5)
        - if isinstance(margin, int):
            sell_limit = margin
        - else:
            sell_limit = pred_in_a_day.count() * margin
        buffer margin, in single score_mode, continue holding stock if it is in nlargest(sell_limit).
        sell_limit should be no less than topk.
    n_drop : int
        number of stocks to be replaced in each trading date.
    risk_degree: float
        0-1, 0.95 for example, use 95% money to trade.
    str_type: 'amount', 'weight' or 'dropout'
        strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy.
    adjust_dates :
        passed unchanged to the built-in strategy constructor (only used
        when ``strategy`` is None).

    Returns
    -------
    :class: Strategy
        an initialized strategy object

    Raises
    ------
    ValueError
        if ``strategy`` is None and ``str_type`` is not a known strategy type.
    TypeError
        if the resulting object is not a ``BaseStrategy``.
    """
    if strategy is None:
        # 1) create one of the built-in strategies selected by `str_type`
        str_cls_dict = {
            "amount": "TopkAmountStrategy",
            "weight": "TopkWeightStrategy",
            "dropout": "TopkDropoutStrategy",
        }
        # Fail early with a readable message; otherwise getattr(module, None)
        # raises an opaque "attribute name must be string" error.
        if str_type not in str_cls_dict:
            raise ValueError(f"Unknown str_type {str_type!r}; expected one of {sorted(str_cls_dict)}")
        logger.info("Create new strategy ")
        from .. import strategy as strategy_pool

        str_cls = getattr(strategy_pool, str_cls_dict[str_type])
        strategy = str_cls(
            topk=topk,
            buffer_margin=margin,
            n_drop=n_drop,
            risk_degree=risk_degree,
            adjust_dates=adjust_dates,
        )
    elif isinstance(strategy, (dict, str)):
        # 2) create strategy with init_instance_by_config
        logger.info("Create new strategy ")
        strategy = init_instance_by_config(strategy)

    from ..strategy.strategy import BaseStrategy

    # 3) otherwise the given strategy object is used directly
    if not isinstance(strategy, BaseStrategy):
        raise TypeError("Strategy not supported")
    return strategy
def get_exchange(
    pred,
    exchange=None,
    subscribe_fields=None,
    open_cost=0.0015,
    close_cost=0.0025,
    min_cost=5.0,
    trade_unit=None,
    limit_threshold=None,
    deal_price=None,
    extract_codes=False,
    shift=1,
):
    """get_exchange

    Return ``exchange`` unchanged when one is given; otherwise build a new
    ``Exchange`` covering the prediction dates plus the extra dates returned
    by ``get_date_range`` for ``shift``.

    Parameters
    ----------
    pred : pandas.DataFrame
        prediction with a <datetime, instrument> index; only its index is read here.
    exchange: Exchange().
        an existing exchange to reuse.
    subscribe_fields: list
        subscribe fields. ``None`` (the default) means an empty list.
    open_cost : float
        open transaction cost.
    close_cost : float
        close transaction cost.
    min_cost : float
        min transaction cost.
    trade_unit : int
        100 for China A. Defaults to ``C.trade_unit``.
    deal_price: str
        dealing price type: 'close', 'open', 'vwap'. Defaults to ``C.deal_price``.
    limit_threshold : float
        limit move 0.1 (10%) for example, long and short with same limit.
        Defaults to ``C.limit_threshold``.
    extract_codes: bool
        will we pass the codes extracted from the pred to the exchange.
        NOTE: This will be faster with offline qlib.
    shift : int
        ``right_shift`` passed to ``get_date_range`` when extending the
        exchange dates past the last prediction date.

    Returns
    -------
    :class: Exchange
        an initialized Exchange object
    """
    if trade_unit is None:
        trade_unit = C.trade_unit
    if limit_threshold is None:
        limit_threshold = C.limit_threshold
    if deal_price is None:
        deal_price = C.deal_price
    if exchange is None:
        logger.info("Create new exchange")
        # A fresh list per call: a list literal as the default would be one
        # object shared by every call.
        if subscribe_fields is None:
            subscribe_fields = []
        # handle exception for deal_price
        if deal_price[0] != "$":
            deal_price = "$" + deal_price
        if extract_codes:
            codes = sorted(pred.index.get_level_values("instrument").unique())
        else:
            codes = "all"  # TODO: We must ensure that 'all.txt' includes all the stocks

        dates = sorted(pred.index.get_level_values("datetime").unique())
        dates = np.append(dates, get_date_range(dates[-1], left_shift=1, right_shift=shift))

        exchange = Exchange(
            trade_dates=dates,
            codes=codes,
            deal_price=deal_price,
            subscribe_fields=subscribe_fields,
            limit_threshold=limit_threshold,
            open_cost=open_cost,
            close_cost=close_cost,
            min_cost=min_cost,
            trade_unit=trade_unit,
        )
    return exchange
def get_executor(
    executor=None,
    trade_exchange=None,
    verbose=True,
):
    """get_executor

    There are 3 ways to obtain an executor:

    1) ``executor`` is None: a ``SimulatorExecutor`` is built from
       ``trade_exchange`` and ``verbose``.
    2) ``executor`` is a dict or str: it is built with ``init_instance_by_config``.
    3) otherwise ``executor`` is used directly.

    Parameters
    ----------
    executor : BaseExecutor
        executor used in backtest.
    trade_exchange : Exchange
        exchange used in executor
    verbose : bool
        whether to print log.

    Returns
    -------
    :class: BaseExecutor
        an initialized BaseExecutor object

    Raises
    ------
    TypeError
        if the resulting object is not a ``BaseExecutor``.
    """
    if isinstance(executor, (dict, str)):
        # built from a config
        logger.info("Create new executor ")
        executor = init_instance_by_config(executor)
    elif executor is None:
        # default simulator bound to the given exchange
        logger.info("Create new executor ")
        from ..online.executor import SimulatorExecutor

        executor = SimulatorExecutor(trade_exchange=trade_exchange, verbose=verbose)

    from ..online.executor import BaseExecutor

    # a ready-made executor falls through to this check unchanged
    if isinstance(executor, BaseExecutor):
        return executor
    raise TypeError("Executor not supported")
# This is the API for compatibility for legacy code
def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, return_order=False, **kwargs):
    """This function will help you set a reasonable Exchange and provide default value for strategy

    Keyword arguments are split by name: those matching a parameter of
    ``get_strategy`` go to the strategy, those matching a parameter of
    ``get_exchange`` go to the exchange, and ``executor`` goes to
    ``get_executor``.

    Parameters
    ----------

    - **backtest workflow related or common arguments**

    pred : pandas.DataFrame
        predict should has <datetime, instrument> index and one `score` column.
    account : float
        init account value.
    shift : int
        whether to shift prediction by one day. Also forwarded to
        ``get_exchange`` so a newly created exchange is extended by the same shift.
    benchmark : str
        benchmark code, default is SH000905 CSI 500.
    verbose : bool
        whether to print log.
    return_order : bool
        whether to return order list

    - **strategy related arguments**

    strategy : Strategy()
        strategy used in backtest.
    topk : int (Default value: 50)
        top-N stocks to buy.
    margin : int or float(Default value: 0.5)
        - if isinstance(margin, int):
            sell_limit = margin
        - else:
            sell_limit = pred_in_a_day.count() * margin
        buffer margin, in single score_mode, continue holding stock if it is in nlargest(sell_limit).
        sell_limit should be no less than topk.
    n_drop : int
        number of stocks to be replaced in each trading date.
    risk_degree: float
        0-1, 0.95 for example, use 95% money to trade.
    str_type: 'amount', 'weight' or 'dropout'
        strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy.

    - **exchange related arguments**

    exchange: Exchange()
        pass the exchange for speeding up.
    subscribe_fields: list
        subscribe fields.
    open_cost : float
        open transaction cost. When omitted, the default of ``get_exchange`` applies.
    close_cost : float
        close transaction cost. When omitted, the default of ``get_exchange`` applies.
    min_cost : float
        min transaction cost.
    trade_unit : int
        100 for China A.
    deal_price: str
        dealing price type: 'close', 'open', 'vwap'.
    limit_threshold : float
        limit move 0.1 (10%) for example, long and short with same limit.
    extract_codes: bool
        will we pass the codes extracted from the pred to the exchange.

        .. note:: This will be faster with offline qlib.

    - **executor related arguments**

    executor : BaseExecutor()
        executor used in backtest.

    Returns
    -------
    dict
        the dict returned by the underlying ``backtest_func``, with its
        ``"positions"`` entry replaced by ``{key: position_obj.position}``
        for compatibility with the old API.
    """
    # check strategy:
    spec = inspect.getfullargspec(get_strategy)
    str_args = {k: v for k, v in kwargs.items() if k in spec.args}
    strategy = get_strategy(**str_args)

    # init exchange:
    spec = inspect.getfullargspec(get_exchange)
    ex_args = {k: v for k, v in kwargs.items() if k in spec.args}
    # `shift` is a named parameter of this function, so it is never in
    # `kwargs`; pass it explicitly so the exchange dates cover the same
    # shifted trade dates the backtest loop uses.
    trade_exchange = get_exchange(pred, shift=shift, **ex_args)

    # init executor:
    executor = get_executor(executor=kwargs.get("executor"), trade_exchange=trade_exchange, verbose=verbose)

    # run backtest
    report_dict = backtest_func(
        pred=pred,
        strategy=strategy,
        executor=executor,
        trade_exchange=trade_exchange,
        shift=shift,
        verbose=verbose,
        account=account,
        benchmark=benchmark,
        return_order=return_order,
    )
    # for compatibility of the old API. return the dict positions
    positions = report_dict.get("positions")
    report_dict.update({"positions": {k: p.position for k, p in positions.items()}})
    return report_dict

View File

@@ -5,7 +5,6 @@
import numpy as np
import pandas as pd
from ...utils import get_date_by_shift, get_date_range
from ..online.executor import SimulatorExecutor
from ...data import D
from .account import Account
from ...config import C
@@ -15,7 +14,7 @@ from ...data.dataset.utils import get_level_index
LOG = get_module_logger("backtest")
def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark):
def backtest(pred, strategy, executor, trade_exchange, shift, verbose, account, benchmark, return_order):
"""Parameters
----------
pred : pandas.DataFrame
@@ -69,9 +68,9 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark)
raise ValueError(f"The benchmark {_codes} does not exist. Please provide the right benchmark")
bench = _temp_result.groupby(level="datetime")[_temp_result.columns.tolist()[0]].mean()
trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], shift=shift))
executor = SimulatorExecutor(trade_exchange, verbose=verbose)
trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], left_shift=1, right_shift=shift))
if return_order:
multi_order_list = []
# trading apart
for pred_date, trade_date in zip(predict_dates, trade_dates):
# for loop predict date and trading date
@@ -103,6 +102,8 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark)
)
else:
order_list = []
if return_order:
multi_order_list.append((trade_account, order_list, trade_date))
# 4. Get result after executing order list
# NOTE: The following operation will modify order.amount.
# NOTE: If it is buy and the cash is insufficient, the tradable amount will be recalculated
@@ -115,7 +116,11 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark)
report_df = trade_account.report.generate_report_dataframe()
report_df["bench"] = bench
positions = trade_account.get_positions()
return report_df, positions
report_dict = {"report_df": report_df, "positions": positions}
if return_order:
report_dict.update({"order_list": multi_order_list})
return report_dict
def update_account(trade_account, trade_info, trade_exchange, trade_date):

View File

@@ -49,6 +49,7 @@ class Alpha360(DataHandlerLP):
instruments="csi500",
start_time=None,
end_time=None,
freq="day",
infer_processors=_DEFAULT_INFER_PROCESSORS,
learn_processors=_DEFAULT_LEARN_PROCESSORS,
fit_start_time=None,
@@ -69,9 +70,10 @@ class Alpha360(DataHandlerLP):
}
super().__init__(
instruments,
start_time,
end_time,
instruments=instruments,
start_time=start_time,
end_time=end_time,
freq="day",
data_loader=data_loader,
learn_processors=learn_processors,
infer_processors=infer_processors,
@@ -130,6 +132,7 @@ class Alpha158(DataHandlerLP):
instruments="csi500",
start_time=None,
end_time=None,
freq="day",
infer_processors=[],
learn_processors=_DEFAULT_LEARN_PROCESSORS,
fit_start_time=None,
@@ -147,9 +150,10 @@ class Alpha158(DataHandlerLP):
},
}
super().__init__(
instruments,
start_time,
end_time,
instruments=instruments,
start_time=start_time,
end_time=end_time,
freq=freq,
data_loader=data_loader,
infer_processors=infer_processors,
learn_processors=learn_processors,

View File

@@ -6,17 +6,16 @@ from __future__ import print_function
import numpy as np
import pandas as pd
import inspect
import warnings
from ..log import get_module_logger
from . import strategy as strategy_pool
from .strategy.strategy import BaseStrategy
from .backtest.exchange import Exchange
from .backtest.backtest import backtest as backtest_func, get_date_range
from .backtest import get_exchange, backtest as backtest_func
from .backtest.backtest import get_date_range
from ..data import D
from ..config import C
from ..data.dataset.utils import get_level_index
logger = get_module_logger("Evaluate")
@@ -46,144 +45,6 @@ def risk_analysis(r, N=252):
return res
def get_strategy(
strategy=None,
topk=50,
margin=0.5,
n_drop=5,
risk_degree=0.95,
str_type="amount",
adjust_dates=None,
):
"""get_strategy
Parameters
----------
strategy : Strategy()
strategy used in backtest.
topk : int (Default value: 50)
top-N stocks to buy.
margin : int or float(Default value: 0.5)
- if isinstance(margin, int):
sell_limit = margin
- else:
sell_limit = pred_in_a_day.count() * margin
buffer margin, in single score_mode, continue holding stock if it is in nlargest(sell_limit).
sell_limit should be no less than topk.
n_drop : int
number of stocks to be replaced in each trading date.
risk_degree: float
0-1, 0.95 for example, use 95% money to trade.
str_type: 'amount', 'weight' or 'dropout'
strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy.
Returns
-------
:class: Strategy
an initialized strategy object
"""
if strategy is None:
str_cls_dict = {
"amount": "TopkAmountStrategy",
"weight": "TopkWeightStrategy",
"dropout": "TopkDropoutStrategy",
}
logger.info("Create new streategy ")
str_cls = getattr(strategy_pool, str_cls_dict.get(str_type))
strategy = str_cls(
topk=topk,
buffer_margin=margin,
n_drop=n_drop,
risk_degree=risk_degree,
adjust_dates=adjust_dates,
)
if not isinstance(strategy, BaseStrategy):
raise TypeError("Strategy not supported")
return strategy
def get_exchange(
pred,
exchange=None,
subscribe_fields=[],
open_cost=0.0015,
close_cost=0.0025,
min_cost=5.0,
trade_unit=None,
limit_threshold=None,
deal_price=None,
extract_codes=False,
shift=1,
):
"""get_exchange
Parameters
----------
# exchange related arguments
exchange: Exchange().
subscribe_fields: list
subscribe fields.
open_cost : float
open transaction cost.
close_cost : float
close transaction cost.
min_cost : float
min transaction cost.
trade_unit : int
100 for China A.
deal_price: str
dealing price type: 'close', 'open', 'vwap'.
limit_threshold : float
limit move 0.1 (10%) for example, long and short with same limit.
extract_codes: bool
will we pass the codes extracted from the pred to the exchange.
NOTE: This will be faster with offline qlib.
Returns
-------
:class: Exchange
an initialized Exchange object
"""
if trade_unit is None:
trade_unit = C.trade_unit
if limit_threshold is None:
limit_threshold = C.limit_threshold
if deal_price is None:
deal_price = C.deal_price
if exchange is None:
logger.info("Create new exchange")
# handle exception for deal_price
if deal_price[0] != "$":
deal_price = "$" + deal_price
if extract_codes:
codes = sorted(pred.index.get_level_values("instrument").unique())
else:
codes = "all" # TODO: We must ensure that 'all.txt' includes all the stocks
dates = sorted(pred.index.get_level_values("datetime").unique())
dates = np.append(dates, get_date_range(dates[-1], shift=shift))
exchange = Exchange(
trade_dates=dates,
codes=codes,
deal_price=deal_price,
subscribe_fields=subscribe_fields,
limit_threshold=limit_threshold,
open_cost=open_cost,
close_cost=close_cost,
min_cost=min_cost,
trade_unit=trade_unit,
)
return exchange
# This is the API for compatibility for legacy code
def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **kwargs):
"""This function will help you set a reasonable Exchange and provide default value for strategy
@@ -249,30 +110,22 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
will we pass the codes extracted from the pred to the exchange.
.. note:: This will be faster with offline qlib.
- **executor related arguments**
executor : BaseExecutor()
executor used in backtest.
verbose : bool
whether to print log.
"""
# check strategy:
spec = inspect.getfullargspec(get_strategy)
str_args = {k: v for k, v in kwargs.items() if k in spec.args}
strategy = get_strategy(**str_args)
# init exchange:
spec = inspect.getfullargspec(get_exchange)
ex_args = {k: v for k, v in kwargs.items() if k in spec.args}
trade_exchange = get_exchange(pred, **ex_args)
# run backtest
report_df, positions = backtest_func(
pred=pred,
strategy=strategy,
trade_exchange=trade_exchange,
shift=shift,
verbose=verbose,
account=account,
benchmark=benchmark,
warnings.warn(
"this function is deprecated, please use backtest function in qlib.contrib.backtest", DeprecationWarning
)
# for compatibility of the old API. return the dict positions
positions = {k: p.position for k, p in positions.items()}
return report_df, positions
report_dict = backtest_func(
pred=pred, account=account, shift=shift, benchmark=benchmark, verbose=verbose, return_order=False, **kwargs
)
return report_dict.get("report_df"), report_dict.get("positions")
def long_short_backtest(
@@ -340,7 +193,7 @@ def long_short_backtest(
_pred_dates = pred.index.get_level_values(level="datetime")
predict_dates = D.calendar(start_time=_pred_dates.min(), end_time=_pred_dates.max())
trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], shift=shift))
trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], left_shift=1, right_shift=shift))
long_returns = {}
short_returns = {}

View File

@@ -204,8 +204,8 @@ class ALSTM(Model):
verbose=True,
save_path=None,
):
dl_train = dataset.prepare("train", data_key=DataHandlerLP.DK_L)
dl_valid = dataset.prepare("valid", data_key=DataHandlerLP.DK_L)
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader
dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader
@@ -260,7 +260,7 @@ class ALSTM(Model):
if not self._fitted:
raise ValueError("model is not fitted yet!")
dl_test = dataset.prepare("test", data_key=DataHandlerLP.DK_I)
dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
dl_test.config(fillna_type="ffill+bfill")
test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs)
self.ALSTM_model.eval()

View File

@@ -249,8 +249,8 @@ class GATs(Model):
save_path=None,
):
dl_train = dataset.prepare("train", data_key=DataHandlerLP.DK_L)
dl_valid = dataset.prepare("valid", data_key=DataHandlerLP.DK_L)
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader
dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader
@@ -332,7 +332,7 @@ class GATs(Model):
if not self._fitted:
raise ValueError("model is not fitted yet!")
dl_test = dataset.prepare("test", data_key=DataHandlerLP.DK_I)
dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
dl_test.config(fillna_type="ffill+bfill")
sampler_test = DailyBatchSampler(dl_test)
test_loader = DataLoader(dl_test, sampler=sampler_test, num_workers=self.n_jobs)

View File

@@ -204,8 +204,8 @@ class GRU(Model):
verbose=True,
save_path=None,
):
dl_train = dataset.prepare("train", data_key=DataHandlerLP.DK_L)
dl_valid = dataset.prepare("valid", data_key=DataHandlerLP.DK_L)
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader
dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader
@@ -260,7 +260,7 @@ class GRU(Model):
if not self._fitted:
raise ValueError("model is not fitted yet!")
dl_test = dataset.prepare("test", data_key=DataHandlerLP.DK_I)
dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
dl_test.config(fillna_type="ffill+bfill")
test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs)
self.GRU_model.eval()

View File

@@ -204,8 +204,8 @@ class LSTM(Model):
verbose=True,
save_path=None,
):
dl_train = dataset.prepare("train", data_key=DataHandlerLP.DK_L)
dl_valid = dataset.prepare("valid", data_key=DataHandlerLP.DK_L)
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader
dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader
@@ -260,7 +260,7 @@ class LSTM(Model):
if not self._fitted:
raise ValueError("model is not fitted yet!")
dl_test = dataset.prepare("test", data_key=DataHandlerLP.DK_I)
dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
dl_test.config(fillna_type="ffill+bfill")
test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs)
self.LSTM_model.eval()

View File

@@ -259,7 +259,7 @@ class DNNModelPytorch(Model):
loss = torch.mul(sqr_loss, w).mean()
return loss
elif loss_type == "binary":
loss = nn.BCELoss()
loss = nn.BCELoss(weight=w)
return loss(pred, target)
else:
raise NotImplementedError("loss {} is not supported!".format(loss_type))
@@ -296,7 +296,7 @@ class DNNModelPytorch(Model):
self._fitted = True
class AverageMeter(object):
class AverageMeter:
"""Computes and stores the average and current value"""
def __init__(self):

View File

@@ -464,7 +464,7 @@ class SFM(Model):
return pd.Series(np.concatenate(preds), index=index)
class AverageMeter(object):
class AverageMeter:
"""Computes and stores the average and current value"""
def __init__(self):

View File

@@ -0,0 +1,642 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import pandas as pd
import copy
from sklearn.metrics import roc_auc_score, mean_squared_error
import logging
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
create_save_path,
drop_nan_by_y_index,
)
from ...log import get_module_logger, TimeInspector
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Function
from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
class TabnetModel(Model):
    """
    TabNet model for Qlib.

    A `TabNet` encoder is optionally pre-trained together with a
    `TabNet_Decoder` on a masked-feature reconstruction task, then wrapped in a
    `FinetuneModel` (one extra linear layer) and trained on the label.
    """

    def __init__(
        self,
        d_feat=158,
        out_dim=64,
        final_out_dim=1,
        batch_size=4096,
        n_d=64,
        n_a=64,
        n_shared=2,
        n_ind=2,
        n_steps=5,
        n_epochs=100,
        pretrain_n_epochs=50,
        relax=1.3,
        vbs=2048,
        seed=993,
        optimizer="adam",
        loss="mse",
        metric="",
        early_stop=20,
        GPU="1",
        pretrain_loss="custom",
        ps=0.3,
        lr=0.01,
        pretrain=True,
        pretrain_file="./pretrain/best.model",
    ):
        """
        TabNet model for Qlib

        Args
            d_feat: number of input features per sample
            out_dim: output width of the encoder (input width of the decoder)
            final_out_dim: output width of the fine-tuning layer
            batch_size: number of samples per batch
            n_d, n_a, n_shared, n_ind, n_steps: architecture sizes, see `TabNet`
            n_epochs: maximum number of fine-tuning epochs
            pretrain_n_epochs: maximum number of pre-training epochs
            relax: relax coefficient passed to the encoder
            vbs: virtual batch size used by ghost batch normalization
            seed: seed for numpy and torch
            optimizer: "adam" or "gd"
            loss: loss name, only "mse" is supported
            metric: metric name, "" or "loss"
            early_stop: number of non-improving epochs tolerated before stopping
            GPU: cuda device index, used when cuda is available
            pretrain_loss: stored on the instance; not read elsewhere in this class
            ps: probability to generate the bernoulli mask
            lr: learning rate
            pretrain: whether to pre-train before fitting
            pretrain_file: where the best pre-trained encoder is stored

        Raises
            NotImplementedError: if `optimizer` is not supported
        """
        # set hyper-parameters.
        self.d_feat = d_feat
        self.out_dim = out_dim
        self.final_out_dim = final_out_dim
        self.lr = lr
        self.batch_size = batch_size
        self.optimizer = optimizer.lower()
        self.pretrain_loss = pretrain_loss
        self.seed = seed
        self.ps = ps
        self.n_epochs = n_epochs
        self.logger = get_module_logger("TabNet")
        self.pretrain_n_epochs = pretrain_n_epochs
        self.device = "cuda:%s" % (GPU) if torch.cuda.is_available() else "cpu"
        self.loss = loss
        self.metric = metric
        self.early_stop = early_stop
        self.pretrain = pretrain
        self.pretrain_file = pretrain_file
        # predict() checks this flag, so it has to exist before fit() is called
        self._fitted = False
        self.logger.info(
            "TabNet:"
            "\nbatch_size : {}"
            "\nvirtual bs : {}"
            "\nGPU : {}"
            "\npretrain: {}".format(self.batch_size, vbs, GPU, pretrain)
        )
        np.random.seed(self.seed)
        torch.manual_seed(self.seed)

        # Forward the architecture arguments: previously the encoder silently
        # fell back to its own defaults whatever the caller requested.
        self.tabnet_model = TabNet(
            inp_dim=self.d_feat,
            out_dim=self.out_dim,
            n_d=n_d,
            n_a=n_a,
            n_shared=n_shared,
            n_ind=n_ind,
            n_steps=n_steps,
            relax=relax,
            vbs=vbs,
            device=self.device,
        ).to(self.device)
        self.tabnet_decoder = TabNet_Decoder(self.out_dim, self.d_feat, n_shared, n_ind, vbs, n_steps, self.device).to(
            self.device
        )

        optimizer_classes = {"adam": optim.Adam, "gd": optim.SGD}
        if self.optimizer not in optimizer_classes:
            raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
        optimizer_cls = optimizer_classes[self.optimizer]
        # pre-training updates encoder and decoder, fine-tuning only the encoder
        self.pretrain_optimizer = optimizer_cls(
            list(self.tabnet_model.parameters()) + list(self.tabnet_decoder.parameters()), lr=self.lr
        )
        self.train_optimizer = optimizer_cls(self.tabnet_model.parameters(), lr=self.lr)

    def _full_batch_starts(self, n_samples):
        """Return the start offset of every complete batch; a trailing partial batch is skipped."""
        return range(0, n_samples - self.batch_size + 1, self.batch_size)

    def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"):
        """
        Pre-train encoder and decoder and store the best encoder in `pretrain_file`.

        Uses the "pretrain" and "pretrain_validation" segments of `dataset`.
        Stops after `self.early_stop` consecutive epochs without a better
        validation loss.
        """
        # make the directory of the checkpoint if it does not exist
        save_dir = os.path.dirname(pretrain_file)
        if save_dir and not os.path.isdir(save_dir):
            self.logger.info("make folder to store model...")
            os.makedirs(save_dir, exist_ok=True)

        [df_train, df_valid] = dataset.prepare(
            ["pretrain", "pretrain_validation"],
            col_set=["feature", "label"],
            data_key=DataHandlerLP.DK_L,
        )
        df_train.fillna(df_train.mean(), inplace=True)
        df_valid.fillna(df_valid.mean(), inplace=True)
        x_train = df_train["feature"]
        x_valid = df_valid["feature"]

        # Early stop setup
        stop_steps = 0
        best_loss = np.inf

        for epoch_idx in range(self.pretrain_n_epochs):
            self.logger.info("epoch: %s" % (epoch_idx))
            self.logger.info("pre-training...")
            self.pretrain_epoch(x_train)
            self.logger.info("evaluating...")
            train_loss = self.pretrain_test_epoch(x_train)
            valid_loss = self.pretrain_test_epoch(x_valid)
            self.logger.info("train %.6f, valid %.6f" % (train_loss, valid_loss))

            if valid_loss < best_loss:
                self.logger.info("Save Model...")
                torch.save(self.tabnet_model.state_dict(), pretrain_file)
                best_loss = valid_loss
                # an improvement restarts the patience counter
                stop_steps = 0
            else:
                stop_steps += 1
                if stop_steps >= self.early_stop:
                    self.logger.info("early stop")
                    break

    def fit(
        self,
        dataset: DatasetH,
        evals_result=None,
        verbose=True,
        save_path=None,
    ):
        """
        Optionally pre-train, then fine-tune on the "train"/"valid" segments.

        Args
            dataset: dataset providing the segments
            evals_result: optional dict; the per-epoch scores are stored under
                the keys "train" and "valid". A fresh dict is used when omitted.
            verbose: not used by this method
            save_path: not used by this method
        """
        if evals_result is None:
            evals_result = {}

        if self.pretrain:
            # there is a pretrained model, load the model
            self.logger.info("Pretrain...")
            self.pretrain_fn(dataset, self.pretrain_file)
            self.logger.info("Load Pretrain model")
            self.tabnet_model.load_state_dict(torch.load(self.pretrain_file))

        # adding one more linear layer to fit the final output dimension
        self.tabnet_model = FinetuneModel(self.out_dim, self.final_out_dim, self.tabnet_model).to(self.device)
        # The optimizer was built before this layer existed; without
        # registering it the new layer would stay at its random initialisation.
        self.train_optimizer.add_param_group({"params": list(self.tabnet_model.fc.parameters())})

        df_train, df_valid = dataset.prepare(
            ["train", "valid"],
            col_set=["feature", "label"],
            data_key=DataHandlerLP.DK_L,
        )
        df_train.fillna(df_train.mean(), inplace=True)
        x_train, y_train = df_train["feature"], df_train["label"]
        x_valid, y_valid = df_valid["feature"], df_valid["label"]

        stop_steps = 0
        # metric_fn returns the negated loss, so a larger score is better
        best_score = -np.inf
        best_epoch = 0
        evals_result["train"] = []
        evals_result["valid"] = []

        self.logger.info("training...")

        for epoch_idx in range(self.n_epochs):
            self.logger.info("epoch: %s" % (epoch_idx))
            self.logger.info("training...")
            self.train_epoch(x_train, y_train)
            self.logger.info("evaluating...")
            train_loss, train_score = self.test_epoch(x_train, y_train)
            valid_loss, val_score = self.test_epoch(x_valid, y_valid)
            self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
            evals_result["train"].append(train_score)
            evals_result["valid"].append(val_score)

            if val_score > best_score:
                best_score = val_score
                stop_steps = 0
                best_epoch = epoch_idx
            else:
                stop_steps += 1
                if stop_steps >= self.early_stop:
                    self.logger.info("early stop")
                    break

        self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
        # only mark the model usable once training has actually run
        self._fitted = True

    def predict(self, dataset):
        """
        Predict on the "test" segment of `dataset`.

        Returns
            pd.Series indexed like the test features

        Raises
            ValueError: if the model has not been fitted
        """
        if not self._fitted:
            raise ValueError("model is not fitted yet!")

        x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
        index = x_test.index
        self.tabnet_model.eval()
        x_values = torch.from_numpy(x_test.values)
        x_values[torch.isnan(x_values)] = 0
        sample_num = x_values.shape[0]
        preds = []

        for begin in range(0, sample_num, self.batch_size):
            # the last batch may be shorter than batch_size
            end = min(begin + self.batch_size, sample_num)
            x_batch = x_values[begin:end].float().to(self.device)
            priors = torch.ones(end - begin, self.d_feat).to(self.device)

            with torch.no_grad():
                pred = self.tabnet_model(x_batch, priors).detach().cpu().numpy()

            preds.append(pred)

        return pd.Series(np.concatenate(preds), index=index)

    def test_epoch(self, data_x, data_y):
        """
        Evaluate on every complete batch of (`data_x`, `data_y`).

        Returns
            (mean loss, mean score) over the evaluated batches
        """
        # prepare evaluation data; NaN is replaced by 0
        x_values = torch.from_numpy(data_x.values)
        y_values = torch.from_numpy(np.squeeze(data_y.values))
        x_values[torch.isnan(x_values)] = 0
        y_values[torch.isnan(y_values)] = 0
        self.tabnet_model.eval()

        scores = []
        losses = []

        # evaluation needs no gradients; skip building the autograd graph
        with torch.no_grad():
            for start in self._full_batch_starts(len(x_values)):
                stop = start + self.batch_size
                feature = x_values[start:stop].float().to(self.device)
                label = y_values[start:stop].float().to(self.device)
                priors = torch.ones(self.batch_size, self.d_feat).to(self.device)
                pred = self.tabnet_model(feature, priors)
                loss = self.loss_fn(pred, label)
                losses.append(loss.item())

                score = self.metric_fn(pred, label)
                scores.append(score.item())

        return np.mean(losses), np.mean(scores)

    def train_epoch(self, x_train, y_train):
        """Run one fine-tuning epoch over the shuffled complete batches."""
        x_train_values = torch.from_numpy(x_train.values)
        y_train_values = torch.from_numpy(np.squeeze(y_train.values))
        x_train_values[torch.isnan(x_train_values)] = 0
        y_train_values[torch.isnan(y_train_values)] = 0
        self.tabnet_model.train()

        indices = np.arange(len(x_train_values))
        np.random.shuffle(indices)

        for start in self._full_batch_starts(len(indices)):
            batch_idx = indices[start : start + self.batch_size]
            feature = x_train_values[batch_idx].float().to(self.device)
            label = y_train_values[batch_idx].float().to(self.device)
            priors = torch.ones(self.batch_size, self.d_feat).to(self.device)
            pred = self.tabnet_model(feature, priors)
            loss = self.loss_fn(pred, label)

            self.train_optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_value_(self.tabnet_model.parameters(), 3.0)
            self.train_optimizer.step()

    def _pretrain_batch_loss(self, batch):
        """Mask `batch` with a fresh bernoulli mask and return the reconstruction loss."""
        S_mask = torch.bernoulli(torch.empty(self.batch_size, self.d_feat).fill_(self.ps))
        x_train_values = batch * (1 - S_mask)  # visible features, fed to the encoder
        y_train_values = batch * (S_mask)  # hidden features, to be reconstructed

        S_mask = S_mask.to(self.device)
        feature = x_train_values.float().to(self.device)
        label = y_train_values.float().to(self.device)
        priors = 1 - S_mask
        (vec, sparse_loss) = self.tabnet_model(feature, priors)
        f = self.tabnet_decoder(vec)
        return self.pretrain_loss_fn(label, f, S_mask)

    def pretrain_epoch(self, x_train):
        """Run one pre-training epoch over the shuffled complete batches."""
        train_set = torch.from_numpy(x_train.values)
        train_set[torch.isnan(train_set)] = 0
        indices = np.arange(len(train_set))
        np.random.shuffle(indices)

        self.tabnet_model.train()
        self.tabnet_decoder.train()

        for start in self._full_batch_starts(len(indices)):
            loss = self._pretrain_batch_loss(train_set[indices[start : start + self.batch_size]])

            self.pretrain_optimizer.zero_grad()
            loss.backward()
            self.pretrain_optimizer.step()

    def pretrain_test_epoch(self, x_train):
        """Return the mean reconstruction loss over the complete batches of `x_train`."""
        train_set = torch.from_numpy(x_train.values)
        train_set[torch.isnan(train_set)] = 0

        self.tabnet_model.eval()
        self.tabnet_decoder.eval()

        losses = []
        # evaluation needs no gradients; skip building the autograd graph
        with torch.no_grad():
            for start in self._full_batch_starts(len(train_set)):
                loss = self._pretrain_batch_loss(train_set[start : start + self.batch_size])
                losses.append(loss.item())

        return np.mean(losses)

    def pretrain_loss_fn(self, f_hat, f, S):
        """
        Pretrain loss function defined in the original paper, read "Tabular self-supervised learning" in https://arxiv.org/pdf/1908.07442.pdf

        NOTE(review): the callers in this class pass the masked ground truth as
        `f_hat` and the decoder output as `f`, so the normalisation below is
        computed from the decoder output - confirm this order is intended.
        """
        down_mean = torch.mean(f, dim=0)
        down = torch.sqrt(torch.sum(torch.square(f - down_mean), dim=0))
        up = (f_hat - f) * S
        return torch.sum(torch.square(up / down))

    def loss_fn(self, pred, label):
        """Return the configured loss over the entries whose label is not NaN."""
        mask = ~torch.isnan(label)
        if self.loss == "mse":
            return self.mse(pred[mask], label[mask])
        raise ValueError("unknown loss `%s`" % self.loss)

    def metric_fn(self, pred, label):
        """Return the score (negated loss, larger is better) over the finite labels."""
        mask = torch.isfinite(label)
        if self.metric == "" or self.metric == "loss":
            return -self.loss_fn(pred[mask], label[mask])
        raise ValueError("unknown metric `%s`" % self.metric)

    def mse(self, pred, label):
        """Mean squared error."""
        loss = (pred - label) ** 2
        return torch.mean(loss)
class FinetuneModel(nn.Module):
    """
    FinetuneModel for adding a layer by the end

    Wraps a trained model whose forward returns a tuple and feeds the first
    element of that tuple through one extra linear layer.
    """

    def __init__(self, input_dim, output_dim, trained_model):
        """
        Args:
            input_dim: width of the vector produced by `trained_model`
            output_dim: width of the final output
            trained_model: callable as `trained_model(x, priors)`, returning a
                tuple whose first element is the feature vector
        """
        super().__init__()
        self.model = trained_model
        self.fc = nn.Linear(input_dim, output_dim)

    def forward(self, x, priors):
        vec = self.model(x, priors)[0]  # take the vec out
        # Only drop the trailing axis: a bare squeeze() would also remove the
        # batch axis for a single-row batch and return a 0-d tensor.
        return self.fc(vec).squeeze(-1)
class DecoderStep(nn.Module):
    """One decoder step: a feature transformer followed by a linear layer."""

    def __init__(self, inp_dim, out_dim, shared, n_ind, vbs, device):
        super().__init__()
        self.fea_tran = FeatureTransformer(inp_dim, out_dim, shared, n_ind, vbs, device)
        self.fc = nn.Linear(out_dim, out_dim)

    def forward(self, x):
        transformed = self.fea_tran(x)
        projected = self.fc(transformed)
        return projected
class TabNet_Decoder(nn.Module):
    def __init__(self, inp_dim, out_dim, n_shared, n_ind, vbs, n_steps, device):
        """
        TabNet decoder that is used in pre-training

        The linear layers in `self.shared` are handed to every decoder step;
        the outputs of all steps are summed in `forward`.
        """
        super().__init__()
        self.out_dim = out_dim
        if n_shared > 0:
            # preset the linear functions every step will use
            shared_layers = [nn.Linear(inp_dim, 2 * out_dim)]
            shared_layers.extend(nn.Linear(out_dim, 2 * out_dim) for _ in range(n_shared - 1))
            self.shared = nn.ModuleList(shared_layers)
        else:
            self.shared = None
        self.n_steps = n_steps
        self.steps = nn.ModuleList(
            DecoderStep(inp_dim, out_dim, self.shared, n_ind, vbs, device) for _ in range(n_steps)
        )

    def forward(self, x):
        total = torch.zeros(x.size(0), self.out_dim).to(x.device)
        for decoder_step in self.steps:
            total = total + decoder_step(x)
        return total
class TabNet(nn.Module):
    def __init__(
        self, inp_dim=6, out_dim=6, n_d=64, n_a=64, n_shared=2, n_ind=2, n_steps=5, relax=1.2, vbs=1024, device="cpu"
    ):
        """
        TabNet AKA the original encoder

        Args:
            inp_dim: number of input features
            out_dim: width of the returned vector
            n_d: dimension of the features used to calculate the final results
            n_a: dimension of the features input to the attention transformer of the next step
            n_shared: number of shared steps in feature transformer (optional)
            n_ind: number of independent steps in feature transformer
            n_steps: number of steps of pass through tabnet
            relax: relax coefficient
            vbs: virtual batch size
            device: device passed on to the feature transformers
        """
        super().__init__()
        hidden = n_d + n_a

        # set the number of shared step in feature transformer
        if n_shared > 0:
            # preset the linear functions we will use
            shared_layers = [nn.Linear(inp_dim, 2 * hidden)]
            shared_layers.extend(nn.Linear(hidden, 2 * hidden) for _ in range(n_shared - 1))
            self.shared = nn.ModuleList(shared_layers)
        else:
            self.shared = None

        self.first_step = FeatureTransformer(inp_dim, hidden, self.shared, n_ind, vbs, device)
        self.steps = nn.ModuleList(
            DecisionStep(inp_dim, n_d, n_a, self.shared, n_ind, relax, vbs, device) for _ in range(n_steps - 1)
        )
        self.fc = nn.Linear(n_d, out_dim)
        self.bn = nn.BatchNorm1d(inp_dim, momentum=0.01)
        self.n_d = n_d

    def forward(self, x, priors):
        """Return `(vector, sparse_loss)` for the batch `x`; `x` must not contain NaN."""
        assert not torch.isnan(x).any()
        x = self.bn(x)
        # the columns after the first n_d feed the attention of the next step
        attention_input = self.first_step(x)[:, self.n_d :]
        sparse_loss = torch.zeros(1).to(x.device)
        decision = torch.zeros(x.size(0), self.n_d).to(x.device)
        for decision_step in self.steps:
            step_out, step_loss = decision_step(x, attention_input, priors)
            # split the feature from feat_transformer
            decision = decision + F.relu(step_out[:, : self.n_d])
            attention_input = step_out[:, self.n_d :]
            sparse_loss = sparse_loss + step_loss
        return self.fc(decision), sparse_loss
class GBN(nn.Module):
    """
    Ghost Batch Normalization
    an efficient way of doing batch normalization

    The batch is split into chunks along dim 0 and every chunk is normalized
    separately by the same `BatchNorm1d`.

    Args:
        inp: number of features
        vbs: virtual batch size
        momentum: momentum of the underlying `BatchNorm1d`
    """

    def __init__(self, inp, vbs=1024, momentum=0.01):
        super().__init__()
        self.bn = nn.BatchNorm1d(inp, momentum=momentum)
        self.vbs = vbs

    def forward(self, x):
        # A batch smaller than vbs would request 0 chunks, which torch.chunk
        # rejects; such a batch is normalized as a single chunk instead.
        n_chunks = max(1, x.size(0) // self.vbs)
        chunk = torch.chunk(x, n_chunks, 0)
        res = [self.bn(y) for y in chunk]
        return torch.cat(res, 0)
class GLU(nn.Module):
    """
    GLU block that extracts only the most essential information

    Args:
        inp_dim: input width, used when no `fc` is supplied
        out_dim: output width
        fc: optional existing linear layer to reuse
        vbs: virtual batch size
    """

    def __init__(self, inp_dim, out_dim, fc=None, vbs=1024):
        super().__init__()
        # reuse the supplied layer, otherwise create one with doubled width
        self.fc = fc if fc else nn.Linear(inp_dim, out_dim * 2)
        self.bn = GBN(out_dim * 2, vbs=vbs)
        self.od = out_dim

    def forward(self, x):
        normed = self.bn(self.fc(x))
        # first half carries the values, second half gates them
        values = normed[:, : self.od]
        gate = torch.sigmoid(normed[:, self.od :])
        return torch.mul(values, gate)
class AttentionTransformer(nn.Module):
    """
    Produce a sparse feature mask from the attention features.

    Args:
        d_a: width of the attention features
        inp_dim: number of input features (width of the mask)
        relax: relax coefficient. The greater it is, we can
        use the same features more. When it is set to 1
        we can use every feature only once
        vbs: virtual batch size
    """

    def __init__(self, d_a, inp_dim, relax, vbs=1024):
        super().__init__()
        self.fc = nn.Linear(d_a, inp_dim)
        self.bn = GBN(inp_dim, vbs=vbs)
        self.r = relax

    # a:feature from previous decision step
    def forward(self, a, priors):
        a = self.bn(self.fc(a))
        # NOTE(review): an earlier revision computed
        # `priors * (self.r - mask)` here and bound it to the local name
        # `priors`. That value never left this method, so the computation was
        # dead and has been removed. If priors are meant to be updated from one
        # decision step to the next, the new value has to be returned to and
        # used by the caller.
        return SparsemaxFunction.apply(a * priors)
class FeatureTransformer(nn.Module):
    """
    Stack of GLU blocks: blocks built on the shared linear layers, followed by
    independent blocks. Except for the first block, which changes the width
    from `inp_dim` to `out_dim`, every block is applied as a scaled residual.

    Args:
        inp_dim: input width
        out_dim: output width
        shared: linear layers shared with other transformers, or None
        n_ind: number of independent steps
        vbs: virtual batch size
        device: device on which the residual scale constant is created
    """

    def __init__(self, inp_dim, out_dim, shared, n_ind, vbs, device):
        super().__init__()
        has_shared = bool(shared)
        if has_shared:
            shared_blocks = [GLU(inp_dim, out_dim, shared[0], vbs=vbs)]
            shared_blocks.extend(GLU(out_dim, out_dim, fc, vbs=vbs) for fc in shared[1:])
            self.shared = nn.ModuleList(shared_blocks)
        else:
            self.shared = None

        self.independ = nn.ModuleList()
        if not has_shared:
            # Without shared layers the first independent block changes the
            # width. (This used to reference the undefined name `inp`.)
            self.independ.append(GLU(inp_dim, out_dim, vbs=vbs))
        # the width-changing block above counts as one independent step
        n_residual = n_ind if has_shared else n_ind - 1
        for _ in range(max(n_residual, 0)):
            self.independ.append(GLU(out_dim, out_dim, vbs=vbs))
        self.scale = torch.sqrt(torch.tensor([0.5], device=device))

    def forward(self, x):
        if self.shared is not None:
            x = self.shared[0](x)
            for glu in self.shared[1:]:
                x = torch.add(x, glu(x))
                x = x * self.scale
            residual_blocks = self.independ
        else:
            # The first independent block maps inp_dim -> out_dim, so it cannot
            # be added to its own input as a residual.
            x = self.independ[0](x)
            residual_blocks = self.independ[1:]
        for glu in residual_blocks:
            x = torch.add(x, glu(x))
            x = x * self.scale
        return x
class DecisionStep(nn.Module):
    """
    One step for the TabNet

    Computes a feature mask from the attention features, applies it to the
    input and transforms the masked input.
    """

    def __init__(self, inp_dim, n_d, n_a, shared, n_ind, relax, vbs, device):
        super().__init__()
        self.atten_tran = AttentionTransformer(n_a, inp_dim, relax, vbs)
        self.fea_tran = FeatureTransformer(inp_dim, n_d + n_a, shared, n_ind, vbs, device)

    def forward(self, x, a, priors):
        """Return `(transformed features, sparse_loss)`."""
        mask = self.atten_tran(a, priors)
        # entropy-style term of the mask; 1e-10 keeps log() finite at 0
        entropy_terms = (-1) * mask * torch.log(mask + 1e-10)
        sparse_loss = entropy_terms.mean()
        masked_input = x * mask
        return self.fea_tran(masked_input), sparse_loss
def make_ix_like(input, dim=0):
    """
    Return the values 1..input.size(dim) laid out along `dim`.

    The result has the same number of dimensions, dtype and device as `input`;
    every dimension other than `dim` has size 1.
    """
    length = input.size(dim)
    # start with the range on axis 0, then move it to the requested axis
    shape = [1] * input.dim()
    shape[0] = -1
    ranks = torch.arange(1, length + 1, device=input.device, dtype=input.dtype)
    return ranks.view(shape).transpose(0, dim)
class SparsemaxFunction(Function):
    """
    SparseMax function for replacing reLU
    """

    @staticmethod
    def forward(ctx, input, dim=-1):
        """
        Args:
            input: tensor of scores
            dim: dimension along which sparsemax is applied

        Returns:
            tensor of the same shape as `input`, clamped at 0 from below
        """
        ctx.dim = dim
        max_val, _ = input.max(dim=dim, keepdim=True)
        # same numerical stability trick as for softmax; done out of place so
        # that the caller's tensor is not modified behind autograd's back
        shifted = input - max_val
        tau, supp_size = SparsemaxFunction.threshold_and_support(shifted, dim=dim)
        output = torch.clamp(shifted - tau, min=0)
        ctx.save_for_backward(supp_size, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. `input`, and None for `dim`."""
        supp_size, output = ctx.saved_tensors
        dim = ctx.dim
        grad_input = grad_output.clone()
        grad_input[output == 0] = 0

        # squeeze only the reduced axis so other size-1 axes are preserved
        v_hat = grad_input.sum(dim=dim) / supp_size.to(output.dtype).squeeze(dim)
        v_hat = v_hat.unsqueeze(dim)
        grad_input = torch.where(output != 0, grad_input - v_hat, grad_input)
        return grad_input, None

    @staticmethod
    def threshold_and_support(input, dim=-1):
        """
        Return `(tau, support_size)` for `input` along `dim`.

        `tau` is the threshold subtracted in `forward`; `support_size` is the
        number of entries for which `rank * value > cumsum - 1` holds on the
        values sorted in descending order.
        """
        input_srt, _ = torch.sort(input, descending=True, dim=dim)
        input_cumsum = input_srt.cumsum(dim) - 1
        rhos = make_ix_like(input, dim)
        support = rhos * input_srt > input_cumsum

        support_size = support.sum(dim=dim).unsqueeze(dim)
        tau = input_cumsum.gather(dim, support_size - 1)
        tau /= support_size.to(input.dtype)
        return tau, support_size

View File

@@ -21,7 +21,7 @@ from .executor import SimulatorExecutor
from .executor import save_score_series, load_score_series
class Operator(object):
class Operator:
def __init__(self, client: str):
"""
Parameters

View File

@@ -38,7 +38,7 @@ def _calculate_report_data(df: pd.DataFrame) -> pd.DataFrame:
:param df:
:return:
"""
index_names = df.index.names
df.index = df.index.strftime("%Y-%m-%d")
report_df = pd.DataFrame()
@@ -58,6 +58,8 @@ def _calculate_report_data(df: pd.DataFrame) -> pd.DataFrame:
report_df["turnover"] = df["turnover"]
report_df.sort_index(ascending=True, inplace=True)
report_df.index.names = index_names
return report_df

View File

@@ -17,7 +17,7 @@ from plotly.figure_factory import create_distplot
from ...utils import get_module_by_module_path
class BaseGraph(object):
class BaseGraph:
""""""
_name = None
@@ -204,7 +204,7 @@ class HistogramGraph(BaseGraph):
return _data
class SubplotsGraph(object):
class SubplotsGraph:
"""Create subplots same as df.plot(subplots=True)
Simple package for `plotly.tools.subplots`

View File

@@ -30,7 +30,7 @@ class BaseStrategy:
Parameters
-----------
score_series : pd.Seires
score_series : pd.Series
stock_id , score.
current : Position()
current state of position.

View File

@@ -6,7 +6,7 @@ import copy
import os
class TunerConfigManager(object):
class TunerConfigManager:
def __init__(self, config_path):
if not config_path:
@@ -27,7 +27,7 @@ class TunerConfigManager(object):
self.qlib_client_config = config.get("qlib_client", dict())
class PipelineExperimentConfig(object):
class PipelineExperimentConfig:
def __init__(self, config, TUNER_CONFIG_MANAGER):
"""
:param config: The config dict for tuner experiment
@@ -53,7 +53,7 @@ class PipelineExperimentConfig(object):
yaml.dump(TUNER_CONFIG_MANAGER.config, fp)
class OptimizationConfig(object):
class OptimizationConfig:
def __init__(self, config, TUNER_CONFIG_MANAGER):
self.report_type = config.get("report_type", "pred_long")

View File

@@ -11,7 +11,7 @@ from ...log import get_module_logger, TimeInspector
from ...utils import get_module_by_module_path
class Pipeline(object):
class Pipeline:
GLOBAL_BEST_PARAMS_NAME = "global_best_params.json"

View File

@@ -19,7 +19,7 @@ from hyperopt import fmin, tpe
from hyperopt import STATUS_OK, STATUS_FAIL
class Tuner(object):
class Tuner:
def __init__(self, tuner_config, optim_config):
self.logger = get_module_logger("Tuner", sh_level=logging.INFO)

View File

@@ -8,7 +8,7 @@ from libc.math cimport sqrt, isnan, NAN
from libcpp.vector cimport vector
cdef class Expanding(object):
cdef class Expanding:
"""1-D array expanding"""
cdef vector[double] barv
cdef int na_count

View File

@@ -8,7 +8,7 @@ from libc.math cimport sqrt, isnan, NAN
from libcpp.deque cimport deque
cdef class Rolling(object):
cdef class Rolling:
"""1-D array rolling"""
cdef int window
cdef deque[double] barv

View File

@@ -157,7 +157,7 @@ class Expression(abc.ABC):
@abc.abstractmethod
def _load_internal(self, instrument, start_index, end_index, freq):
pass
raise NotImplementedError("This function must be implemented in your newly defined feature")
@abc.abstractmethod
def get_longest_back_rolling(self):

View File

@@ -13,6 +13,7 @@ import pickle
import traceback
import redis_lock
import contextlib
import abc
from pathlib import Path
import numpy as np
import pandas as pd
@@ -32,43 +33,107 @@ from ..utils import (
from ..log import get_module_logger
from .base import Feature
from .ops import *
from .ops import Operators
class QlibCacheException(RuntimeError):
pass
class MemCacheUnit(OrderedDict):
class MemCacheUnit(abc.ABC):
"""Memory Cache Unit."""
# TODO: use min_heap to replace ordereddict for better performance
def __init__(self, *args, **kwargs):
self.size_limit = kwargs.pop("size_limit", None)
# limit_type: check size_limit type, length(call fun: len) or size(call fun: sys.getsizeof)
self.limit_type = kwargs.pop("limit_type", "length")
super(MemCacheUnit, self).__init__(*args, **kwargs)
self._check_size_limit()
self.size_limit = kwargs.pop("size_limit", 0)
self._size = 0
self.od = OrderedDict()
def __setitem__(self, key, value):
super(MemCacheUnit, self).__setitem__(key, value)
self._check_size_limit()
# TODO: thread safe?__setitem__ failure might cause inconsistent size?
def __getitem__(self, key):
value = super(MemCacheUnit, self).__getitem__(key)
super(MemCacheUnit, self).__delitem__(key)
super(MemCacheUnit, self).__setitem__(key, value)
return value
# precalculate the size after od.__setitem__
self._adjust_size(key, value)
def _check_size_limit(self):
if self.size_limit is not None:
get_cur_size = lambda x: len(x) if self.limit_type == "length" else sum(map(sys.getsizeof, x.values()))
while get_cur_size(self) > self.size_limit:
self.od.__setitem__(key, value)
# move the key to end,make it latest
self.od.move_to_end(key)
if self.limited:
# pop the oldest items beyond size limit
while self._size > self.size_limit:
self.popitem(last=False)
def __getitem__(self, key):
v = self.od.__getitem__(key)
self.od.move_to_end(key)
return v
class MemCache(object):
def __contains__(self, key):
return key in self.od
def __len__(self):
return self.od.__len__()
def __repr__(self):
return f"{self.__class__.__name__}<size_limit:{self.size_limit if self.limited else 'no limit'} total_size:{self._size}>\n{self.od.__repr__()}"
def set_limit_size(self, limit):
self.size_limit = limit
@property
def limited(self):
"""whether memory cache is limited"""
return self.size_limit > 0
@property
def total_size(self):
return self._size
def clear(self):
self._size = 0
self.od.clear()
def popitem(self, last=True):
k, v = self.od.popitem(last=last)
self._size -= self._get_value_size(v)
return k, v
def pop(self, key):
v = self.od.pop(key)
self._size -= self._get_value_size(v)
return v
def _adjust_size(self, key, value):
if key in self.od:
self._size -= self._get_value_size(self.od[key])
self._size += self._get_value_size(value)
@abc.abstractmethod
def _get_value_size(self, value):
raise NotImplementedError
class MemCacheLengthUnit(MemCacheUnit):
def __init__(self, size_limit=0):
super().__init__(size_limit=size_limit)
def _get_value_size(self, value):
return 1
class MemCacheSizeofUnit(MemCacheUnit):
def __init__(self, size_limit=0):
super().__init__(size_limit=size_limit)
def _get_value_size(self, value):
return sys.getsizeof(value)
class MemCache:
"""Memory cache."""
def __init__(self, mem_cache_size_limit=None, limit_type="length"):
@@ -79,21 +144,19 @@ class MemCache(object):
mem_cache_size_limit: cache max size.
limit_type: length or sizeof; length(call fun: len), size(call fun: sys.getsizeof).
"""
if limit_type not in ["length", "sizeof"]:
size_limit = C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit
if limit_type == "length":
klass = MemCacheLengthUnit
elif limit_type == "sizeof":
klass = MemCacheSizeofUnit
else:
raise ValueError(f"limit_type must be length or sizeof, your limit_type is {limit_type}")
self.__calendar_mem_cache = MemCacheUnit(
size_limit=C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit,
limit_type=limit_type,
)
self.__instrument_mem_cache = MemCacheUnit(
size_limit=C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit,
limit_type=limit_type,
)
self.__feature_mem_cache = MemCacheUnit(
size_limit=C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit,
limit_type=limit_type,
)
self.__calendar_mem_cache = klass(size_limit)
self.__instrument_mem_cache = klass(size_limit)
self.__feature_mem_cache = klass(size_limit)
def __getitem__(self, key):
if key == "c":
@@ -140,7 +203,7 @@ class MemCacheExpire:
return value, expire
class CacheUtils(object):
class CacheUtils:
LOCK_ID = "QLIB"
@staticmethod
@@ -224,7 +287,7 @@ class CacheUtils(object):
current_cache_wlock.release()
class BaseProviderCache(object):
class BaseProviderCache:
"""Provider cache base class"""
def __init__(self, provider):
@@ -762,8 +825,8 @@ class DiskDatasetCache(DatasetCache):
.. note:: The start is closed. The end is open!!!!!
- Each line contains two element <timestamp, end_index>
- It indicates the `end_index` of the data for `timestamp`
- Each line contains two element <start_index, end_index> with a timestamp as its index.
- It indicates the `start_index`(included) and `end_index`(excluded) of the data for `timestamp`
- meta data: cache/d41366901e25de3ec47297f12e2ba11d.meta

View File

@@ -12,7 +12,7 @@ from ..log import get_module_logger
import pickle
class Client(object):
class Client:
"""A client class
Provide the connection tool functions for ClientProvider.

Some files were not shown because too many files have changed in this diff Show More