1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-29 17:11:20 +08:00

Compare commits

..

104 Commits

Author SHA1 Message Date
Linlang
5dfc661fc3 test build package with macos-15 2024-12-20 11:55:29 +08:00
Linlang
6a123db109 test pip install rebuild package 2024-12-20 11:43:36 +08:00
Linlang
c0f608e3f8 test pip install rebuild package 2024-12-20 11:40:43 +08:00
Linlang
dd67d235af test pip install rebuild package 2024-12-20 11:40:27 +08:00
Linlang
e111fc8172 rebuild total package 2024-12-20 11:19:36 +08:00
Linlang
1a0dc06108 rewrite deploy with macos 2024-12-19 23:57:00 +08:00
Linlang
0f0f49c860 rewrite deploy with macos 2024-12-19 23:54:28 +08:00
Linlang
f10affe9ca rewrite deploy with macos 2024-12-19 23:43:26 +08:00
Linlang
b8ff81ecd4 rewrite deploy with macos 2024-12-19 23:37:44 +08:00
Linlang
d478795b54 rewrite deploy with macos 2024-12-19 23:35:19 +08:00
Linlang
3bc1ae4aae rewrite deploy with macos 2024-12-19 23:32:53 +08:00
Linlang
80a81c0bf7 rewrite deploy with macos 2024-12-19 23:30:43 +08:00
Linlang
8581d30736 rewrite deploy with macos 2024-12-19 23:23:27 +08:00
Linlang
0486ab1f99 rewrite deploy with macos 2024-12-19 23:19:14 +08:00
Linlang
7e6bdc9493 rewrite deploy with macos 2024-12-19 23:17:54 +08:00
Linlang
5222de4934 rewrite deploy with macos 2024-12-19 22:41:20 +08:00
Linlang
94458af3f7 rewrite deploy with macos 2024-12-19 22:40:52 +08:00
Linlang
04ffb400b2 rewrite deploy with macos 2024-12-19 22:32:01 +08:00
Linlang
bfec2aa27a rewrite deploy with macos 2024-12-19 22:31:43 +08:00
Linlang
fed2691cbe rewrite deploy with macos 2024-12-19 22:08:00 +08:00
Linlang
abeb5cfbe0 rewrite deploy with macos 2024-12-19 21:37:55 +08:00
Linlang
59d28b8a11 rewrite deploy with macos 2024-12-19 21:21:15 +08:00
Linlang
22ca863113 rewrite deploy with macos 2024-12-19 20:59:46 +08:00
Linlang
01d2011905 rewrite deploy with macos 2024-12-19 20:47:55 +08:00
Linlang
e4910dd7d1 rewrite deploy with macos 2024-12-19 20:10:46 +08:00
Linlang
848e1f9b18 rewrite deploy with macos 2024-12-19 20:06:36 +08:00
Linlang
ece56371d9 rewrite deploy with macos 2024-12-19 19:32:54 +08:00
Linlang
ef289faf30 rewrite deploy with macos 2024-12-19 19:24:11 +08:00
Linlang
ed360a58d9 rewrite deploy with macos 2024-12-19 19:20:03 +08:00
Linlang
043d283d2a rewrite deploy with macos 2024-12-19 18:49:21 +08:00
Linlang
e28c1e0c38 rewrite deploy with macos 2024-12-19 18:44:41 +08:00
Linlang
2d595aca08 rewrite deploy with macos 2024-12-19 18:43:47 +08:00
Linlang
92e1f07a01 rewrite deploy with macos 2024-12-19 17:37:46 +08:00
Linlang
2a2615a71e rewrite deploy with macos 2024-12-19 17:33:36 +08:00
Linlang
36e2f66730 rewrite deploy with macos 2024-12-19 17:22:34 +08:00
Linlang
d31d17e7aa rewrite deploy with macos 2024-12-19 17:18:36 +08:00
Linlang
e3363208a0 test pip for macos 2024-12-19 16:50:29 +08:00
Linlang
fe8a860f5e build multi version for macos 2024-12-19 16:35:07 +08:00
Linlang
6f39f16695 build multi version for macos 2024-12-19 16:25:57 +08:00
Linlang
c03017628d build multi version for macos 2024-12-19 16:24:50 +08:00
Linlang
a9941bd25c fix pip install not matching packages 2024-12-19 15:43:45 +08:00
Linlang
9d324f6481 fix pip install not matching packages 2024-12-19 15:41:36 +08:00
Linlang
82e755dc7f fix pip install not matching packages 2024-12-19 15:29:51 +08:00
Linlang
f45d031727 fix pip install not matching packages 2024-12-19 15:16:44 +08:00
Linlang
0b938bdddd fix pip install not matching packages 2024-12-19 15:15:20 +08:00
Linlang
b80abdaf89 test package with pip 2024-12-19 14:59:33 +08:00
Linlang
2e2f9125a7 test package with pip 2024-12-19 14:58:28 +08:00
Linlang
16fc2f54c2 fix macos package name error 2024-12-19 14:40:09 +08:00
Linlang
b512074567 fix macos package name error 2024-12-19 14:23:39 +08:00
Linlang
351eea1f78 fix macos package name error 2024-12-19 13:59:08 +08:00
Linlang
f4bcbe3a0a fix macos package name error 2024-12-19 13:47:33 +08:00
Linlang
8e1c7d395b fix macos package name error 2024-12-19 13:37:58 +08:00
Linlang
5281d77fbb fix macos package name error 2024-12-19 13:09:00 +08:00
Linlang
431b9c1281 fix macos package name error 2024-12-19 12:29:15 +08:00
Linlang
6a4273c300 test pip install from testpypi 2024-12-19 10:50:20 +08:00
Linlang
05a9f8cd30 test pip install from testpypi 2024-12-19 10:41:31 +08:00
Linlang
34a53033f1 fix upload to testpypi error 2024-12-18 22:21:52 +08:00
Linlang
631fa33880 fix upload to testpypi error 2024-12-18 22:21:07 +08:00
Linlang
276ee144e7 fix upload to testpypi error 2024-12-18 22:09:51 +08:00
Linlang
b91d0fa076 fix upload to testpypi error 2024-12-18 22:03:35 +08:00
Linlang
c7d3f1f729 fix upload to testpypi error 2024-12-18 22:02:44 +08:00
Linlang
deffd9749f fix upload to testpypi error 2024-12-18 21:55:12 +08:00
Linlang
e82f225dc4 fix upload to testpypi error 2024-12-18 21:54:40 +08:00
Linlang
10f43b8f01 fix upload to testpypi error 2024-12-18 21:48:28 +08:00
Linlang
bac5642d69 fix upload to testpypi error 2024-12-18 21:27:57 +08:00
Linlang
1239a68edc fix upload to testpypi error 2024-12-18 21:26:43 +08:00
Linlang
07a8d72d50 fix upload to testpypi error 2024-12-18 21:21:29 +08:00
Linlang
ea9ef1343c fix upload to testpypi error 2024-12-18 21:15:59 +08:00
Linlang
cc4ac5d4d7 fix upload to testpypi error 2024-12-18 21:13:18 +08:00
Linlang
1aa61a6735 fix upload to testpypi error 2024-12-18 21:09:59 +08:00
Linlang
303e62445b fix upload to testpypi error 2024-12-18 20:54:43 +08:00
Linlang
6a0bc50386 fix upload to testpypi error 2024-12-18 20:52:07 +08:00
Linlang
86b4d4c707 fix upload to testpypi error 2024-12-18 20:44:02 +08:00
Linlang
808ef6b95e fix upload to testpypi error 2024-12-18 20:41:47 +08:00
Linlang
8aaa75daa4 fix upload to testpypi error 2024-12-18 20:15:27 +08:00
Linlang
34908c8846 fix upload to testpypi error 2024-12-18 20:13:10 +08:00
Linlang
687a2b209a fix upload to testpypi error 2024-12-18 20:06:41 +08:00
Linlang
df391f0569 fix upload to testpypi error 2024-12-18 19:58:05 +08:00
Linlang
44b674ce05 fix upload to testpypi error 2024-12-18 19:54:11 +08:00
Linlang
d592bfdea1 fix upload to testpypi error 2024-12-18 19:51:41 +08:00
Linlang
a0236654df fix upload to testpypi error 2024-12-18 19:44:50 +08:00
Linlang
666c8d541d fix upload to testpypi error 2024-12-18 19:42:49 +08:00
Linlang
c243557c13 fix upload to testpypi error 2024-12-18 19:30:26 +08:00
Linlang
624adb40a9 fix upload to testpypi error 2024-12-18 19:28:41 +08:00
Linlang
bb6acaf932 fix upload to testpypi error 2024-12-18 17:32:47 +08:00
Linlang
a8f3d382f8 fix upload to testpypi error 2024-12-18 17:29:47 +08:00
Linlang
625d119f64 fix upload to testpypi error 2024-12-18 17:25:44 +08:00
Linlang
0cafe3e12e fix upload to testpypi error 2024-12-18 17:21:59 +08:00
Linlang
a6164092a9 fix upload to testpypi error 2024-12-18 17:10:05 +08:00
Linlang
a6646dac3e fix upload to testpypi error 2024-12-18 14:52:40 +08:00
Linlang
5a8ff65a3b fix upload to testpypi error 2024-12-18 14:43:38 +08:00
Linlang
b43f8eac31 test upload to testpypi 2024-12-18 14:33:33 +08:00
Linlang
a6e9d8b594 fix build error 2024-12-17 21:13:21 +08:00
Linlang
4330975f4c fix build error 2024-12-17 21:02:49 +08:00
Linlang
fa1a4ce8f9 fix build error 2024-12-17 20:51:56 +08:00
Linlang
5b0fb43f31 fix build error 2024-12-17 20:50:44 +08:00
Linlang
7dec56876f fix build error 2024-12-17 20:41:53 +08:00
Linlang
9b9accdd68 fix build error 2024-12-17 20:22:19 +08:00
Linlang
7c9ef8db88 fix manylinux error 2024-12-17 15:25:20 +08:00
Linlang
b05d442f34 fix manylinux error 2024-12-17 15:22:43 +08:00
Linlang
0de40d1f8f fix manylinux error 2024-12-17 15:19:04 +08:00
Linlang
9e4bdc21ac fix manylinux error 2024-12-17 15:02:42 +08:00
Linlang
dbbcfef723 fix manylinux error 2024-12-17 14:46:58 +08:00
Linlang
8f16f6b0bd update publish.yaml 2024-12-17 11:48:04 +08:00
27 changed files with 79 additions and 515 deletions

View File

@@ -3,16 +3,22 @@
name: Upload Python Package
# on:
# release:
# types: [published]
on:
release:
types: [published]
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
deploy_with_bdist_wheel:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, macos-13, macos-latest]
os: [windows-latest, macos-13, macos-latest, macos-15]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
exclude:
- os: macos-13
@@ -32,13 +38,13 @@ jobs:
- name: Build wheel on ${{ matrix.os }}
run: |
make build
- name: Upload to PyPi
- name: Build and publish
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
TWINE_PASSWORD: ${{ secrets.TESTPYPI_TOKEN }}
run: |
ls dist
twine check dist/*.whl
twine upload dist/*.whl --verbose
deploy_with_manylinux:
runs-on: ubuntu-latest
@@ -51,15 +57,19 @@ jobs:
- name: Build wheel on Linux
uses: RalfG/python-wheels-manylinux-build@v0.7.1-manylinux2014_x86_64
with:
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-versions: 'cp38-cp38 cp39-cp39 cp310-cp310 cp311-cp311 cp312-cp312'
build-requirements: 'numpy cython'
- name: Install dependencies
run: |
python -m pip install twine
- name: Upload to PyPi
python -m pip list
- name: Build and publish
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
TWINE_PASSWORD: ${{ secrets.TESTPYPI_TOKEN }}
run: |
twine check dist/pyqlib-*-manylinux*.whl
twine upload dist/pyqlib-*-manylinux*.whl --verbose
ls dist
twine check dist/*.whl

View File

@@ -13,10 +13,13 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
# Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# If you want to use python 3.7 in github action, then the latest macos system version is macos-13,
# after macos-13 python 3.7 is no longer supported.
# so we limit the macos version to macos-13.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
# In github action, using python 3.7, pip install will not match the latest version of the package.
# Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
# All things considered, we have removed python 3.7.
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
@@ -31,16 +34,10 @@ jobs:
- name: Update pip to the latest version
run: |
python -m pip install --upgrade pip
# Will cancel this step when the next qlib version is released. The current qlib version is: 0.9.6
- name: Installing pywinpt for windows
if: ${{ matrix.os == 'windows-latest' }}
run: |
python -m pip install pywinpty --only-binary=:all:
- name: Qlib installation test
run: |
python -m pip install pyqlib
python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ pyqlib==0.9.5.80
- name: Install Lightgbm for MacOS
if: ${{ matrix.os == 'macos-13' || matrix.os == 'macos-14' || matrix.os == 'macos-15' }}

View File

@@ -1,113 +0,0 @@
name: Test qlib from source
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
build:
timeout-minutes: 180
# we may retry for 3 times for `Unit tests with Pytest`
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
# In github action, using python 3.7, pip install will not match the latest version of the package.
# Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
# All things considered, we have removed python 3.7.
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- name: Test qlib from source
uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Update pip to the latest version
run: |
python -m pip install --upgrade pip
- name: Installing pytorch for macos
if: ${{ matrix.os == 'macos-13' || matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
run: |
python -m pip install torch torchvision torchaudio
- name: Installing pytorch for ubuntu
if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'ubuntu-22.04' }}
run: |
python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
- name: Installing pytorch for windows
if: ${{ matrix.os == 'windows-latest' }}
run: |
python -m pip install torch torchvision torchaudio
- name: Set up Python tools
run: |
make dev
- name: Lint with Black
run: |
make black
- name: Make html with sphinx
# Since read the docs builds on ubuntu 22.04, we only need to test that the build passes on ubuntu 22.04.
if: ${{ matrix.os == 'ubuntu-22.04' }}
run: |
make docs-gen
- name: Check Qlib with pylint
run: |
make pylint
- name: Check Qlib with flake8
run: |
make flake8
- name: Check Qlib with mypy
run: |
make mypy
- name: Check Qlib ipynb with nbqa
run: |
make nbqa
- name: Test data downloads
run: |
python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
python scripts/get_data.py download_data --file_name rl_data.zip --target_dir tests/.data/rl
- name: Install Lightgbm for MacOS
if: ${{ matrix.os == 'macos-13' || matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
run: |
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
# FIX MacOS error: Segmentation fault
# reference: https://github.com/microsoft/LightGBM/issues/4229
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/fb8323f2b170bd4ae97e1bac9bf3e2983af3fdb0/Formula/libomp.rb
brew unlink libomp
brew install libomp.rb
- name: Check Qlib ipynb with nbconvert
run: |
make nbconvert
- name: Test workflow by config (install from source)
run: |
python -m pip install numba
python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
- name: Unit tests with Pytest
uses: nick-fields/retry@v2
with:
timeout_minutes: 60
max_attempts: 3
command: |
cd tests
python -m pytest . -m "not slow" --durations=0

View File

@@ -1,58 +0,0 @@
name: Test qlib from source slow
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
build:
timeout-minutes: 720
# we may retry for 3 times for `Unit tests with Pytest`
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
# In github action, using python 3.7, pip install will not match the latest version of the package.
# Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
# All things considered, we have removed python 3.7.
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- name: Test qlib from source slow
uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Set up Python tools
run: |
make dev
- name: Downloads dependencies data
run: |
python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
- name: Install Lightgbm for MacOS
if: ${{ matrix.os == 'macos-13' || matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
run: |
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
# FIX MacOS error: Segmentation fault
# reference: https://github.com/microsoft/LightGBM/issues/4229
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/fb8323f2b170bd4ae97e1bac9bf3e2983af3fdb0/Formula/libomp.rb
brew unlink libomp
brew install libomp.rb
- name: Unit tests with Pytest
uses: nick-fields/retry@v2
with:
timeout_minutes: 240
max_attempts: 3
command: |
cd tests
python -m pytest . -m "slow" --durations=0

View File

@@ -12,12 +12,6 @@ PUBLIC_DIR := $(shell [ "$$READTHEDOCS" = "True" ] && echo "$$READTHEDOCS_OUTPUT
SO_DIR := qlib/data/_libs
SO_FILES := $(wildcard $(SO_DIR)/*.so)
ifeq ($(OS),Windows_NT)
IS_WINDOWS = true
else
IS_WINDOWS = false
endif
########################################################################################
# Development Environment Management
########################################################################################
@@ -54,10 +48,6 @@ deepclean: clean
# What this code does is compile two Cython modules, rolling and expanding, using setuptools and Cython,
# and builds them as binary expansion modules that can be imported directly into Python.
# Since pyproject.toml can't do that, we compile it here.
# pywinpty as a dependency of jupyter on windows, if you use pip install pywinpty installation,
# will first download the tar.gz file, and then locally compiled and installed,
# this will lead to some unnecessary trouble, so we choose to install the compiled whl file, to avoid trouble.
prerequisite:
@if [ -n "$(SO_FILES)" ]; then \
echo "Shared library files exist, skipping build."; \
@@ -68,10 +58,6 @@ prerequisite:
python -c "from setuptools import setup, Extension; from Cython.Build import cythonize; import numpy; extensions = [Extension('qlib.data._libs.rolling', ['qlib/data/_libs/rolling.pyx'], language='c++', include_dirs=[numpy.get_include()]), Extension('qlib.data._libs.expanding', ['qlib/data/_libs/expanding.pyx'], language='c++', include_dirs=[numpy.get_include()])]; setup(ext_modules=cythonize(extensions, language_level='3'), script_args=['build_ext', '--inplace'])"; \
fi
@if [ "$(IS_WINDOWS)" = "true" ]; then \
python -m pip install pywinpty --only-binary=:all:; \
fi
# Install the package in editable mode.
dependencies:
python -m pip install -e .
@@ -101,7 +87,7 @@ analysis:
python -m pip install -e .[analysis]
all:
python -m pip install -e .[pywinpty,dev,lint,docs,package,test,analysis,rl]
python -m pip install -e .[dev,lint,docs,package,test,analysis,rl]
install: prerequisite dependencies

View File

@@ -155,15 +155,15 @@ Here is a quick **[demo](https://terminalizer.com/view/3f24561a4470)** shows how
This table demonstrates the supported Python version of `Qlib`:
| | install with pip | install from source | plot |
| ------------- |:---------------------:|:--------------------:|:------------------:|
| Python 3.7 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| Python 3.8 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| Python 3.9 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| Python 3.10 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| Python 3.11 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| Python 3.12 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| Python 3.9 | :x: | :heavy_check_mark: | :x: |
**Note**:
1. **Conda** is suggested for managing your Python environment. In some cases, using Python outside of a `conda` environment may result in missing header files, causing the installation failure of certain packages.
2. Please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.8 or higher, or use `conda`'s Python to install ``Qlib`` from source.
1. Please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source.
1. For Python 3.9, `Qlib` supports running workflows such as training models, doing backtest and plot most of the related figures (those included in [notebook](examples/workflow_by_code.ipynb)). However, plotting for the *model performance* is not supported for now and we will fix this when the dependent packages are upgraded in the future.
1. `Qlib` requires the `tables` package, and the `hdf5` support in `tables` does not work with Python 3.9.
### Install with pip
Users can easily install ``Qlib`` by pip according to the following command.
@@ -181,7 +181,7 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor
```bash
pip install numpy
pip install --upgrade cython
pip install --upgrade cython
```
* Clone the repository and install ``Qlib`` as follows.
@@ -189,6 +189,7 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor
git clone https://github.com/microsoft/qlib.git && cd qlib
pip install . # `pip install -e .[dev]` is recommended for development. check details in docs/developer/code_standard_and_dev_guide.rst
```
**Note**: You can install Qlib with `python setup.py install` as well. But it is not the recommended approach. It will skip `pip` and cause obscure problems. For example, **only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, while the command ``python setup.py install`` **can't**.
**Tips**: If you fail to install `Qlib` or run the examples in your environment, comparing your steps and the [CI workflow](.github/workflows/test_qlib_from_source.yml) may help you find the problem.
@@ -196,11 +197,11 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor
## Data Preparation
❗ Due to a stricter data security policy, the official dataset is temporarily disabled. You can try [this data source](https://github.com/chenditc/investment_data/releases) contributed by the community.
Here is an example to download the latest data.
Here is an example to download the data updated on 20240809.
```bash
wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz
wget https://github.com/chenditc/investment_data/releases/download/2024-08-09/qlib_bin.tar.gz
mkdir -p ~/.qlib/qlib_data/cn_data
tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2
tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=1
rm -f qlib_bin.tar.gz
```
@@ -264,16 +265,6 @@ We recommend users to prepare their own data if they have a high-quality dataset
* *trading_date*: start of trading day
* *end_date*: end of trading day(not included)
### Checking the health of the data
* We provide a script to check the health of the data, you can run the following commands to check whether the data is healthy or not.
```
python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data
```
* Of course, you can also add some parameters to adjust the test results, such as this.
```
python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data --missing_data_num 30055 --large_step_threshold_volume 94485 --large_step_threshold_price 20
```
* If you want more information about `check_data_health`, please refer to the [documentation](https://qlib.readthedocs.io/en/latest/component/data.html#checking-the-health-of-the-data).
<!--
- Run the initialization code and get stock data:

View File

@@ -197,57 +197,6 @@ After conversion, users can find their Qlib format data in the directory `~/.qli
In the convention of `Qlib` data processing, `open, close, high, low, volume, money and factor` will be set to NaN if the stock is suspended.
If you want to use your own alpha-factor which can't be calculated from OHLCV, such as PE, EPS and so on, you can add it to the CSV files together with the OHLCV columns and then dump it to the Qlib format data.
Checking the health of the data
-------------------------------
``Qlib`` provides a script to check the health of the data.
- The main points to check are as follows
- Check if any data is missing in the DataFrame.
- Check if there are any large step changes above the threshold in the OHLCV columns.
- Check if any of the required columns (OLHCV) are missing in the DataFrame.
- Check if the 'factor' column is missing in the DataFrame.
- You can run the following commands to check whether the data is healthy or not.
for daily data:
.. code-block:: bash
python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data
for 1min data:
.. code-block:: bash
python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data_1min --freq 1min
- Of course, you can also add some parameters to adjust the test results.
- The available parameters are these.
- freq: Frequency of data.
- large_step_threshold_price: Maximum permitted price change
- large_step_threshold_volume: Maximum permitted volume change.
- missing_data_num: Maximum value for which data is allowed to be null.
- You can run the following commands to check whether the data is healthy or not.
for daily data:
.. code-block:: bash
python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data --missing_data_num 30055 --large_step_threshold_volume 94485 --large_step_threshold_price 20
for 1min data:
.. code-block:: bash
python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data --freq 1min --missing_data_num 35806 --large_step_threshold_volume 3205452000000 --large_step_threshold_price 0.91
Stock Pool (Market)
-------------------

View File

@@ -25,7 +25,7 @@ The design of the framework is shown in the yellow part in the middle of the fig
The frequency of the trading algorithm, decision content and execution environment can be customized by users (e.g. intraday trading, daily-frequency trading, weekly-frequency trading), and the execution environment can be nested with finer-grained trading algorithm and execution environment inside (i.e. sub-workflow in the figure, e.g. daily-frequency orders can be turned into finer-grained decisions by splitting orders within the day). The flexibility of the nested decision execution framework makes it easy for users to explore the effects of combining different levels of trading strategies and break down the optimization barriers between different levels of the trading algorithm.
The optimization for the nested decision execution framework can be implemented with the support of `QlibRL <./rl/overall.html>`_. To know more about how to use the QlibRL, go to API Reference: `RL API <../reference/api.html#rl>`_.
The optimization for the nested decision execution framework can be implemented with the support of `QlibRL <https://qlib.readthedocs.io/en/latest/component/rl.html>`_. To know more about how to use the QlibRL, go to API Reference: `RL API <../reference/api.html#rl>`_.
Example
=======

View File

@@ -7,7 +7,7 @@ The table below shows the performances of different solutions on different forec
## Alpha158 Dataset
Here is the [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases
```bash
wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz
wget https://github.com/chenditc/investment_data/releases/download/20220720/qlib_bin.tar.gz
mkdir -p ~/.qlib/qlib_data/cn_data
tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2
rm -f qlib_bin.tar.gz

View File

@@ -1,8 +1,8 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
The motivation of this demo
- To show the data modules of Qlib is Serializable, users can dump processed data to disk to avoid duplicated data preprocessing
The motivation of this demo
- To show the data modules of Qlib is Serializable, users can dump processed data to disk to avoid duplicated data preprocessing
"""
from copy import deepcopy

View File

@@ -1,8 +1,8 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
The motivation of this demo
- To show the data modules of Qlib is Serializable, users can dump processed data to disk to avoid duplicated data preprocessing
The motivation of this demo
- To show the data modules of Qlib is Serializable, users can dump processed data to disk to avoid duplicated data preprocessing
"""
from copy import deepcopy

View File

@@ -1,10 +1,10 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
NOTE:
- This scripts is a demo to import example data import Qlib
- !!!!!!!!!!!!!!!TODO!!!!!!!!!!!!!!!!!!!:
- Its structure is not well designed and very ugly, your contribution is welcome to make importing dataset easier
NOTE:
- This scripts is a demo to import example data import Qlib
- !!!!!!!!!!!!!!!TODO!!!!!!!!!!!!!!!!!!!:
- Its structure is not well designed and very ugly, your contribution is welcome to make importing dataset easier
"""
from datetime import date, datetime as dt
import os

View File

@@ -1,7 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
Qlib provides two kinds of interfaces.
Qlib provides two kinds of interfaces.
(1) Users could define the Quant research workflow by a simple configuration.
(2) Qlib is designed in a modularized way and supports creating research workflow by code just like building blocks.

View File

@@ -79,7 +79,6 @@ package = [
test = [
"yahooquery",
"baostock",
"akshare",
]
analysis = [
"plotly",

View File

@@ -2,7 +2,7 @@
# Licensed under the MIT License.
from pathlib import Path
__version__ = "0.9.6.99"
__version__ = "0.9.5.80"
__version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version
import os
from typing import Union

View File

@@ -427,10 +427,6 @@ class Indicator:
# NOTE ~(price_s < 1e-08) is different from price_s >= 1e-8
# ~(np.nan < 1e-8) -> ~(False) -> True
# if price_s is empty
if price_s.empty:
return None, None
assert isinstance(price_s, idd.SingleData)
if agg == "vwap":
volume_s = trade_exchange.get_volume(inst, trade_start_time, trade_end_time, method=None)

View File

@@ -326,10 +326,8 @@ class SBBStrategyEMA(SBBStrategyBase):
if instruments is None:
warnings.warn("`instruments` is not set, will load all stocks")
self.instruments = "all"
elif isinstance(instruments, str):
if isinstance(instruments, str):
self.instruments = D.instruments(instruments)
elif isinstance(instruments, List):
self.instruments = instruments
self.freq = freq
super(SBBStrategyEMA, self).__init__(
outer_trade_decision, level_infra, common_infra, trade_exchange=trade_exchange, **kwargs

View File

@@ -1,9 +1,9 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
This module is not a necessary part of Qlib.
They are just some tools for convenience
It should not be imported into the core part of qlib
This module is not a necessary part of Qlib.
They are just some tools for convenience
It should not be imported into the core part of qlib
"""
import torch
import numpy as np

View File

@@ -200,7 +200,7 @@ class Trainer:
if ckpt_path is not None:
_logger.info("Resuming states from %s", str(ckpt_path))
self.load_state_dict(torch.load(ckpt_path, weights_only=False))
self.load_state_dict(torch.load(ckpt_path))
else:
self.initialize()

View File

@@ -71,6 +71,6 @@ qlib.init(provider_uri=provider_uri, region=REG_CN)
## Use Crowd Sourced Data
There is also a [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases
```bash
wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz
wget https://github.com/chenditc/investment_data/releases/download/20220720/qlib_bin.tar.gz
tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2
```

View File

@@ -1,203 +0,0 @@
from loguru import logger
import os
from typing import Optional
import fire
import pandas as pd
import qlib
from tqdm import tqdm
from qlib.data import D
class DataHealthChecker:
    """Check a dataset for data completeness and correctness.

    Every instrument is held as a ``pd.DataFrame`` in ``self.data`` (keyed by
    CSV file name or qlib instrument name) and checked for the following
    problems:

    - any of the columns ["open", "high", "low", "close", "volume"] are missing
    - any data is missing
    - any step change in the OHLCV columns is above a threshold
      (default: 0.5 for price, 3 for volume)
    - any factor is missing
    """

    # Price/volume columns every instrument is expected to provide.
    _OHLCV_COLUMNS = ("open", "high", "low", "close", "volume")
    # Instruments whose name contains one of these codes are skipped by the
    # factor check (presumably index codes without an adjustment factor -- confirm).
    _FACTOR_EXEMPT_CODES = ("000300", "000903", "000905")

    def __init__(
        self,
        csv_path=None,
        qlib_dir=None,
        freq="day",
        large_step_threshold_price=0.5,
        large_step_threshold_volume=3,
        missing_data_num=0,
    ):
        """Load the data to check from exactly one of the two sources.

        Parameters
        ----------
        csv_path : str, optional
            Directory whose ``*.csv`` files are each read with ``pd.read_csv``.
        qlib_dir : str, optional
            Passed to ``qlib.init(provider_uri=...)``; data is then loaded
            through ``load_qlib_data``.
        freq : str
            Frequency forwarded to the qlib data API.
        large_step_threshold_price : float
            Absolute relative change above which a price step is reported.
        large_step_threshold_volume : float
            Absolute relative change above which a volume step is reported.
        missing_data_num : int
            An instrument is reported as having missing data when any of its
            columns has more null values than this number.
        """
        assert csv_path or qlib_dir, "One of csv_path or qlib_dir should be provided."
        assert not (csv_path and qlib_dir), "Only one of csv_path or qlib_dir should be provided."

        self.data = {}
        self.problems = {}
        self.freq = freq
        self.large_step_threshold_price = large_step_threshold_price
        self.large_step_threshold_volume = large_step_threshold_volume
        self.missing_data_num = missing_data_num

        if csv_path:
            assert os.path.isdir(csv_path), f"{csv_path} should be a directory."
            files = [f for f in os.listdir(csv_path) if f.endswith(".csv")]
            for filename in tqdm(files, desc="Loading data"):
                self.data[filename] = pd.read_csv(os.path.join(csv_path, filename))
        elif qlib_dir:
            qlib.init(provider_uri=qlib_dir)
            self.load_qlib_data()

    def load_qlib_data(self):
        """Load OHLCV and factor data of every instrument in the ``all`` market into ``self.data``."""
        instruments = D.instruments(market="all")
        instrument_list = D.list_instruments(instruments=instruments, as_list=True, freq=self.freq)
        required_fields = ["$open", "$close", "$low", "$high", "$volume", "$factor"]
        # Strip the leading "$" so the qlib frames use the same column names as the CSV files.
        renamed_columns = {field: field[1:] for field in required_fields}
        for instrument in instrument_list:
            df = D.features([instrument], required_fields, freq=self.freq)
            df.rename(columns=renamed_columns, inplace=True)
            self.data[instrument] = df

    @staticmethod
    def _format_step_date(df, position):
        """Return a printable date for the row at integer ``position`` of ``df``.

        For a MultiIndex the last level of the label is used. When the label is
        not date-like and ``df`` has a ``date`` column, that column is used
        instead (assumes CSV files carry their dates in a ``date`` column --
        confirm). Anything without ``strftime`` falls back to ``str``.
        """
        label = df.index[position]
        if isinstance(label, tuple):
            label = label[-1]
        if not hasattr(label, "strftime") and "date" in df.columns:
            label = df["date"].iloc[position]
        if hasattr(label, "strftime"):
            return label.strftime("%Y-%m-%d")
        return str(label)

    def check_missing_data(self) -> Optional[pd.DataFrame]:
        """Check if any data is missing in the DataFrame.

        An instrument is reported when any of its columns holds more than
        ``self.missing_data_num`` null values. The report lists the null count
        of each OHLCV column; a column that is absent altogether is counted as
        missing on every row.

        Returns
        -------
        Optional[pd.DataFrame]
            One row per affected instrument, or ``None`` when nothing is missing.
        """
        result_dict = {"instruments": []}
        for col in self._OHLCV_COLUMNS:
            result_dict[col] = []

        for filename, df in self.data.items():
            null_counts = df.isnull().sum()
            if not (null_counts > self.missing_data_num).any():
                continue
            result_dict["instruments"].append(filename)
            for col in self._OHLCV_COLUMNS:
                # .get avoids a KeyError for frames lacking a required column.
                result_dict[col].append(null_counts.get(col, len(df)))

        result_df = pd.DataFrame(result_dict).set_index("instruments")
        if not result_df.empty:
            return result_df
        logger.info("✅ There are no missing data.")
        return None

    def check_large_step_changes(self) -> Optional[pd.DataFrame]:
        """Check if there are any large step changes above the threshold in the OHLCV columns.

        Returns
        -------
        Optional[pd.DataFrame]
            One row per (instrument, column) that exceeds its threshold, or
            ``None``. ``date`` is the first row exceeding the threshold, while
            ``pct_change`` is the largest absolute change found in the column.
        """
        result_dict = {
            "instruments": [],
            "col_name": [],
            "date": [],
            "pct_change": [],
        }
        for filename, df in self.data.items():
            for col in self._OHLCV_COLUMNS:
                if col not in df.columns:
                    continue
                pct_change = df[col].pct_change(fill_method=None).abs()
                threshold = self.large_step_threshold_volume if col == "volume" else self.large_step_threshold_price
                max_change = pct_change.max()
                # Written as "not >" so an all-NaN column (max is NaN) is skipped.
                if not max_change > threshold:
                    continue
                # argmax on the boolean mask yields the position of the first offending row.
                first_position = int((pct_change > threshold).to_numpy().argmax())
                result_dict["instruments"].append(filename)
                result_dict["col_name"].append(col)
                result_dict["date"].append(self._format_step_date(df, first_position))
                result_dict["pct_change"].append(max_change)

        result_df = pd.DataFrame(result_dict).set_index("instruments")
        if not result_df.empty:
            return result_df
        logger.info("✅ There are no large step changes in the OHLCV column above the threshold.")
        return None

    def check_required_columns(self) -> Optional[pd.DataFrame]:
        """Check if any of the required columns (OLHCV) are missing in the DataFrame.

        Returns
        -------
        Optional[pd.DataFrame]
            One row per (instrument, missing column), or ``None`` when every
            instrument has all required columns.
        """
        result_dict = {
            "instruments": [],
            "missing_col": [],
        }
        for filename, df in self.data.items():
            for column in self._OHLCV_COLUMNS:
                if column not in df.columns:
                    # Append both lists together so they always stay the same length.
                    result_dict["instruments"].append(filename)
                    result_dict["missing_col"].append(column)

        result_df = pd.DataFrame(result_dict).set_index("instruments")
        if not result_df.empty:
            return result_df
        logger.info("✅ The columns (OLHCV) are complete and not missing.")
        return None

    def check_missing_factor(self) -> Optional[pd.DataFrame]:
        """Check if the 'factor' column is missing in the DataFrame.

        Instruments whose name contains one of ``_FACTOR_EXEMPT_CODES`` are
        skipped. An instrument is reported when it has no ``factor`` column or
        when every value of that column is null.

        Returns
        -------
        Optional[pd.DataFrame]
            One row per affected instrument with the flags
            ``missing_factor_col`` and ``missing_factor_data``, or ``None``.
        """
        result_dict = {
            "instruments": [],
            "missing_factor_col": [],
            "missing_factor_data": [],
        }
        for filename, df in self.data.items():
            if any(code in filename for code in self._FACTOR_EXEMPT_CODES):
                continue
            has_factor_col = "factor" in df.columns
            # Only look at the values when the column exists; a missing column
            # necessarily means the factor data is missing too.
            if has_factor_col and not df["factor"].isnull().all():
                continue
            result_dict["instruments"].append(filename)
            result_dict["missing_factor_col"].append(not has_factor_col)
            result_dict["missing_factor_data"].append(True)

        result_df = pd.DataFrame(result_dict).set_index("instruments")
        if not result_df.empty:
            return result_df
        logger.info("✅ The `factor` column already exists and is not empty.")
        return None

    def check_data(self):
        """Run every check and print a summary of the problems found."""
        # The tuple is evaluated in order, so the checks run in the listed sequence.
        reports = (
            (self.check_missing_data(), "There is missing data."),
            (self.check_large_step_changes(), "The OHLCV column has large step changes."),
            (self.check_required_columns(), "Columns (OLHCV) are missing."),
            (self.check_missing_factor(), "The factor column does not exist or is empty"),
        )
        if all(result is None for result, _ in reports):
            return

        print(f"\nSummary of data health check ({len(self.data)} files checked):")
        print("-------------------------------------------------")
        for result, message in reports:
            if isinstance(result, pd.DataFrame):
                logger.warning(message)
                print(result)
# Script entry point: hand the DataHealthChecker class to python-fire when this file is run directly.
if __name__ == "__main__":
fire.Fire(DataHealthChecker)

View File

@@ -23,9 +23,7 @@ from data_collector.utils import get_calendar_list, get_trading_date_by_shift, d
from data_collector.utils import get_instruments
# URL template for an index's constituent spreadsheet (``...cons.xls``); ``{index_code}`` is a
# placeholder, presumably filled in with ``str.format`` by the caller.
# NOTE(review): NEW_COMPANIES_URL is assigned twice in this view, pointing at two different hosts;
# if both statements are really present, the later assignment takes effect -- confirm which is intended.
NEW_COMPANIES_URL = (
"https://oss-ch.csindex.com.cn/static/html/csindex/public/uploads/file/autofile/cons/{index_code}cons.xls"
)
NEW_COMPANIES_URL = "https://csi-web-dev.oss-cn-shanghai-finance-1-pub.aliyuncs.com/static/html/csindex/public/uploads/file/autofile/cons/{index_code}cons.xls"
# URL template for the csindex.com.cn announcement search; ``{page_num}`` and ``{page_size}`` are
# paging placeholders and ``searchInput`` carries a fixed, percent-encoded query string.
INDEX_CHANGES_URL = "https://www.csindex.com.cn/csindex-home/search/search-content?lang=cn&searchInput=%E5%85%B3%E4%BA%8E%E8%B0%83%E6%95%B4%E6%B2%AA%E6%B7%B1300%E5%92%8C%E4%B8%AD%E8%AF%81%E9%A6%99%E6%B8%AF100%E7%AD%89%E6%8C%87%E6%95%B0%E6%A0%B7%E6%9C%AC&pageNum={page_num}&pageSize={page_size}&sortField=date&dateRange=all&contentType=announcement"

View File

@@ -16,9 +16,9 @@ The packaged docker runtime is hosted on dockerhub: https://hub.docker.com/repos
## How to use it in qlib
### Option 1: Download release bin data
User can download data in qlib bin format and use it directly: https://github.com/chenditc/investment_data/releases/latest
User can download data in qlib bin format and use it directly: https://github.com/chenditc/investment_data/releases/tag/20220720
```bash
wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz
wget https://github.com/chenditc/investment_data/releases/download/20220720/qlib_bin.tar.gz
tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2
```

View File

@@ -13,7 +13,6 @@ import functools
from pathlib import Path
from typing import Iterable, Tuple, List
import akshare as ak
import numpy as np
import pandas as pd
from loguru import logger
@@ -203,9 +202,18 @@ def get_hs_stock_symbols() -> list:
-------
{600000.ss, 600001.ss, 600002.ss, 600003.ss, ...}
"""
stock_info_a_code_name_df = ak.stock_info_a_code_name()
stock_codes = stock_info_a_code_name_df["code"].tolist()
_symbols = [code for code in stock_codes if code and code.strip()]
url = "http://99.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&po=1&np=1&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f12"
try:
resp = requests.get(url, timeout=None)
resp.raise_for_status()
except requests.exceptions.HTTPError as e:
raise requests.exceptions.HTTPError(f"Request to {url} failed with status code {resp.status_code}") from e
try:
_symbols = [_v["f12"] for _v in resp.json()["data"]["diff"]]
except Exception as e:
logger.warning("An error occurred while extracting data from the response.")
raise
if len(_symbols) < 3900:
raise ValueError("The complete list of stocks is not available.")

View File

@@ -50,6 +50,12 @@ pip install -r requirements.txt
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min
# us 1d
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data --region us --interval 1d
# us 1min
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data_1min --region us --interval 1min
# in 1d
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/in_data --region in --interval 1d
# in 1min
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/in_data_1min --region in --interval 1min
```
### Collector *YahooFinance* data to qlib

View File

@@ -37,5 +37,5 @@ setup(
language="c++",
include_dirs=[NUMPY_INCLUDE],
),
],
]
)

View File

@@ -194,7 +194,7 @@ def test_trainer_checkpoint():
assert (output_dir / "002.pth").exists()
assert os.readlink(output_dir / "latest.pth") == str(output_dir / "002.pth")
trainer.load_state_dict(torch.load(output_dir / "001.pth", weights_only=False))
trainer.load_state_dict(torch.load(output_dir / "001.pth"))
assert trainer.current_iter == 1
assert trainer.current_episode == 100