mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
Merge branch 'main' of github.com:microsoft/qlib into fix_docs
This commit is contained in:
10
.github/workflows/python-publish.yml
vendored
10
.github/workflows/python-publish.yml
vendored
@@ -51,8 +51,8 @@ jobs:
|
||||
python setup.py bdist_wheel
|
||||
- name: Build and publish
|
||||
env:
|
||||
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
|
||||
run: |
|
||||
twine upload dist/*
|
||||
|
||||
@@ -72,10 +72,10 @@ jobs:
|
||||
python-version: 3.7
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install twine
|
||||
pip install twine
|
||||
- name: Build and publish
|
||||
env:
|
||||
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
|
||||
run: |
|
||||
twine upload dist/pyqlib-*-manylinux*.whl
|
||||
|
||||
6
.github/workflows/release-drafter.yml
vendored
6
.github/workflows/release-drafter.yml
vendored
@@ -6,8 +6,14 @@ on:
|
||||
branches:
|
||||
- main
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
update_release_draft:
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: read
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# Drafts your next Release notes as Pull Requests are merged into "master"
|
||||
|
||||
5
.github/workflows/test_qlib_from_pip.yml
vendored
5
.github/workflows/test_qlib_from_pip.yml
vendored
@@ -13,7 +13,10 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
|
||||
# Since macos-latest changed from 12.7.4 to 14.4.1,
|
||||
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
|
||||
# so we limit the macos version to macos-12.
|
||||
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
|
||||
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
|
||||
python-version: [3.7, 3.8]
|
||||
|
||||
|
||||
5
.github/workflows/test_qlib_from_source.yml
vendored
5
.github/workflows/test_qlib_from_source.yml
vendored
@@ -14,7 +14,10 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
|
||||
# Since macos-latest changed from 12.7.4 to 14.4.1,
|
||||
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
|
||||
# so we limit the macos version to macos-12.
|
||||
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
|
||||
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
|
||||
python-version: [3.7, 3.8]
|
||||
|
||||
|
||||
@@ -14,7 +14,10 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
|
||||
# Since macos-latest changed from 12.7.4 to 14.4.1,
|
||||
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
|
||||
# so we limit the macos version to macos-12.
|
||||
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
|
||||
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
|
||||
python-version: [3.7, 3.8]
|
||||
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -48,4 +48,4 @@ tags
|
||||
*.swp
|
||||
|
||||
./pretrain
|
||||
.idea/
|
||||
.idea/
|
||||
@@ -172,6 +172,8 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor
|
||||
|
||||
**Tips**: If you fail to install `Qlib` or run the examples in your environment, comparing your steps and the [CI workflow](.github/workflows/test_qlib_from_source.yml) may help you find the problem.
|
||||
|
||||
**Tips for Mac**: If you are using Mac with M1, you might encounter issues in building the wheel for LightGBM, which is due to missing dependencies from OpenMP. To solve the problem, install openmp first with ``brew install libomp`` and then run ``pip install .`` to build it successfully.
|
||||
|
||||
## Data Preparation
|
||||
Load and prepare data by running the following code:
|
||||
|
||||
|
||||
@@ -324,7 +324,6 @@ class TRAModel(Model):
|
||||
|
||||
|
||||
class LSTM(nn.Module):
|
||||
|
||||
"""LSTM Model
|
||||
|
||||
Args:
|
||||
@@ -414,7 +413,6 @@ class PositionalEncoding(nn.Module):
|
||||
|
||||
|
||||
class Transformer(nn.Module):
|
||||
|
||||
"""Transformer Model
|
||||
|
||||
Args:
|
||||
@@ -475,7 +473,6 @@ class Transformer(nn.Module):
|
||||
|
||||
|
||||
class TRA(nn.Module):
|
||||
|
||||
"""Temporal Routing Adaptor (TRA)
|
||||
|
||||
TRA takes historical prediction errors & latent representation as inputs,
|
||||
|
||||
@@ -27,13 +27,11 @@ pip install arctic # NOTE: pip may fail to resolve the right package dependency
|
||||
2. Please follow following steps to download example data
|
||||
```bash
|
||||
cd examples/orderbook_data/
|
||||
wget http://fintech.msra.cn/stock_data/downloads/highfreq_orderboook_example_data.tar.bz2
|
||||
tar xf highfreq_orderboook_example_data.tar.bz2
|
||||
python ../../scripts/get_data.py download_data --target_dir . --file_name highfreq_orderbook_example_data.zip
|
||||
```
|
||||
|
||||
3. Please import the example data to your mongo db
|
||||
```bash
|
||||
cd examples/orderbook_data/
|
||||
python create_dataset.py initialize_library # Initialization Libraries
|
||||
python create_dataset.py import_data # Initialization Libraries
|
||||
```
|
||||
@@ -42,7 +40,6 @@ python create_dataset.py import_data # Initialization Libraries
|
||||
|
||||
After importing these data, you run `example.py` to create some high-frequency features.
|
||||
```bash
|
||||
cd examples/orderbook_data/
|
||||
pytest -s --disable-warnings example.py # If you want run all examples
|
||||
pytest -s --disable-warnings example.py::TestClass::test_exp_10 # If you want to run specific example
|
||||
```
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Licensed under the MIT License.
|
||||
from pathlib import Path
|
||||
|
||||
__version__ = "0.9.3.99"
|
||||
__version__ = "0.9.4.99"
|
||||
__version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version
|
||||
import os
|
||||
from typing import Union
|
||||
|
||||
@@ -162,13 +162,15 @@ def create_account_instance(
|
||||
init_cash=init_cash,
|
||||
position_dict=position_dict,
|
||||
pos_type=pos_type,
|
||||
benchmark_config={}
|
||||
if benchmark is None
|
||||
else {
|
||||
"benchmark": benchmark,
|
||||
"start_time": start_time,
|
||||
"end_time": end_time,
|
||||
},
|
||||
benchmark_config=(
|
||||
{}
|
||||
if benchmark is None
|
||||
else {
|
||||
"benchmark": benchmark,
|
||||
"start_time": start_time,
|
||||
"end_time": end_time,
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -622,9 +622,11 @@ class Indicator:
|
||||
print(
|
||||
"[Indicator({}) {}]: FFR: {}, PA: {}, POS: {}".format(
|
||||
freq,
|
||||
trade_start_time
|
||||
if isinstance(trade_start_time, str)
|
||||
else trade_start_time.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
(
|
||||
trade_start_time
|
||||
if isinstance(trade_start_time, str)
|
||||
else trade_start_time.strftime("%Y-%m-%d %H:%M:%S")
|
||||
),
|
||||
fulfill_rate,
|
||||
price_advantage,
|
||||
positive_rate,
|
||||
|
||||
@@ -3,6 +3,7 @@ Here is a batch of evaluation functions.
|
||||
|
||||
The interface should be redesigned carefully in the future.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from typing import Tuple
|
||||
from qlib import get_module_logger
|
||||
|
||||
@@ -511,7 +511,6 @@ class TRAModel(Model):
|
||||
|
||||
|
||||
class RNN(nn.Module):
|
||||
|
||||
"""RNN Model
|
||||
|
||||
Args:
|
||||
@@ -601,7 +600,6 @@ class PositionalEncoding(nn.Module):
|
||||
|
||||
|
||||
class Transformer(nn.Module):
|
||||
|
||||
"""Transformer Model
|
||||
|
||||
Args:
|
||||
@@ -649,7 +647,6 @@ class Transformer(nn.Module):
|
||||
|
||||
|
||||
class TRA(nn.Module):
|
||||
|
||||
"""Temporal Routing Adaptor (TRA)
|
||||
|
||||
TRA takes historical prediction errors & latent representation as inputs,
|
||||
|
||||
@@ -373,7 +373,6 @@ class WeightStrategyBase(BaseSignalStrategy):
|
||||
|
||||
|
||||
class EnhancedIndexingStrategy(WeightStrategyBase):
|
||||
|
||||
"""Enhanced Indexing Strategy
|
||||
|
||||
Enhanced indexing combines the arts of active management and passive management,
|
||||
|
||||
@@ -30,7 +30,6 @@ class Ensemble:
|
||||
|
||||
|
||||
class SingleKeyEnsemble(Ensemble):
|
||||
|
||||
"""
|
||||
Extract the object if there is only one key and value in the dict. Make the result more readable.
|
||||
{Only key: Only value} -> Only value
|
||||
@@ -64,7 +63,6 @@ class SingleKeyEnsemble(Ensemble):
|
||||
|
||||
|
||||
class RollingEnsemble(Ensemble):
|
||||
|
||||
"""Merge a dict of rolling dataframe like `prediction` or `IC` into an ensemble.
|
||||
|
||||
NOTE: The values of dict must be pd.DataFrame, and have the index "datetime".
|
||||
|
||||
@@ -247,9 +247,7 @@ class ShrinkCovEstimator(RiskModel):
|
||||
v1 = y.T.dot(z) / t - cov_mkt[:, None] * S
|
||||
roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt
|
||||
v3 = z.T.dot(z) / t - var_mkt * S
|
||||
roff3 = (
|
||||
np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
|
||||
)
|
||||
roff3 = np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
|
||||
roff = 2 * roff1 - roff3
|
||||
rho = rdiag + roff
|
||||
|
||||
|
||||
@@ -25,7 +25,12 @@ import pandas as pd
|
||||
from pathlib import Path
|
||||
from typing import List, Union, Optional, Callable
|
||||
from packaging import version
|
||||
from .file import get_or_create_path, save_multiple_parts_file, unpack_archive_with_buffer, get_tmp_file_with_buffer
|
||||
from .file import (
|
||||
get_or_create_path,
|
||||
save_multiple_parts_file,
|
||||
unpack_archive_with_buffer,
|
||||
get_tmp_file_with_buffer,
|
||||
)
|
||||
from ..config import C
|
||||
from ..log import get_module_logger, set_log_with_config
|
||||
|
||||
@@ -37,7 +42,12 @@ is_deprecated_lexsorted_pandas = version.parse(pd.__version__) > version.parse("
|
||||
#################### Server ####################
|
||||
def get_redis_connection():
|
||||
"""get redis connection instance."""
|
||||
return redis.StrictRedis(host=C.redis_host, port=C.redis_port, db=C.redis_task_db, password=C.redis_password)
|
||||
return redis.StrictRedis(
|
||||
host=C.redis_host,
|
||||
port=C.redis_port,
|
||||
db=C.redis_task_db,
|
||||
password=C.redis_password,
|
||||
)
|
||||
|
||||
|
||||
#################### Data ####################
|
||||
@@ -96,7 +106,14 @@ def get_period_offset(first_year, period, quarterly):
|
||||
return offset
|
||||
|
||||
|
||||
def read_period_data(index_path, data_path, period, cur_date_int: int, quarterly, last_period_index: int = None):
|
||||
def read_period_data(
|
||||
index_path,
|
||||
data_path,
|
||||
period,
|
||||
cur_date_int: int,
|
||||
quarterly,
|
||||
last_period_index: int = None,
|
||||
):
|
||||
"""
|
||||
At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803).
|
||||
Only the updating info before cur_date or at cur_date will be used.
|
||||
@@ -273,7 +290,10 @@ def parse_field(field):
|
||||
# \uff09 -> )
|
||||
chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09"
|
||||
for pattern, new in [
|
||||
(rf"\$\$([\w{chinese_punctuation_regex}]+)", r'PFeature("\1")'), # $$ must be before $
|
||||
(
|
||||
rf"\$\$([\w{chinese_punctuation_regex}]+)",
|
||||
r'PFeature("\1")',
|
||||
), # $$ must be before $
|
||||
(rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'),
|
||||
(r"(\w+\s*)\(", r"Operators.\1("),
|
||||
]: # Features # Operators
|
||||
@@ -383,7 +403,14 @@ def get_date_range(trading_date, left_shift=0, right_shift=0, future=False):
|
||||
return calendar
|
||||
|
||||
|
||||
def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None):
|
||||
def get_date_by_shift(
|
||||
trading_date,
|
||||
shift,
|
||||
future=False,
|
||||
clip_shift=True,
|
||||
freq="day",
|
||||
align: Optional[str] = None,
|
||||
):
|
||||
"""get trading date with shift bias will cur_date
|
||||
e.g. : shift == 1, return next trading date
|
||||
shift == -1, return previous trading date
|
||||
@@ -569,7 +596,38 @@ def exists_qlib_data(qlib_dir):
|
||||
# check instruments
|
||||
code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir()))
|
||||
_instrument = instruments_dir.joinpath("all.txt")
|
||||
miss_code = set(pd.read_csv(_instrument, sep="\t", header=None).loc[:, 0].apply(str.lower)) - set(code_names)
|
||||
# Removed two possible ticker names "NA" and "NULL" from the default na_values list for column 0
|
||||
miss_code = set(
|
||||
pd.read_csv(
|
||||
_instrument,
|
||||
sep="\t",
|
||||
header=None,
|
||||
keep_default_na=False,
|
||||
na_values={
|
||||
0: [
|
||||
" ",
|
||||
"#N/A",
|
||||
"#N/A N/A",
|
||||
"#NA",
|
||||
"-1.#IND",
|
||||
"-1.#QNAN",
|
||||
"-NaN",
|
||||
"-nan",
|
||||
"1.#IND",
|
||||
"1.#QNAN",
|
||||
"<NA>",
|
||||
"N/A",
|
||||
"NaN",
|
||||
"None",
|
||||
"n/a",
|
||||
"nan",
|
||||
"null ",
|
||||
]
|
||||
},
|
||||
)
|
||||
.loc[:, 0]
|
||||
.apply(str.lower)
|
||||
) - set(code_names)
|
||||
if miss_code and any(map(lambda x: "sht" not in x, miss_code)):
|
||||
return False
|
||||
|
||||
|
||||
@@ -90,7 +90,6 @@ class OnlineStrategy:
|
||||
|
||||
|
||||
class RollingStrategy(OnlineStrategy):
|
||||
|
||||
"""
|
||||
This example strategy always uses the latest rolling model sas online models.
|
||||
"""
|
||||
|
||||
@@ -396,14 +396,7 @@ class CSI500Index(CSIIndex):
|
||||
today = pd.Timestamp.now()
|
||||
date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date
|
||||
ret_list = []
|
||||
col = ["date", "symbol", "code_name"]
|
||||
for date in tqdm(date_range, desc="Download CSI500"):
|
||||
rs = bs.query_zz500_stocks(date=str(date))
|
||||
zz500_stocks = []
|
||||
while (rs.error_code == "0") & rs.next():
|
||||
zz500_stocks.append(rs.get_row_data())
|
||||
result = pd.DataFrame(zz500_stocks, columns=col)
|
||||
result["symbol"] = result["symbol"].apply(lambda x: x.replace(".", "").upper())
|
||||
result = self.get_data_from_baostock(date)
|
||||
ret_list.append(result[["date", "symbol"]])
|
||||
bs.logout()
|
||||
|
||||
@@ -146,9 +146,7 @@ class DumpDataBase:
|
||||
return (
|
||||
self._include_fields
|
||||
if self._include_fields
|
||||
else set(df_columns) - set(self._exclude_fields)
|
||||
if self._exclude_fields
|
||||
else df_columns
|
||||
else set(df_columns) - set(self._exclude_fields) if self._exclude_fields else df_columns
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"""
|
||||
TODO:
|
||||
- A more well-designed PIT database is required.
|
||||
- seperated insert, delete, update, query operations are required.
|
||||
- separated insert, delete, update, query operations are required.
|
||||
"""
|
||||
|
||||
import shutil
|
||||
@@ -132,9 +132,11 @@ class DumpPitData:
|
||||
return (
|
||||
set(self._include_fields)
|
||||
if self._include_fields
|
||||
else set(df[self.field_column_name]) - set(self._exclude_fields)
|
||||
if self._exclude_fields
|
||||
else set(df[self.field_column_name])
|
||||
else (
|
||||
set(df[self.field_column_name]) - set(self._exclude_fields)
|
||||
if self._exclude_fields
|
||||
else set(df[self.field_column_name])
|
||||
)
|
||||
)
|
||||
|
||||
def get_filenames(self, symbol, field, interval):
|
||||
|
||||
2
setup.py
2
setup.py
@@ -65,6 +65,8 @@ REQUIRED = [
|
||||
# To ensure stable operation of the experiment manager, we have limited the version of mlflow,
|
||||
# and we need to verify whether version 2.0 of mlflow can serve qlib properly.
|
||||
"mlflow>=1.12.1, <=1.30.0",
|
||||
# mlflow 1.30.0 requires packaging<22, so we limit the packaging version, otherwise the CI will fail.
|
||||
"packaging<22",
|
||||
"tqdm",
|
||||
"loguru",
|
||||
"lightgbm>=3.3.0",
|
||||
|
||||
@@ -9,7 +9,9 @@ from qlib.tests import TestAutoData
|
||||
|
||||
|
||||
class WorkflowTest(TestAutoData):
|
||||
TMP_PATH = Path("./.mlruns_tmp/")
|
||||
# Creating the directory manually doesn't work with mlflow,
|
||||
# so we add a subfolder named .trash when we create the directory.
|
||||
TMP_PATH = Path("./.mlruns_tmp/.trash")
|
||||
|
||||
def tearDown(self) -> None:
|
||||
if self.TMP_PATH.exists():
|
||||
@@ -17,6 +19,8 @@ class WorkflowTest(TestAutoData):
|
||||
|
||||
def test_get_local_dir(self):
|
||||
""" """
|
||||
self.TMP_PATH.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with R.start(uri=str(self.TMP_PATH)):
|
||||
pass
|
||||
|
||||
|
||||
Reference in New Issue
Block a user