1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Merge branch 'main' of github.com:microsoft/qlib into fix_docs

This commit is contained in:
Linlang Lv (iSoftStone Information)
2024-05-17 17:42:15 +08:00
24 changed files with 122 additions and 58 deletions

View File

@@ -51,8 +51,8 @@ jobs:
python setup.py bdist_wheel
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
run: |
twine upload dist/*
@@ -72,10 +72,10 @@ jobs:
python-version: 3.7
- name: Install dependencies
run: |
pip install twine
pip install twine
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
run: |
twine upload dist/pyqlib-*-manylinux*.whl

View File

@@ -6,8 +6,14 @@ on:
branches:
- main
permissions:
contents: read
jobs:
update_release_draft:
permissions:
contents: write
pull-requests: read
runs-on: ubuntu-latest
steps:
# Drafts your next Release notes as Pull Requests are merged into "main"

View File

@@ -13,7 +13,10 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
# Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
# Python 3.6 is not supported because annotations are not supported there: https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]

View File

@@ -14,7 +14,10 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
# Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
# Python 3.6 is not supported because annotations are not supported there: https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]

View File

@@ -14,7 +14,10 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
# Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
# Python 3.6 is not supported because annotations are not supported there: https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]

2
.gitignore vendored
View File

@@ -48,4 +48,4 @@ tags
*.swp
./pretrain
.idea/
.idea/

View File

@@ -172,6 +172,8 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor
**Tips**: If you fail to install `Qlib` or run the examples in your environment, comparing your steps and the [CI workflow](.github/workflows/test_qlib_from_source.yml) may help you find the problem.
**Tips for Mac**: If you are using Mac with M1, you might encounter issues in building the wheel for LightGBM, which is due to missing dependencies from OpenMP. To solve the problem, install openmp first with ``brew install libomp`` and then run ``pip install .`` to build it successfully.
## Data Preparation
Load and prepare data by running the following code:

View File

@@ -324,7 +324,6 @@ class TRAModel(Model):
class LSTM(nn.Module):
"""LSTM Model
Args:
@@ -414,7 +413,6 @@ class PositionalEncoding(nn.Module):
class Transformer(nn.Module):
"""Transformer Model
Args:
@@ -475,7 +473,6 @@ class Transformer(nn.Module):
class TRA(nn.Module):
"""Temporal Routing Adaptor (TRA)
TRA takes historical prediction errors & latent representation as inputs,

View File

@@ -27,13 +27,11 @@ pip install arctic # NOTE: pip may fail to resolve the right package dependency
2. Please follow the steps below to download the example data
```bash
cd examples/orderbook_data/
wget http://fintech.msra.cn/stock_data/downloads/highfreq_orderboook_example_data.tar.bz2
tar xf highfreq_orderboook_example_data.tar.bz2
python ../../scripts/get_data.py download_data --target_dir . --file_name highfreq_orderbook_example_data.zip
```
3. Please import the example data to your mongo db
```bash
cd examples/orderbook_data/
python create_dataset.py initialize_library # Initialization Libraries
python create_dataset.py import_data # Initialization Libraries
```
@@ -42,7 +40,6 @@ python create_dataset.py import_data # Initialization Libraries
After importing these data, you run `example.py` to create some high-frequency features.
```bash
cd examples/orderbook_data/
pytest -s --disable-warnings example.py # If you want to run all examples
pytest -s --disable-warnings example.py::TestClass::test_exp_10 # If you want to run a specific example
```

View File

@@ -2,7 +2,7 @@
# Licensed under the MIT License.
from pathlib import Path
__version__ = "0.9.3.99"
__version__ = "0.9.4.99"
__version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version
import os
from typing import Union

View File

@@ -162,13 +162,15 @@ def create_account_instance(
init_cash=init_cash,
position_dict=position_dict,
pos_type=pos_type,
benchmark_config={}
if benchmark is None
else {
"benchmark": benchmark,
"start_time": start_time,
"end_time": end_time,
},
benchmark_config=(
{}
if benchmark is None
else {
"benchmark": benchmark,
"start_time": start_time,
"end_time": end_time,
}
),
)

View File

@@ -622,9 +622,11 @@ class Indicator:
print(
"[Indicator({}) {}]: FFR: {}, PA: {}, POS: {}".format(
freq,
trade_start_time
if isinstance(trade_start_time, str)
else trade_start_time.strftime("%Y-%m-%d %H:%M:%S"),
(
trade_start_time
if isinstance(trade_start_time, str)
else trade_start_time.strftime("%Y-%m-%d %H:%M:%S")
),
fulfill_rate,
price_advantage,
positive_rate,

View File

@@ -3,6 +3,7 @@ Here is a batch of evaluation functions.
The interface should be redesigned carefully in the future.
"""
import pandas as pd
from typing import Tuple
from qlib import get_module_logger

View File

@@ -511,7 +511,6 @@ class TRAModel(Model):
class RNN(nn.Module):
"""RNN Model
Args:
@@ -601,7 +600,6 @@ class PositionalEncoding(nn.Module):
class Transformer(nn.Module):
"""Transformer Model
Args:
@@ -649,7 +647,6 @@ class Transformer(nn.Module):
class TRA(nn.Module):
"""Temporal Routing Adaptor (TRA)
TRA takes historical prediction errors & latent representation as inputs,

View File

@@ -373,7 +373,6 @@ class WeightStrategyBase(BaseSignalStrategy):
class EnhancedIndexingStrategy(WeightStrategyBase):
"""Enhanced Indexing Strategy
Enhanced indexing combines the arts of active management and passive management,

View File

@@ -30,7 +30,6 @@ class Ensemble:
class SingleKeyEnsemble(Ensemble):
"""
Extract the object if there is only one key and value in the dict. Make the result more readable.
{Only key: Only value} -> Only value
@@ -64,7 +63,6 @@ class SingleKeyEnsemble(Ensemble):
class RollingEnsemble(Ensemble):
"""Merge a dict of rolling dataframe like `prediction` or `IC` into an ensemble.
NOTE: The values of dict must be pd.DataFrame, and have the index "datetime".

View File

@@ -247,9 +247,7 @@ class ShrinkCovEstimator(RiskModel):
v1 = y.T.dot(z) / t - cov_mkt[:, None] * S
roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt
v3 = z.T.dot(z) / t - var_mkt * S
roff3 = (
np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
)
roff3 = np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
roff = 2 * roff1 - roff3
rho = rdiag + roff

View File

@@ -25,7 +25,12 @@ import pandas as pd
from pathlib import Path
from typing import List, Union, Optional, Callable
from packaging import version
from .file import get_or_create_path, save_multiple_parts_file, unpack_archive_with_buffer, get_tmp_file_with_buffer
from .file import (
get_or_create_path,
save_multiple_parts_file,
unpack_archive_with_buffer,
get_tmp_file_with_buffer,
)
from ..config import C
from ..log import get_module_logger, set_log_with_config
@@ -37,7 +42,12 @@ is_deprecated_lexsorted_pandas = version.parse(pd.__version__) > version.parse("
#################### Server ####################
def get_redis_connection():
"""get redis connection instance."""
return redis.StrictRedis(host=C.redis_host, port=C.redis_port, db=C.redis_task_db, password=C.redis_password)
return redis.StrictRedis(
host=C.redis_host,
port=C.redis_port,
db=C.redis_task_db,
password=C.redis_password,
)
#################### Data ####################
@@ -96,7 +106,14 @@ def get_period_offset(first_year, period, quarterly):
return offset
def read_period_data(index_path, data_path, period, cur_date_int: int, quarterly, last_period_index: int = None):
def read_period_data(
index_path,
data_path,
period,
cur_date_int: int,
quarterly,
last_period_index: int = None,
):
"""
At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803).
Only the updating info before cur_date or at cur_date will be used.
@@ -273,7 +290,10 @@ def parse_field(field):
# \uff09 -> )
chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09"
for pattern, new in [
(rf"\$\$([\w{chinese_punctuation_regex}]+)", r'PFeature("\1")'), # $$ must be before $
(
rf"\$\$([\w{chinese_punctuation_regex}]+)",
r'PFeature("\1")',
), # $$ must be before $
(rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'),
(r"(\w+\s*)\(", r"Operators.\1("),
]: # Features # Operators
@@ -383,7 +403,14 @@ def get_date_range(trading_date, left_shift=0, right_shift=0, future=False):
return calendar
def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None):
def get_date_by_shift(
trading_date,
shift,
future=False,
clip_shift=True,
freq="day",
align: Optional[str] = None,
):
"""get trading date with shift bias will cur_date
e.g. : shift == 1, return next trading date
shift == -1, return previous trading date
@@ -569,7 +596,38 @@ def exists_qlib_data(qlib_dir):
# check instruments
code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir()))
_instrument = instruments_dir.joinpath("all.txt")
miss_code = set(pd.read_csv(_instrument, sep="\t", header=None).loc[:, 0].apply(str.lower)) - set(code_names)
# Removed two possible ticker names "NA" and "NULL" from the default na_values list for column 0
miss_code = set(
pd.read_csv(
_instrument,
sep="\t",
header=None,
keep_default_na=False,
na_values={
0: [
" ",
"#N/A",
"#N/A N/A",
"#NA",
"-1.#IND",
"-1.#QNAN",
"-NaN",
"-nan",
"1.#IND",
"1.#QNAN",
"<NA>",
"N/A",
"NaN",
"None",
"n/a",
"nan",
"null ",
]
},
)
.loc[:, 0]
.apply(str.lower)
) - set(code_names)
if miss_code and any(map(lambda x: "sht" not in x, miss_code)):
return False

View File

@@ -90,7 +90,6 @@ class OnlineStrategy:
class RollingStrategy(OnlineStrategy):
"""
This example strategy always uses the latest rolling models as online models.
"""

View File

@@ -396,14 +396,7 @@ class CSI500Index(CSIIndex):
today = pd.Timestamp.now()
date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date
ret_list = []
col = ["date", "symbol", "code_name"]
for date in tqdm(date_range, desc="Download CSI500"):
rs = bs.query_zz500_stocks(date=str(date))
zz500_stocks = []
while (rs.error_code == "0") & rs.next():
zz500_stocks.append(rs.get_row_data())
result = pd.DataFrame(zz500_stocks, columns=col)
result["symbol"] = result["symbol"].apply(lambda x: x.replace(".", "").upper())
result = self.get_data_from_baostock(date)
ret_list.append(result[["date", "symbol"]])
bs.logout()

View File

@@ -146,9 +146,7 @@ class DumpDataBase:
return (
self._include_fields
if self._include_fields
else set(df_columns) - set(self._exclude_fields)
if self._exclude_fields
else df_columns
else set(df_columns) - set(self._exclude_fields) if self._exclude_fields else df_columns
)
@staticmethod

View File

@@ -3,7 +3,7 @@
"""
TODO:
- A more well-designed PIT database is required.
- seperated insert, delete, update, query operations are required.
- separated insert, delete, update, query operations are required.
"""
import shutil
@@ -132,9 +132,11 @@ class DumpPitData:
return (
set(self._include_fields)
if self._include_fields
else set(df[self.field_column_name]) - set(self._exclude_fields)
if self._exclude_fields
else set(df[self.field_column_name])
else (
set(df[self.field_column_name]) - set(self._exclude_fields)
if self._exclude_fields
else set(df[self.field_column_name])
)
)
def get_filenames(self, symbol, field, interval):

View File

@@ -65,6 +65,8 @@ REQUIRED = [
# To ensure stable operation of the experiment manager, we have limited the version of mlflow,
# and we need to verify whether version 2.0 of mlflow can serve qlib properly.
"mlflow>=1.12.1, <=1.30.0",
# mlflow 1.30.0 requires packaging<22, so we limit the packaging version, otherwise the CI will fail.
"packaging<22",
"tqdm",
"loguru",
"lightgbm>=3.3.0",

View File

@@ -9,7 +9,9 @@ from qlib.tests import TestAutoData
class WorkflowTest(TestAutoData):
TMP_PATH = Path("./.mlruns_tmp/")
# Creating the directory manually doesn't work with mlflow,
# so we add a subfolder named .trash when we create the directory.
TMP_PATH = Path("./.mlruns_tmp/.trash")
def tearDown(self) -> None:
if self.TMP_PATH.exists():
@@ -17,6 +19,8 @@ class WorkflowTest(TestAutoData):
def test_get_local_dir(self):
""" """
self.TMP_PATH.mkdir(parents=True, exist_ok=True)
with R.start(uri=str(self.TMP_PATH)):
pass