1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Merge branch 'main' of github.com:microsoft/qlib into fix_docs

This commit is contained in:
Linlang Lv (iSoftStone Information)
2024-05-17 17:42:15 +08:00
24 changed files with 122 additions and 58 deletions

View File

@@ -51,8 +51,8 @@ jobs:
python setup.py bdist_wheel python setup.py bdist_wheel
- name: Build and publish - name: Build and publish
env: env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
run: | run: |
twine upload dist/* twine upload dist/*
@@ -72,10 +72,10 @@ jobs:
python-version: 3.7 python-version: 3.7
- name: Install dependencies - name: Install dependencies
run: | run: |
pip install twine pip install twine
- name: Build and publish - name: Build and publish
env: env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
run: | run: |
twine upload dist/pyqlib-*-manylinux*.whl twine upload dist/pyqlib-*-manylinux*.whl

View File

@@ -6,8 +6,14 @@ on:
branches: branches:
- main - main
permissions:
contents: read
jobs: jobs:
update_release_draft: update_release_draft:
permissions:
contents: write
pull-requests: read
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
# Drafts your next Release notes as Pull Requests are merged into "main" # Drafts your next Release notes as Pull Requests are merged into "main"

View File

@@ -13,7 +13,10 @@ jobs:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest] # Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
# not supporting 3.6 because annotations are not supported there https://stackoverflow.com/a/52890129 # not supporting 3.6 because annotations are not supported there https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8] python-version: [3.7, 3.8]

View File

@@ -14,7 +14,10 @@ jobs:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest] # Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
# not supporting 3.6 because annotations are not supported there https://stackoverflow.com/a/52890129 # not supporting 3.6 because annotations are not supported there https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8] python-version: [3.7, 3.8]

View File

@@ -14,7 +14,10 @@ jobs:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest] # Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
# not supporting 3.6 because annotations are not supported there https://stackoverflow.com/a/52890129 # not supporting 3.6 because annotations are not supported there https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8] python-version: [3.7, 3.8]

2
.gitignore vendored
View File

@@ -48,4 +48,4 @@ tags
*.swp *.swp
./pretrain ./pretrain
.idea/ .idea/

View File

@@ -172,6 +172,8 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor
**Tips**: If you fail to install `Qlib` or run the examples in your environment, comparing your steps and the [CI workflow](.github/workflows/test_qlib_from_source.yml) may help you find the problem. **Tips**: If you fail to install `Qlib` or run the examples in your environment, comparing your steps and the [CI workflow](.github/workflows/test_qlib_from_source.yml) may help you find the problem.
**Tips for Mac**: If you are using Mac with M1, you might encounter issues in building the wheel for LightGBM, which is due to missing dependencies from OpenMP. To solve the problem, install openmp first with ``brew install libomp`` and then run ``pip install .`` to build it successfully.
## Data Preparation ## Data Preparation
Load and prepare data by running the following code: Load and prepare data by running the following code:

View File

@@ -324,7 +324,6 @@ class TRAModel(Model):
class LSTM(nn.Module): class LSTM(nn.Module):
"""LSTM Model """LSTM Model
Args: Args:
@@ -414,7 +413,6 @@ class PositionalEncoding(nn.Module):
class Transformer(nn.Module): class Transformer(nn.Module):
"""Transformer Model """Transformer Model
Args: Args:
@@ -475,7 +473,6 @@ class Transformer(nn.Module):
class TRA(nn.Module): class TRA(nn.Module):
"""Temporal Routing Adaptor (TRA) """Temporal Routing Adaptor (TRA)
TRA takes historical prediction errors & latent representation as inputs, TRA takes historical prediction errors & latent representation as inputs,

View File

@@ -27,13 +27,11 @@ pip install arctic # NOTE: pip may fail to resolve the right package dependency
2. Please follow the following steps to download example data 2. Please follow the following steps to download example data
```bash ```bash
cd examples/orderbook_data/ cd examples/orderbook_data/
wget http://fintech.msra.cn/stock_data/downloads/highfreq_orderboook_example_data.tar.bz2 python ../../scripts/get_data.py download_data --target_dir . --file_name highfreq_orderbook_example_data.zip
tar xf highfreq_orderboook_example_data.tar.bz2
``` ```
3. Please import the example data to your mongo db 3. Please import the example data to your mongo db
```bash ```bash
cd examples/orderbook_data/
python create_dataset.py initialize_library # Initialization Libraries python create_dataset.py initialize_library # Initialization Libraries
python create_dataset.py import_data # Initialization Libraries python create_dataset.py import_data # Initialization Libraries
``` ```
@@ -42,7 +40,6 @@ python create_dataset.py import_data # Initialization Libraries
After importing these data, you run `example.py` to create some high-frequency features. After importing these data, you run `example.py` to create some high-frequency features.
```bash ```bash
cd examples/orderbook_data/
pytest -s --disable-warnings example.py # If you want run all examples pytest -s --disable-warnings example.py # If you want run all examples
pytest -s --disable-warnings example.py::TestClass::test_exp_10 # If you want to run specific example pytest -s --disable-warnings example.py::TestClass::test_exp_10 # If you want to run specific example
``` ```

View File

@@ -2,7 +2,7 @@
# Licensed under the MIT License. # Licensed under the MIT License.
from pathlib import Path from pathlib import Path
__version__ = "0.9.3.99" __version__ = "0.9.4.99"
__version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version __version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version
import os import os
from typing import Union from typing import Union

View File

@@ -162,13 +162,15 @@ def create_account_instance(
init_cash=init_cash, init_cash=init_cash,
position_dict=position_dict, position_dict=position_dict,
pos_type=pos_type, pos_type=pos_type,
benchmark_config={} benchmark_config=(
if benchmark is None {}
else { if benchmark is None
"benchmark": benchmark, else {
"start_time": start_time, "benchmark": benchmark,
"end_time": end_time, "start_time": start_time,
}, "end_time": end_time,
}
),
) )

View File

@@ -622,9 +622,11 @@ class Indicator:
print( print(
"[Indicator({}) {}]: FFR: {}, PA: {}, POS: {}".format( "[Indicator({}) {}]: FFR: {}, PA: {}, POS: {}".format(
freq, freq,
trade_start_time (
if isinstance(trade_start_time, str) trade_start_time
else trade_start_time.strftime("%Y-%m-%d %H:%M:%S"), if isinstance(trade_start_time, str)
else trade_start_time.strftime("%Y-%m-%d %H:%M:%S")
),
fulfill_rate, fulfill_rate,
price_advantage, price_advantage,
positive_rate, positive_rate,

View File

@@ -3,6 +3,7 @@ Here is a batch of evaluation functions.
The interface should be redesigned carefully in the future. The interface should be redesigned carefully in the future.
""" """
import pandas as pd import pandas as pd
from typing import Tuple from typing import Tuple
from qlib import get_module_logger from qlib import get_module_logger

View File

@@ -511,7 +511,6 @@ class TRAModel(Model):
class RNN(nn.Module): class RNN(nn.Module):
"""RNN Model """RNN Model
Args: Args:
@@ -601,7 +600,6 @@ class PositionalEncoding(nn.Module):
class Transformer(nn.Module): class Transformer(nn.Module):
"""Transformer Model """Transformer Model
Args: Args:
@@ -649,7 +647,6 @@ class Transformer(nn.Module):
class TRA(nn.Module): class TRA(nn.Module):
"""Temporal Routing Adaptor (TRA) """Temporal Routing Adaptor (TRA)
TRA takes historical prediction errors & latent representation as inputs, TRA takes historical prediction errors & latent representation as inputs,

View File

@@ -373,7 +373,6 @@ class WeightStrategyBase(BaseSignalStrategy):
class EnhancedIndexingStrategy(WeightStrategyBase): class EnhancedIndexingStrategy(WeightStrategyBase):
"""Enhanced Indexing Strategy """Enhanced Indexing Strategy
Enhanced indexing combines the arts of active management and passive management, Enhanced indexing combines the arts of active management and passive management,

View File

@@ -30,7 +30,6 @@ class Ensemble:
class SingleKeyEnsemble(Ensemble): class SingleKeyEnsemble(Ensemble):
""" """
Extract the object if there is only one key and value in the dict. Make the result more readable. Extract the object if there is only one key and value in the dict. Make the result more readable.
{Only key: Only value} -> Only value {Only key: Only value} -> Only value
@@ -64,7 +63,6 @@ class SingleKeyEnsemble(Ensemble):
class RollingEnsemble(Ensemble): class RollingEnsemble(Ensemble):
"""Merge a dict of rolling dataframe like `prediction` or `IC` into an ensemble. """Merge a dict of rolling dataframe like `prediction` or `IC` into an ensemble.
NOTE: The values of dict must be pd.DataFrame, and have the index "datetime". NOTE: The values of dict must be pd.DataFrame, and have the index "datetime".

View File

@@ -247,9 +247,7 @@ class ShrinkCovEstimator(RiskModel):
v1 = y.T.dot(z) / t - cov_mkt[:, None] * S v1 = y.T.dot(z) / t - cov_mkt[:, None] * S
roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt
v3 = z.T.dot(z) / t - var_mkt * S v3 = z.T.dot(z) / t - var_mkt * S
roff3 = ( roff3 = np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
)
roff = 2 * roff1 - roff3 roff = 2 * roff1 - roff3
rho = rdiag + roff rho = rdiag + roff

View File

@@ -25,7 +25,12 @@ import pandas as pd
from pathlib import Path from pathlib import Path
from typing import List, Union, Optional, Callable from typing import List, Union, Optional, Callable
from packaging import version from packaging import version
from .file import get_or_create_path, save_multiple_parts_file, unpack_archive_with_buffer, get_tmp_file_with_buffer from .file import (
get_or_create_path,
save_multiple_parts_file,
unpack_archive_with_buffer,
get_tmp_file_with_buffer,
)
from ..config import C from ..config import C
from ..log import get_module_logger, set_log_with_config from ..log import get_module_logger, set_log_with_config
@@ -37,7 +42,12 @@ is_deprecated_lexsorted_pandas = version.parse(pd.__version__) > version.parse("
#################### Server #################### #################### Server ####################
def get_redis_connection(): def get_redis_connection():
"""get redis connection instance.""" """get redis connection instance."""
return redis.StrictRedis(host=C.redis_host, port=C.redis_port, db=C.redis_task_db, password=C.redis_password) return redis.StrictRedis(
host=C.redis_host,
port=C.redis_port,
db=C.redis_task_db,
password=C.redis_password,
)
#################### Data #################### #################### Data ####################
@@ -96,7 +106,14 @@ def get_period_offset(first_year, period, quarterly):
return offset return offset
def read_period_data(index_path, data_path, period, cur_date_int: int, quarterly, last_period_index: int = None): def read_period_data(
index_path,
data_path,
period,
cur_date_int: int,
quarterly,
last_period_index: int = None,
):
""" """
At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803). At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803).
Only the updating info before cur_date or at cur_date will be used. Only the updating info before cur_date or at cur_date will be used.
@@ -273,7 +290,10 @@ def parse_field(field):
# \uff09 -> ) # \uff09 -> )
chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09" chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09"
for pattern, new in [ for pattern, new in [
(rf"\$\$([\w{chinese_punctuation_regex}]+)", r'PFeature("\1")'), # $$ must be before $ (
rf"\$\$([\w{chinese_punctuation_regex}]+)",
r'PFeature("\1")',
), # $$ must be before $
(rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'), (rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'),
(r"(\w+\s*)\(", r"Operators.\1("), (r"(\w+\s*)\(", r"Operators.\1("),
]: # Features # Operators ]: # Features # Operators
@@ -383,7 +403,14 @@ def get_date_range(trading_date, left_shift=0, right_shift=0, future=False):
return calendar return calendar
def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None): def get_date_by_shift(
trading_date,
shift,
future=False,
clip_shift=True,
freq="day",
align: Optional[str] = None,
):
"""get trading date with shift bias will cur_date """get trading date with shift bias will cur_date
e.g. : shift == 1, return next trading date e.g. : shift == 1, return next trading date
shift == -1, return previous trading date shift == -1, return previous trading date
@@ -569,7 +596,38 @@ def exists_qlib_data(qlib_dir):
# check instruments # check instruments
code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir())) code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir()))
_instrument = instruments_dir.joinpath("all.txt") _instrument = instruments_dir.joinpath("all.txt")
miss_code = set(pd.read_csv(_instrument, sep="\t", header=None).loc[:, 0].apply(str.lower)) - set(code_names) # Removed two possible ticker names "NA" and "NULL" from the default na_values list for column 0
miss_code = set(
pd.read_csv(
_instrument,
sep="\t",
header=None,
keep_default_na=False,
na_values={
0: [
" ",
"#N/A",
"#N/A N/A",
"#NA",
"-1.#IND",
"-1.#QNAN",
"-NaN",
"-nan",
"1.#IND",
"1.#QNAN",
"<NA>",
"N/A",
"NaN",
"None",
"n/a",
"nan",
"null ",
]
},
)
.loc[:, 0]
.apply(str.lower)
) - set(code_names)
if miss_code and any(map(lambda x: "sht" not in x, miss_code)): if miss_code and any(map(lambda x: "sht" not in x, miss_code)):
return False return False

View File

@@ -90,7 +90,6 @@ class OnlineStrategy:
class RollingStrategy(OnlineStrategy): class RollingStrategy(OnlineStrategy):
""" """
This example strategy always uses the latest rolling model as online models. This example strategy always uses the latest rolling model as online models.
""" """

View File

@@ -396,14 +396,7 @@ class CSI500Index(CSIIndex):
today = pd.Timestamp.now() today = pd.Timestamp.now()
date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date
ret_list = [] ret_list = []
col = ["date", "symbol", "code_name"]
for date in tqdm(date_range, desc="Download CSI500"): for date in tqdm(date_range, desc="Download CSI500"):
rs = bs.query_zz500_stocks(date=str(date))
zz500_stocks = []
while (rs.error_code == "0") & rs.next():
zz500_stocks.append(rs.get_row_data())
result = pd.DataFrame(zz500_stocks, columns=col)
result["symbol"] = result["symbol"].apply(lambda x: x.replace(".", "").upper())
result = self.get_data_from_baostock(date) result = self.get_data_from_baostock(date)
ret_list.append(result[["date", "symbol"]]) ret_list.append(result[["date", "symbol"]])
bs.logout() bs.logout()

View File

@@ -146,9 +146,7 @@ class DumpDataBase:
return ( return (
self._include_fields self._include_fields
if self._include_fields if self._include_fields
else set(df_columns) - set(self._exclude_fields) else set(df_columns) - set(self._exclude_fields) if self._exclude_fields else df_columns
if self._exclude_fields
else df_columns
) )
@staticmethod @staticmethod

View File

@@ -3,7 +3,7 @@
""" """
TODO: TODO:
- A more well-designed PIT database is required. - A more well-designed PIT database is required.
- seperated insert, delete, update, query operations are required. - separated insert, delete, update, query operations are required.
""" """
import shutil import shutil
@@ -132,9 +132,11 @@ class DumpPitData:
return ( return (
set(self._include_fields) set(self._include_fields)
if self._include_fields if self._include_fields
else set(df[self.field_column_name]) - set(self._exclude_fields) else (
if self._exclude_fields set(df[self.field_column_name]) - set(self._exclude_fields)
else set(df[self.field_column_name]) if self._exclude_fields
else set(df[self.field_column_name])
)
) )
def get_filenames(self, symbol, field, interval): def get_filenames(self, symbol, field, interval):

View File

@@ -65,6 +65,8 @@ REQUIRED = [
# To ensure stable operation of the experiment manager, we have limited the version of mlflow, # To ensure stable operation of the experiment manager, we have limited the version of mlflow,
# and we need to verify whether version 2.0 of mlflow can serve qlib properly. # and we need to verify whether version 2.0 of mlflow can serve qlib properly.
"mlflow>=1.12.1, <=1.30.0", "mlflow>=1.12.1, <=1.30.0",
# mlflow 1.30.0 requires packaging<22, so we limit the packaging version, otherwise the CI will fail.
"packaging<22",
"tqdm", "tqdm",
"loguru", "loguru",
"lightgbm>=3.3.0", "lightgbm>=3.3.0",

View File

@@ -9,7 +9,9 @@ from qlib.tests import TestAutoData
class WorkflowTest(TestAutoData): class WorkflowTest(TestAutoData):
TMP_PATH = Path("./.mlruns_tmp/") # Creating the directory manually doesn't work with mlflow,
# so we add a subfolder named .trash when we create the directory.
TMP_PATH = Path("./.mlruns_tmp/.trash")
def tearDown(self) -> None: def tearDown(self) -> None:
if self.TMP_PATH.exists(): if self.TMP_PATH.exists():
@@ -17,6 +19,8 @@ class WorkflowTest(TestAutoData):
def test_get_local_dir(self): def test_get_local_dir(self):
""" """ """ """
self.TMP_PATH.mkdir(parents=True, exist_ok=True)
with R.start(uri=str(self.TMP_PATH)): with R.start(uri=str(self.TMP_PATH)):
pass pass