Merge branch 'main' of github.com:microsoft/qlib into fix_docs

2026-07-21 11:17:34 +08:00 · 2024-05-17 17:42:15 +08:00
parent 77d34e080b 917e3a725e
commit 091f542b42
24 changed files with 122 additions and 58 deletions
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -51,8 +51,8 @@ jobs:
        python setup.py bdist_wheel
    - name: Build and publish
      env:
-        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
      run: |
        twine upload dist/*

@@ -72,10 +72,10 @@ jobs:
        python-version: 3.7
    - name: Install dependencies
      run: |
-        pip install twine  
+        pip install twine
    - name: Build and publish
      env:
-        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
      run: |
        twine upload dist/pyqlib-*-manylinux*.whl
--- a/.github/workflows/release-drafter.yml
+++ b/.github/workflows/release-drafter.yml
@@ -6,8 +6,14 @@ on:
    branches:
      - main

+permissions:
+  contents: read
+
 jobs:
  update_release_draft:
+    permissions:
+      contents: write
+      pull-requests: read
    runs-on: ubuntu-latest
    steps:
      # Drafts your next Release notes as Pull Requests are merged into "master"
--- a/.github/workflows/test_qlib_from_pip.yml
+++ b/.github/workflows/test_qlib_from_pip.yml
@@ -13,7 +13,10 @@ jobs:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
+        # The macos-latest runner image moved from macOS 12.7.4 to 14.4.1,
+        # and the minimum Python version available for macOS 14.4.1 is 3.10,
+        # so we pin the macOS runner to macos-12 instead of macos-latest.
+        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
        # not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
        python-version: [3.7, 3.8]

--- a/.github/workflows/test_qlib_from_source.yml
+++ b/.github/workflows/test_qlib_from_source.yml
@@ -14,7 +14,10 @@ jobs:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
+        # The macos-latest runner image moved from macOS 12.7.4 to 14.4.1,
+        # and the minimum Python version available for macOS 14.4.1 is 3.10,
+        # so we pin the macOS runner to macos-12 instead of macos-latest.
+        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
        # not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
        python-version: [3.7, 3.8]

--- a/.github/workflows/test_qlib_from_source_slow.yml
+++ b/.github/workflows/test_qlib_from_source_slow.yml
@@ -14,7 +14,10 @@ jobs:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
+        # The macos-latest runner image moved from macOS 12.7.4 to 14.4.1,
+        # and the minimum Python version available for macOS 14.4.1 is 3.10,
+        # so we pin the macOS runner to macos-12 instead of macos-latest.
+        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
        # not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
        python-version: [3.7, 3.8]

--- a/.gitignore
+++ b/.gitignore
@@ -48,4 +48,4 @@ tags
 *.swp

 ./pretrain
-.idea/
+.idea/
--- a/README.md
+++ b/README.md
@@ -172,6 +172,8 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor

 **Tips**: If you fail to install `Qlib` or run the examples in your environment,  comparing your steps and the [CI workflow](.github/workflows/test_qlib_from_source.yml) may help you find the problem.

+**Tips for Mac**: If you are using a Mac with an M1 chip, you might encounter issues when building the wheel for LightGBM because the OpenMP dependency is missing. To solve the problem, first install OpenMP with ``brew install libomp``, and then run ``pip install .`` to build it successfully.
+
 ## Data Preparation
 Load and prepare data by running the following code:

--- a/examples/benchmarks/TRA/src/model.py
+++ b/examples/benchmarks/TRA/src/model.py
@@ -324,7 +324,6 @@ class TRAModel(Model):


 class LSTM(nn.Module):
-
    """LSTM Model

    Args:
@@ -414,7 +413,6 @@ class PositionalEncoding(nn.Module):


 class Transformer(nn.Module):
-
    """Transformer Model

    Args:
@@ -475,7 +473,6 @@ class Transformer(nn.Module):


 class TRA(nn.Module):
-
    """Temporal Routing Adaptor (TRA)

    TRA takes historical prediction errors & latent representation as inputs,
--- a/examples/orderbook_data/README.md
+++ b/examples/orderbook_data/README.md
@@ -27,13 +27,11 @@ pip install arctic  # NOTE: pip may fail to resolve the right package dependency
 2. Please follow following steps to download example data
 ```bash
 cd examples/orderbook_data/
-wget http://fintech.msra.cn/stock_data/downloads/highfreq_orderboook_example_data.tar.bz2
-tar xf highfreq_orderboook_example_data.tar.bz2
+python ../../scripts/get_data.py download_data --target_dir . --file_name highfreq_orderbook_example_data.zip
 ```

 3. Please import the example data to your mongo db
 ```bash
-cd examples/orderbook_data/
 python create_dataset.py initialize_library  # Initialization Libraries
 python create_dataset.py import_data  # Initialization Libraries
 ```
@@ -42,7 +40,6 @@ python create_dataset.py import_data  # Initialization Libraries

 After importing these data, you run `example.py` to create some high-frequency features.
 ```bash
-cd examples/orderbook_data/
 pytest -s --disable-warnings example.py   # If you want run all examples
 pytest -s --disable-warnings example.py::TestClass::test_exp_10  # If you want to run specific example
 ```
--- a/qlib/init.py
+++ b/qlib/init.py
@@ -2,7 +2,7 @@
 # Licensed under the MIT License.
 from pathlib import Path

-__version__ = "0.9.3.99"
+__version__ = "0.9.4.99"
 __version__bak = __version__  # This version is backup for QlibConfig.reset_qlib_version
 import os
 from typing import Union
--- a/qlib/backtest/init.py
+++ b/qlib/backtest/init.py
@@ -162,13 +162,15 @@ def create_account_instance(
        init_cash=init_cash,
        position_dict=position_dict,
        pos_type=pos_type,
-        benchmark_config={}
-        if benchmark is None
-        else {
-            "benchmark": benchmark,
-            "start_time": start_time,
-            "end_time": end_time,
-        },
+        benchmark_config=(
+            {}
+            if benchmark is None
+            else {
+                "benchmark": benchmark,
+                "start_time": start_time,
+                "end_time": end_time,
+            }
+        ),
    )


--- a/qlib/backtest/report.py
+++ b/qlib/backtest/report.py
@@ -622,9 +622,11 @@ class Indicator:
            print(
                "[Indicator({}) {}]: FFR: {}, PA: {}, POS: {}".format(
                    freq,
-                    trade_start_time
-                    if isinstance(trade_start_time, str)
-                    else trade_start_time.strftime("%Y-%m-%d %H:%M:%S"),
+                    (
+                        trade_start_time
+                        if isinstance(trade_start_time, str)
+                        else trade_start_time.strftime("%Y-%m-%d %H:%M:%S")
+                    ),
                    fulfill_rate,
                    price_advantage,
                    positive_rate,
--- a/qlib/contrib/eva/alpha.py
+++ b/qlib/contrib/eva/alpha.py
@@ -3,6 +3,7 @@ Here is a batch of evaluation functions.

 The interface should be redesigned carefully in the future.
 """
+
 import pandas as pd
 from typing import Tuple
 from qlib import get_module_logger
--- a/qlib/contrib/model/pytorch_tra.py
+++ b/qlib/contrib/model/pytorch_tra.py
@@ -511,7 +511,6 @@ class TRAModel(Model):


 class RNN(nn.Module):
-
    """RNN Model

    Args:
@@ -601,7 +600,6 @@ class PositionalEncoding(nn.Module):


 class Transformer(nn.Module):
-
    """Transformer Model

    Args:
@@ -649,7 +647,6 @@ class Transformer(nn.Module):


 class TRA(nn.Module):
-
    """Temporal Routing Adaptor (TRA)

    TRA takes historical prediction errors & latent representation as inputs,
--- a/qlib/contrib/strategy/signal_strategy.py
+++ b/qlib/contrib/strategy/signal_strategy.py
@@ -373,7 +373,6 @@ class WeightStrategyBase(BaseSignalStrategy):


 class EnhancedIndexingStrategy(WeightStrategyBase):
-
    """Enhanced Indexing Strategy

    Enhanced indexing combines the arts of active management and passive management,
--- a/qlib/model/ens/ensemble.py
+++ b/qlib/model/ens/ensemble.py
@@ -30,7 +30,6 @@ class Ensemble:


 class SingleKeyEnsemble(Ensemble):
-
    """
    Extract the object if there is only one key and value in the dict. Make the result more readable.
    {Only key: Only value} -> Only value
@@ -64,7 +63,6 @@ class SingleKeyEnsemble(Ensemble):


 class RollingEnsemble(Ensemble):
-
    """Merge a dict of rolling dataframe like `prediction` or `IC` into an ensemble.

    NOTE: The values of dict must be pd.DataFrame, and have the index "datetime".
--- a/qlib/model/riskmodel/shrink.py
+++ b/qlib/model/riskmodel/shrink.py
@@ -247,9 +247,7 @@ class ShrinkCovEstimator(RiskModel):
        v1 = y.T.dot(z) / t - cov_mkt[:, None] * S
        roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt
        v3 = z.T.dot(z) / t - var_mkt * S
-        roff3 = (
-            np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
-        )
+        roff3 = np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
        roff = 2 * roff1 - roff3
        rho = rdiag + roff

--- a/qlib/utils/init.py
+++ b/qlib/utils/init.py
@@ -25,7 +25,12 @@ import pandas as pd
 from pathlib import Path
 from typing import List, Union, Optional, Callable
 from packaging import version
-from .file import get_or_create_path, save_multiple_parts_file, unpack_archive_with_buffer, get_tmp_file_with_buffer
+from .file import (
+    get_or_create_path,
+    save_multiple_parts_file,
+    unpack_archive_with_buffer,
+    get_tmp_file_with_buffer,
+)
 from ..config import C
 from ..log import get_module_logger, set_log_with_config

@@ -37,7 +42,12 @@ is_deprecated_lexsorted_pandas = version.parse(pd.__version__) > version.parse("
 #################### Server ####################
 def get_redis_connection():
    """get redis connection instance."""
-    return redis.StrictRedis(host=C.redis_host, port=C.redis_port, db=C.redis_task_db, password=C.redis_password)
+    return redis.StrictRedis(
+        host=C.redis_host,
+        port=C.redis_port,
+        db=C.redis_task_db,
+        password=C.redis_password,
+    )


 #################### Data ####################
@@ -96,7 +106,14 @@ def get_period_offset(first_year, period, quarterly):
    return offset


-def read_period_data(index_path, data_path, period, cur_date_int: int, quarterly, last_period_index: int = None):
+def read_period_data(
+    index_path,
+    data_path,
+    period,
+    cur_date_int: int,
+    quarterly,
+    last_period_index: int = None,
+):
    """
    At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803).
    Only the updating info before cur_date or at cur_date will be used.
@@ -273,7 +290,10 @@ def parse_field(field):
    # \uff09 -> )
    chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09"
    for pattern, new in [
-        (rf"\$\$([\w{chinese_punctuation_regex}]+)", r'PFeature("\1")'),  # $$ must be before $
+        (
+            rf"\$\$([\w{chinese_punctuation_regex}]+)",
+            r'PFeature("\1")',
+        ),  # $$ must be before $
        (rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'),
        (r"(\w+\s*)\(", r"Operators.\1("),
    ]:  # Features  # Operators
@@ -383,7 +403,14 @@ def get_date_range(trading_date, left_shift=0, right_shift=0, future=False):
    return calendar


-def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None):
+def get_date_by_shift(
+    trading_date,
+    shift,
+    future=False,
+    clip_shift=True,
+    freq="day",
+    align: Optional[str] = None,
+):
    """get trading date with shift bias will cur_date
        e.g. : shift == 1,  return next trading date
               shift == -1, return previous trading date
@@ -569,7 +596,38 @@ def exists_qlib_data(qlib_dir):
    # check instruments
    code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir()))
    _instrument = instruments_dir.joinpath("all.txt")
-    miss_code = set(pd.read_csv(_instrument, sep="\t", header=None).loc[:, 0].apply(str.lower)) - set(code_names)
+    # Removed two possible ticker names "NA" and "NULL" from the default na_values list for column 0
+    miss_code = set(
+        pd.read_csv(
+            _instrument,
+            sep="\t",
+            header=None,
+            keep_default_na=False,
+            na_values={
+                0: [
+                    " ",
+                    "#N/A",
+                    "#N/A N/A",
+                    "#NA",
+                    "-1.#IND",
+                    "-1.#QNAN",
+                    "-NaN",
+                    "-nan",
+                    "1.#IND",
+                    "1.#QNAN",
+                    "<NA>",
+                    "N/A",
+                    "NaN",
+                    "None",
+                    "n/a",
+                    "nan",
+                    "null ",
+                ]
+            },
+        )
+        .loc[:, 0]
+        .apply(str.lower)
+    ) - set(code_names)
    if miss_code and any(map(lambda x: "sht" not in x, miss_code)):
        return False

--- a/qlib/workflow/online/strategy.py
+++ b/qlib/workflow/online/strategy.py
@@ -90,7 +90,6 @@ class OnlineStrategy:


 class RollingStrategy(OnlineStrategy):
-
    """
    This example strategy always uses the latest rolling model sas online models.
    """
--- a/scripts/data_collector/cn_index/collector.py
+++ b/scripts/data_collector/cn_index/collector.py
@@ -396,14 +396,7 @@ class CSI500Index(CSIIndex):
        today = pd.Timestamp.now()
        date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date
        ret_list = []
-        col = ["date", "symbol", "code_name"]
        for date in tqdm(date_range, desc="Download CSI500"):
-            rs = bs.query_zz500_stocks(date=str(date))
-            zz500_stocks = []
-            while (rs.error_code == "0") & rs.next():
-                zz500_stocks.append(rs.get_row_data())
-            result = pd.DataFrame(zz500_stocks, columns=col)
-            result["symbol"] = result["symbol"].apply(lambda x: x.replace(".", "").upper())
            result = self.get_data_from_baostock(date)
            ret_list.append(result[["date", "symbol"]])
        bs.logout()
--- a/scripts/dump_bin.py
+++ b/scripts/dump_bin.py
@@ -146,9 +146,7 @@ class DumpDataBase:
        return (
            self._include_fields
            if self._include_fields
-            else set(df_columns) - set(self._exclude_fields)
-            if self._exclude_fields
-            else df_columns
+            else set(df_columns) - set(self._exclude_fields) if self._exclude_fields else df_columns
        )

    @staticmethod
--- a/scripts/dump_pit.py
+++ b/scripts/dump_pit.py
@@ -3,7 +3,7 @@
 """
 TODO:
 - A more well-designed PIT database is required.
-    - seperated insert, delete, update, query operations are required.
+    - separated insert, delete, update, query operations are required.
 """

 import shutil
@@ -132,9 +132,11 @@ class DumpPitData:
        return (
            set(self._include_fields)
            if self._include_fields
-            else set(df[self.field_column_name]) - set(self._exclude_fields)
-            if self._exclude_fields
-            else set(df[self.field_column_name])
+            else (
+                set(df[self.field_column_name]) - set(self._exclude_fields)
+                if self._exclude_fields
+                else set(df[self.field_column_name])
+            )
        )

    def get_filenames(self, symbol, field, interval):
--- a/setup.py
+++ b/setup.py
@@ -65,6 +65,8 @@ REQUIRED = [
    # To ensure stable operation of the experiment manager, we have limited the version of mlflow,
    # and we need to verify whether version 2.0 of mlflow can serve qlib properly.
    "mlflow>=1.12.1, <=1.30.0",
+    # mlflow 1.30.0 requires packaging<22, so we limit the packaging version, otherwise the CI will fail.
+    "packaging<22",
    "tqdm",
    "loguru",
    "lightgbm>=3.3.0",
--- a/tests/test_workflow.py
+++ b/tests/test_workflow.py
@@ -9,7 +9,9 @@ from qlib.tests import TestAutoData


 class WorkflowTest(TestAutoData):
-    TMP_PATH = Path("./.mlruns_tmp/")
+    # Creating the directory manually doesn't work with mlflow,
+    # so we add a subfolder named .trash when we create the directory.
+    TMP_PATH = Path("./.mlruns_tmp/.trash")

    def tearDown(self) -> None:
        if self.TMP_PATH.exists():
@@ -17,6 +19,8 @@ class WorkflowTest(TestAutoData):

    def test_get_local_dir(self):
        """ """
+        self.TMP_PATH.mkdir(parents=True, exist_ok=True)
+
        with R.start(uri=str(self.TMP_PATH)):
            pass