download orderbook data (#1754)

* download orderbook data * fix CI error * fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * test fix CI error * optimize get_data code * optimize get_data code * optimize get_data code * optimize README --------- Co-authored-by: Linlang <v-linlanglv@microsoft.com>
2026-07-21 19:27:36 +08:00 · 2024-03-07 14:41:21 +08:00
parent 98f569eed2
commit 39f88daaa7
14 changed files with 30 additions and 34 deletions
--- a/examples/benchmarks/TRA/src/model.py
+++ b/examples/benchmarks/TRA/src/model.py
@@ -324,7 +324,6 @@ class TRAModel(Model):


 class LSTM(nn.Module):
-
    """LSTM Model

    Args:
@@ -414,7 +413,6 @@ class PositionalEncoding(nn.Module):


 class Transformer(nn.Module):
-
    """Transformer Model

    Args:
@@ -475,7 +473,6 @@ class Transformer(nn.Module):


 class TRA(nn.Module):
-
    """Temporal Routing Adaptor (TRA)

    TRA takes historical prediction errors & latent representation as inputs,
--- a/examples/orderbook_data/README.md
+++ b/examples/orderbook_data/README.md
@@ -27,13 +27,11 @@ pip install arctic  # NOTE: pip may fail to resolve the right package dependency
 2. Please follow the steps below to download the example data
 ```bash
 cd examples/orderbook_data/
-wget http://fintech.msra.cn/stock_data/downloads/highfreq_orderboook_example_data.tar.bz2
-tar xf highfreq_orderboook_example_data.tar.bz2
+python ../../scripts/get_data.py download_data --target_dir . --file_name highfreq_orderbook_example_data.zip
 ```

 3. Please import the example data to your mongo db
 ```bash
-cd examples/orderbook_data/
 python create_dataset.py initialize_library  # Initialization Libraries
 python create_dataset.py import_data  # Import the example data into the libraries
 ```
@@ -42,7 +40,6 @@ python create_dataset.py import_data  # Initialization Libraries

 After importing the data, you can run `example.py` to create some high-frequency features.
 ```bash
-cd examples/orderbook_data/
 pytest -s --disable-warnings example.py   # If you want to run all examples
 pytest -s --disable-warnings example.py::TestClass::test_exp_10  # If you want to run specific example
 ```
--- a/qlib/backtest/init.py
+++ b/qlib/backtest/init.py
@@ -162,13 +162,15 @@ def create_account_instance(
        init_cash=init_cash,
        position_dict=position_dict,
        pos_type=pos_type,
-        benchmark_config={}
-        if benchmark is None
-        else {
-            "benchmark": benchmark,
-            "start_time": start_time,
-            "end_time": end_time,
-        },
+        benchmark_config=(
+            {}
+            if benchmark is None
+            else {
+                "benchmark": benchmark,
+                "start_time": start_time,
+                "end_time": end_time,
+            }
+        ),
    )


--- a/qlib/backtest/report.py
+++ b/qlib/backtest/report.py
@@ -622,9 +622,11 @@ class Indicator:
            print(
                "[Indicator({}) {}]: FFR: {}, PA: {}, POS: {}".format(
                    freq,
-                    trade_start_time
-                    if isinstance(trade_start_time, str)
-                    else trade_start_time.strftime("%Y-%m-%d %H:%M:%S"),
+                    (
+                        trade_start_time
+                        if isinstance(trade_start_time, str)
+                        else trade_start_time.strftime("%Y-%m-%d %H:%M:%S")
+                    ),
                    fulfill_rate,
                    price_advantage,
                    positive_rate,
--- a/qlib/contrib/eva/alpha.py
+++ b/qlib/contrib/eva/alpha.py
@@ -3,6 +3,7 @@ Here is a batch of evaluation functions.

 The interface should be redesigned carefully in the future.
 """
+
 import pandas as pd
 from typing import Tuple
 from qlib import get_module_logger
--- a/qlib/contrib/model/pytorch_tra.py
+++ b/qlib/contrib/model/pytorch_tra.py
@@ -511,7 +511,6 @@ class TRAModel(Model):


 class RNN(nn.Module):
-
    """RNN Model

    Args:
@@ -601,7 +600,6 @@ class PositionalEncoding(nn.Module):


 class Transformer(nn.Module):
-
    """Transformer Model

    Args:
@@ -649,7 +647,6 @@ class Transformer(nn.Module):


 class TRA(nn.Module):
-
    """Temporal Routing Adaptor (TRA)

    TRA takes historical prediction errors & latent representation as inputs,
--- a/qlib/contrib/strategy/signal_strategy.py
+++ b/qlib/contrib/strategy/signal_strategy.py
@@ -373,7 +373,6 @@ class WeightStrategyBase(BaseSignalStrategy):


 class EnhancedIndexingStrategy(WeightStrategyBase):
-
    """Enhanced Indexing Strategy

    Enhanced indexing combines the arts of active management and passive management,
--- a/qlib/model/ens/ensemble.py
+++ b/qlib/model/ens/ensemble.py
@@ -30,7 +30,6 @@ class Ensemble:


 class SingleKeyEnsemble(Ensemble):
-
    """
    Extract the object if there is only one key and value in the dict. Make the result more readable.
    {Only key: Only value} -> Only value
@@ -64,7 +63,6 @@ class SingleKeyEnsemble(Ensemble):


 class RollingEnsemble(Ensemble):
-
    """Merge a dict of rolling dataframe like `prediction` or `IC` into an ensemble.

    NOTE: The values of dict must be pd.DataFrame, and have the index "datetime".
--- a/qlib/model/riskmodel/shrink.py
+++ b/qlib/model/riskmodel/shrink.py
@@ -247,9 +247,7 @@ class ShrinkCovEstimator(RiskModel):
        v1 = y.T.dot(z) / t - cov_mkt[:, None] * S
        roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt
        v3 = z.T.dot(z) / t - var_mkt * S
-        roff3 = (
-            np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
-        )
+        roff3 = np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
        roff = 2 * roff1 - roff3
        rho = rdiag + roff

--- a/qlib/workflow/online/strategy.py
+++ b/qlib/workflow/online/strategy.py
@@ -90,7 +90,6 @@ class OnlineStrategy:


 class RollingStrategy(OnlineStrategy):
-
    """
    This example strategy always uses the latest rolling models as online models.
    """
--- a/scripts/dump_bin.py
+++ b/scripts/dump_bin.py
@@ -146,9 +146,7 @@ class DumpDataBase:
        return (
            self._include_fields
            if self._include_fields
-            else set(df_columns) - set(self._exclude_fields)
-            if self._exclude_fields
-            else df_columns
+            else set(df_columns) - set(self._exclude_fields) if self._exclude_fields else df_columns
        )

    @staticmethod
--- a/scripts/dump_pit.py
+++ b/scripts/dump_pit.py
@@ -132,9 +132,11 @@ class DumpPitData:
        return (
            set(self._include_fields)
            if self._include_fields
-            else set(df[self.field_column_name]) - set(self._exclude_fields)
-            if self._exclude_fields
-            else set(df[self.field_column_name])
+            else (
+                set(df[self.field_column_name]) - set(self._exclude_fields)
+                if self._exclude_fields
+                else set(df[self.field_column_name])
+            )
        )

    def get_filenames(self, symbol, field, interval):
--- a/setup.py
+++ b/setup.py
@@ -65,6 +65,8 @@ REQUIRED = [
    # To ensure stable operation of the experiment manager, we have limited the version of mlflow,
    # and we need to verify whether version 2.0 of mlflow can serve qlib properly.
    "mlflow>=1.12.1, <=1.30.0",
+    # mlflow 1.30.0 requires packaging<22, so we limit the packaging version, otherwise the CI will fail.
+    "packaging<22",
    "tqdm",
    "loguru",
    "lightgbm>=3.3.0",
--- a/tests/test_workflow.py
+++ b/tests/test_workflow.py
@@ -9,7 +9,9 @@ from qlib.tests import TestAutoData


 class WorkflowTest(TestAutoData):
-    TMP_PATH = Path("./.mlruns_tmp/")
+    # Creating the directory manually doesn't work with mlflow,
+    # so we add a subfolder named .trash when we create the directory.
+    TMP_PATH = Path("./.mlruns_tmp/.trash")

    def tearDown(self) -> None:
        if self.TMP_PATH.exists():
@@ -17,6 +19,8 @@ class WorkflowTest(TestAutoData):

    def test_get_local_dir(self):
        """ """
+        self.TMP_PATH.mkdir(parents=True, exist_ok=True)
+
        with R.start(uri=str(self.TMP_PATH)):
            pass