fix logo display error

2026-06-29 00:51:19 +08:00 · 2024-06-06 13:00:49 +08:00
15 changed files with 13 additions and 79 deletions
--- a/.github/workflows/test_qlib_from_pip.yml
+++ b/.github/workflows/test_qlib_from_pip.yml
@@ -68,8 +68,5 @@ jobs:
        cd qlib

    - name: Test workflow by config
-      # On macos-11 system, it will lead to "Segmentation fault: 11" error,
-      # which may be caused by the excessive memory overhead of macos-11 system, so we disable macos-11 temporarily here.
-      if: ${{ matrix.os != 'macos-11' }}
      run: |
        qrun examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
--- a/.github/workflows/test_qlib_from_source.yml
+++ b/.github/workflows/test_qlib_from_source.yml
@@ -72,10 +72,8 @@ jobs:
        black . -l 120 --check --diff

    - name: Make html with sphinx
-      # Since read the docs builds on ubuntu 22.04, we only need to test that the build passes on ubuntu 22.04.
-      if: ${{ matrix.os == 'ubuntu-22.04' }}
      run: |
-        cd docs
+        cd docs 
        sphinx-build -W --keep-going -b html . _build
        cd ..

@@ -161,16 +159,11 @@ jobs:

    # Run after data downloads
    - name: Check Qlib ipynb with nbconvert
-      # Running the nbconvert check on a macos-11 system results in a "Kernel died" error, so we've temporarily disabled macos-11 here.
-      if: ${{ matrix.os != 'macos-11' }}
      run: |
        # add more ipynb files in future
        jupyter nbconvert --to notebook --execute examples/workflow_by_code.ipynb

    - name: Test workflow by config (install from source)
-      # On macos-11 system, it will lead to "Segmentation fault: 11" error,
-      # which may be caused by the excessive memory overhead of macos-11 system, so we disable macos-11 temporarily here.
-      if: ${{ matrix.os != 'macos-11' }}
      run: |
        python -m pip install numba
        python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
--- a/docs/component/model.rst
+++ b/docs/component/model.rst
@@ -86,7 +86,7 @@ Example
            },
        }

-        # model initialization
+        # model initialization
        model = init_instance_by_config(task["model"])
        dataset = init_instance_by_config(task["dataset"])

--- a/examples/portfolio/README.md
+++ b/examples/portfolio/README.md
@@ -20,7 +20,7 @@ We use China stock market data for our example.
 1. Prepare CSI300 weight:

   ```bash
-   wget https://github.com/SunsetWolf/qlib_dataset/releases/download/v0/csi300_weight.zip
+   wget http://fintech.msra.cn/stock_data/downloads/csi300_weight.zip
   unzip -d ~/.qlib/qlib_data/cn_data csi300_weight.zip
   rm -f csi300_weight.zip
   ```
--- a/examples/workflow_by_code.ipynb
+++ b/examples/workflow_by_code.ipynb
@@ -161,7 +161,7 @@
    "    },\n",
    "}\n",
    "\n",
-    "# model initialization\n",
+    "# model initialization\n",
    "model = init_instance_by_config(task[\"model\"])\n",
    "dataset = init_instance_by_config(task[\"dataset\"])\n",
    "\n",
--- a/qlib/contrib/model/pytorch_alstm_ts.py
+++ b/qlib/contrib/model/pytorch_alstm_ts.py
@@ -160,10 +160,6 @@ class ALSTM(Model):

        if self.metric in ("", "loss"):
            return -self.loss_fn(pred[mask], label[mask])
-        elif self.metric == "mse":
-            mask = ~torch.isnan(label)
-            weight = torch.ones_like(label)
-            return -self.mse(pred[mask], label[mask], weight[mask])

        raise ValueError("unknown metric `%s`" % self.metric)

--- a/qlib/data/data.py
+++ b/qlib/data/data.py
@@ -616,7 +616,7 @@ class DatasetProvider(abc.ABC):

        data = pd.DataFrame(obj)
        if not data.empty and not np.issubdtype(data.index.dtype, np.dtype("M")):
-            # If the underlaying provides the data not in datetime format, we'll convert it into datetime format
+            # If the underlying provider returns data that is not in datetime format, we'll convert it into datetime format
            _calendar = Cal.calendar(freq=freq)
            data.index = _calendar[data.index.values.astype(int)]
        data.index.names = ["datetime"]
--- a/qlib/data/dataset/init.py
+++ b/qlib/data/dataset/init.py
@@ -403,7 +403,7 @@ class TSDataSampler:
            np.full((1, self.data_arr.shape[1]), np.nan, dtype=self.data_arr.dtype),
            axis=0,
        )
-        self.nan_idx = len(self.data_arr) - 1  # The last line is all NaN; setting it to -1 can cause bug #1716
+        self.nan_idx = -1  # The last line is all NaN

        # the data type will be changed
        # The index of usable data is between start_idx and end_idx
--- a/qlib/model/trainer.py
+++ b/qlib/model/trainer.py
@@ -41,7 +41,7 @@ def _log_task_info(task_config: dict):

 def _exe_task(task_config: dict):
    rec = R.get_recorder()
-    # model & dataset initialization
+    # model & dataset initialization
    model: Model = init_instance_by_config(task_config["model"], accept_types=Model)
    dataset: Dataset = init_instance_by_config(task_config["dataset"], accept_types=Dataset)
    reweighter: Reweighter = task_config.get("reweighter", None)
--- a/qlib/workflow/task/utils.py
+++ b/qlib/workflow/task/utils.py
@@ -242,7 +242,7 @@ class TimeAdjuster:

    def shift(self, seg: tuple, step: int, rtype=SHIFT_SD) -> tuple:
        """
-        Shift the datetime of segment
+        Shift the datetime of segment

        If there are None (which indicates unbounded index) in the segment, this method will return None.

--- a/scripts/data_collector/crypto/README.md
+++ b/scripts/data_collector/crypto/README.md
@@ -9,7 +9,7 @@ pip install -r requirements.txt
 ```

 ## Usage of the dataset
-> *Crypto dataset only support Data retrieval function but not support backtest function due to the lack of OHLC data.*
+> *Crypto dataset only supports the data retrieval function and does not support the backtest function due to the lack of OHLC data.*

 ## Collector Data

--- a/scripts/data_collector/yahoo/collector.py
+++ b/scripts/data_collector/yahoo/collector.py
@@ -796,9 +796,6 @@ class Run(BaseRun):
            # get 1m data
            $ python collector.py download_data --source_dir ~/.qlib/stock_data/source --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1m
        """
-        if self.interval == "1d" and pd.Timestamp(end) > pd.Timestamp(datetime.datetime.now().strftime("%Y-%m-%d")):
-            raise ValueError(f"end_date: {end} is greater than the current date.")
-
        super(Run, self).download_data(max_collector_count, delay, start, end, check_data_length, limit_nums)

    def normalize_data(
--- a/setup.py
+++ b/setup.py
@@ -46,7 +46,7 @@ if not _CYTHON_INSTALLED:
 REQUIRED = [
    "numpy>=1.12.0, <1.24",
    "pandas>=0.25.1",
-    "scipy>=1.7.3",
+    "scipy>=1.0.0",
    "requests>=2.18.0",
    "sacred>=0.7.4",
    "python-socketio",
@@ -82,7 +82,7 @@ REQUIRED = [
    "dill",
    "dataclasses;python_version<'3.7'",
    "filelock",
-    "jinja2",
+    "jinja2<3.1.0",  # for passing the readthedocs workflow.
    "gym",
    # Installing the latest version of protobuf for python versions below 3.8 will cause unit tests to fail.
    "protobuf<=3.20.1;python_version<='3.8'",
--- a/tests/data_mid_layer_tests/test_dataset.py
+++ b/tests/data_mid_layer_tests/test_dataset.py
@@ -5,9 +5,8 @@ import unittest
 import pytest
 import sys
 from qlib.tests import TestAutoData
-from qlib.data.dataset import TSDatasetH, TSDataSampler
+from qlib.data.dataset import TSDatasetH
 import numpy as np
-import pandas as pd
 import time
 from qlib.data.dataset.handler import DataHandlerLP

@@ -99,54 +98,6 @@ class TestDataset(TestAutoData):
            print(idx[i])


-class TestTSDataSampler(unittest.TestCase):
-    def test_TSDataSampler(self):
-        """
-        Test TSDataSampler for issue #1716
-        """
-        datetime_list = ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30", "2000-05-31"]
-        instruments = ["000001", "000002", "000003", "000004", "000005"]
-        index = pd.MultiIndex.from_product(
-            [pd.to_datetime(datetime_list), instruments], names=["datetime", "instrument"]
-        )
-        data = np.random.randn(len(datetime_list) * len(instruments))
-        test_df = pd.DataFrame(data=data, index=index, columns=["factor"])
-        dataset = TSDataSampler(test_df, datetime_list[0], datetime_list[-1], step_len=2)
-        print()
-        print("--------------dataset[0]--------------")
-        print(dataset[0])
-        print("--------------dataset[1]--------------")
-        print(dataset[1])
-        assert len(dataset[0]) == 2
-        self.assertTrue(np.isnan(dataset[0][0]))
-        self.assertEqual(dataset[0][1], dataset[1][0])
-        self.assertEqual(dataset[1][1], dataset[2][0])
-        self.assertEqual(dataset[2][1], dataset[3][0])
-
-    def test_TSDataSampler2(self):
-        """
-        Extra test TSDataSampler to prevent incorrect filling of nan for the values at the front
-        """
-        datetime_list = ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30", "2000-05-31"]
-        instruments = ["000001", "000002", "000003", "000004", "000005"]
-        index = pd.MultiIndex.from_product(
-            [pd.to_datetime(datetime_list), instruments], names=["datetime", "instrument"]
-        )
-        data = np.random.randn(len(datetime_list) * len(instruments))
-        test_df = pd.DataFrame(data=data, index=index, columns=["factor"])
-        dataset = TSDataSampler(test_df, datetime_list[2], datetime_list[-1], step_len=3)
-        print()
-        print("--------------dataset[0]--------------")
-        print(dataset[0])
-        print("--------------dataset[1]--------------")
-        print(dataset[1])
-        for i in range(3):
-            self.assertFalse(np.isnan(dataset[0][i]))
-            self.assertFalse(np.isnan(dataset[1][i]))
-        self.assertEqual(dataset[0][1], dataset[1][0])
-        self.assertEqual(dataset[0][2], dataset[1][1])
-
-
 if __name__ == "__main__":
    unittest.main(verbosity=10)

--- a/tests/test_all_pipeline.py
+++ b/tests/test_all_pipeline.py
@@ -27,7 +27,7 @@ def train(uri_path: str = None):
            model performance
    """

-    # model initialization
+    # model initialization
    model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])
    # To test __repr__