fix docs

fix docs (#1721)
* fix docs
* modify file extension
* modify file extension

---------

Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2026-06-30 09:31:18 +08:00 · 2024-05-21 04:12:44 +08:00 · 2024-05-17 19:19:45 +08:00 · 2024-05-17 18:45:07 +08:00 · 2024-05-17 18:43:12 +08:00 · 2024-05-10 14:42:41 +08:00
8 changed files with 75 additions and 17 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -48,4 +48,4 @@ tags
 *.swp
 ./pretrain
-.idea/
+.idea/
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -5,6 +5,12 @@
 # Required
 version: 2
 # Set the version of Python and other tools you might need
 build:
  os: ubuntu-22.04
  tools:
    python: "3.7"
 # Build documentation in the docs/ directory with Sphinx
 sphinx:
  configuration: docs/conf.py
@@ -14,7 +20,6 @@ formats: all
 # Optionally set the version of Python and requirements required to build your docs
 python:
  version: 3.7
  install:
    - requirements: docs/requirements.txt
    - method: pip
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -5,3 +5,4 @@ scipy
 scikit-learn
 pandas
 tianshou
 sphinx_rtd_theme
--- a/qlib/data/data.py
+++ b/qlib/data/data.py
@@ -536,7 +536,6 @@ class DatasetProvider(abc.ABC):
        """
        if len(fields) == 0:
            raise ValueError("fields cannot be empty")
        fields = fields.copy()
        column_names = [str(f) for f in fields]
        return column_names
--- a/qlib/utils/init.py
+++ b/qlib/utils/init.py
@@ -25,7 +25,12 @@ import pandas as pd
 from pathlib import Path
 from typing import List, Union, Optional, Callable
 from packaging import version
-from .file import get_or_create_path, save_multiple_parts_file, unpack_archive_with_buffer, get_tmp_file_with_buffer
+from .file import (
    get_or_create_path,
    save_multiple_parts_file,
    unpack_archive_with_buffer,
    get_tmp_file_with_buffer,
 )
 from ..config import C
 from ..log import get_module_logger, set_log_with_config
@@ -37,7 +42,12 @@ is_deprecated_lexsorted_pandas = version.parse(pd.__version__) > version.parse("
 #################### Server ####################
 def get_redis_connection():
    """get redis connection instance."""
-    return redis.StrictRedis(host=C.redis_host, port=C.redis_port, db=C.redis_task_db, password=C.redis_password)
+    return redis.StrictRedis(
        host=C.redis_host,
        port=C.redis_port,
        db=C.redis_task_db,
        password=C.redis_password,
    )
 #################### Data ####################
@@ -96,7 +106,14 @@ def get_period_offset(first_year, period, quarterly):
    return offset
-def read_period_data(index_path, data_path, period, cur_date_int: int, quarterly, last_period_index: int = None):
+def read_period_data(
    index_path,
    data_path,
    period,
    cur_date_int: int,
    quarterly,
    last_period_index: int = None,
 ):
    """
    At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803).
    Only the updating info before cur_date or at cur_date will be used.
@@ -273,7 +290,10 @@ def parse_field(field):
    # \uff09 -> )
    chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09"
    for pattern, new in [
-        (rf"\$\$([\w{chinese_punctuation_regex}]+)", r'PFeature("\1")'),  # $$ must be before $
+        (
            rf"\$\$([\w{chinese_punctuation_regex}]+)",
            r'PFeature("\1")',
        ),  # $$ must be before $
        (rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'),
        (r"(\w+\s*)\(", r"Operators.\1("),
    ]:  # Features  # Operators
@@ -383,7 +403,14 @@ def get_date_range(trading_date, left_shift=0, right_shift=0, future=False):
    return calendar
-def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None):
+def get_date_by_shift(
    trading_date,
    shift,
    future=False,
    clip_shift=True,
    freq="day",
    align: Optional[str] = None,
 ):
    """get trading date with shift bias will cur_date
        e.g. : shift == 1,  return next trading date
               shift == -1, return previous trading date
@@ -569,7 +596,38 @@ def exists_qlib_data(qlib_dir):
    # check instruments
    code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir()))
    _instrument = instruments_dir.joinpath("all.txt")
-    miss_code = set(pd.read_csv(_instrument, sep="\t", header=None).loc[:, 0].apply(str.lower)) - set(code_names)
+    # Removed two possible ticker names "NA" and "NULL" from the default na_values list for column 0
    miss_code = set(
        pd.read_csv(
            _instrument,
            sep="\t",
            header=None,
            keep_default_na=False,
            na_values={
                0: [
                    " ",
                    "#N/A",
                    "#N/A N/A",
                    "#NA",
                    "-1.#IND",
                    "-1.#QNAN",
                    "-NaN",
                    "-nan",
                    "1.#IND",
                    "1.#QNAN",
                    "<NA>",
                    "N/A",
                    "NaN",
                    "None",
                    "n/a",
                    "nan",
                    "null ",
                ]
            },
        )
        .loc[:, 0]
        .apply(str.lower)
    ) - set(code_names)
    if miss_code and any(map(lambda x: "sht" not in x, miss_code)):
        return False
--- a/scripts/data_collector/cn_index/collector.py
+++ b/scripts/data_collector/cn_index/collector.py
@@ -396,14 +396,7 @@ class CSI500Index(CSIIndex):
        today = pd.Timestamp.now()
        date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date
        ret_list = []
        col = ["date", "symbol", "code_name"]
        for date in tqdm(date_range, desc="Download CSI500"):
            rs = bs.query_zz500_stocks(date=str(date))
            zz500_stocks = []
            while (rs.error_code == "0") & rs.next():
                zz500_stocks.append(rs.get_row_data())
            result = pd.DataFrame(zz500_stocks, columns=col)
            result["symbol"] = result["symbol"].apply(lambda x: x.replace(".", "").upper())
            result = self.get_data_from_baostock(date)
            ret_list.append(result[["date", "symbol"]])
        bs.logout()
--- a/scripts/data_collector/cn_index/requirements.txt
+++ b/scripts/data_collector/cn_index/requirements.txt
@@ -5,3 +5,5 @@ pandas
 lxml
 loguru
 tqdm
 yahooquery
 openpyxl
--- a/scripts/dump_pit.py
+++ b/scripts/dump_pit.py
@@ -3,7 +3,7 @@
 """
 TODO:
 - A more well-designed PIT database is required.
-    - seperated insert, delete, update, query operations are required.
+    - separated insert, delete, update, query operations are required.
 """
 import shutil
Author	SHA1	Message	Date
Linlang Lv (iSoftStone Information)	5fafba36f2	fix docs	2024-05-21 04:12:44 +08:00
Linlang	8a087d0db9	fix docs (#1721) * fix docs * modify file extension * modify file extension --------- Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>	2024-05-17 19:19:45 +08:00
playfund	2ae4be426a	Delete redundant copy() code to speed up (#1732) Delete redundant copy() code to speed up Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>	2024-05-17 18:45:07 +08:00
fei long	6ed83f7c04	data_collector: cn_index: fix missing dependencies package in requirements.txt (#1770) add yahooquery and openpyxl in requirements.txt Signed-off-by: YuLong Yao <feilongphone@gmail.com> Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>	2024-05-17 18:43:12 +08:00
Ikko Eltociear Ashimine	917e3a725e	Update dump_pit.py (#1759) seperated -> separated Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>	2024-05-10 14:42:41 +08:00
Chuan Xu	b1e0e77c97	Fix the bug of reading string NA as NaN in the function exists_qlib_data. (#1736) * Fix the bug of reading NA string as NaN in exists_qlib_data. * Fix the .gitignore file. * Update the fix and add some comments. * format with black --------- Co-authored-by: Chuan Xu <chuan.xu@sas.com> Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>	2024-05-10 13:09:39 +08:00
Linlang	ea245f5435	Fix issue 1729 (#1776) * fix issue 1729 * fix issue 1729 * fix issue 1729 --------- Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>	2024-05-10 11:04:59 +08:00
Linlang	3779b5186a	bump version (#1784) Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>	2024-05-08 13:50:55 +08:00