1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-30 17:41:18 +08:00

Compare commits

..

7 Commits

Author SHA1 Message Date
Linlang Lv (iSoftStone Information)
091f542b42 Merge branch 'main' of github.com:microsoft/qlib into fix_docs 2024-05-17 17:42:15 +08:00
Ikko Eltociear Ashimine
917e3a725e Update dump_pit.py (#1759)
seperated -> separated

Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-10 14:42:41 +08:00
Chuan Xu
b1e0e77c97 Fix the bug of reading string NA as NaN in the function exists_qlib_data. (#1736)
* Fix the bug of reading NA string as NaN in exists_qlib_data.

* Fix the .gitignore file.

* Update the fix and add some comments.

* format with black

---------

Co-authored-by: Chuan Xu <chuan.xu@sas.com>
Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-10 13:09:39 +08:00
Linlang
ea245f5435 Fix issue 1729 (#1776)
* fix issue 1729

* fix issue 1729

* fix issue 1729

---------

Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-10 11:04:59 +08:00
Linlang Lv (iSoftStone Information)
77d34e080b modify file extension 2024-04-10 14:29:34 +08:00
Linlang Lv (iSoftStone Information)
69daea0adc modify file extension 2024-04-10 14:24:05 +08:00
Linlang
975aeb7a99 fix docs 2024-01-05 11:43:54 +08:00
4 changed files with 72 additions and 9 deletions

2
.gitignore vendored
View File

@@ -48,4 +48,4 @@ tags
*.swp *.swp
./pretrain ./pretrain
.idea/ .idea/

View File

@@ -5,6 +5,12 @@
# Required # Required
version: 2 version: 2
# Set the version of Python and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.7"
# Build documentation in the docs/ directory with Sphinx # Build documentation in the docs/ directory with Sphinx
sphinx: sphinx:
configuration: docs/conf.py configuration: docs/conf.py
@@ -14,7 +20,6 @@ formats: all
# Optionally set the version of Python and requirements required to build your docs # Optionally set the version of Python and requirements required to build your docs
python: python:
version: 3.7
install: install:
- requirements: docs/requirements.txt - requirements: docs/requirements.txt
- method: pip - method: pip

View File

@@ -25,7 +25,12 @@ import pandas as pd
from pathlib import Path from pathlib import Path
from typing import List, Union, Optional, Callable from typing import List, Union, Optional, Callable
from packaging import version from packaging import version
from .file import get_or_create_path, save_multiple_parts_file, unpack_archive_with_buffer, get_tmp_file_with_buffer from .file import (
get_or_create_path,
save_multiple_parts_file,
unpack_archive_with_buffer,
get_tmp_file_with_buffer,
)
from ..config import C from ..config import C
from ..log import get_module_logger, set_log_with_config from ..log import get_module_logger, set_log_with_config
@@ -37,7 +42,12 @@ is_deprecated_lexsorted_pandas = version.parse(pd.__version__) > version.parse("
#################### Server #################### #################### Server ####################
def get_redis_connection(): def get_redis_connection():
"""get redis connection instance.""" """get redis connection instance."""
return redis.StrictRedis(host=C.redis_host, port=C.redis_port, db=C.redis_task_db, password=C.redis_password) return redis.StrictRedis(
host=C.redis_host,
port=C.redis_port,
db=C.redis_task_db,
password=C.redis_password,
)
#################### Data #################### #################### Data ####################
@@ -96,7 +106,14 @@ def get_period_offset(first_year, period, quarterly):
return offset return offset
def read_period_data(index_path, data_path, period, cur_date_int: int, quarterly, last_period_index: int = None): def read_period_data(
index_path,
data_path,
period,
cur_date_int: int,
quarterly,
last_period_index: int = None,
):
""" """
At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803). At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803).
Only the updating info before cur_date or at cur_date will be used. Only the updating info before cur_date or at cur_date will be used.
@@ -273,7 +290,10 @@ def parse_field(field):
# \uff09 -> ) # \uff09 -> )
chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09" chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09"
for pattern, new in [ for pattern, new in [
(rf"\$\$([\w{chinese_punctuation_regex}]+)", r'PFeature("\1")'), # $$ must be before $ (
rf"\$\$([\w{chinese_punctuation_regex}]+)",
r'PFeature("\1")',
), # $$ must be before $
(rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'), (rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'),
(r"(\w+\s*)\(", r"Operators.\1("), (r"(\w+\s*)\(", r"Operators.\1("),
]: # Features # Operators ]: # Features # Operators
@@ -383,7 +403,14 @@ def get_date_range(trading_date, left_shift=0, right_shift=0, future=False):
return calendar return calendar
def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None): def get_date_by_shift(
trading_date,
shift,
future=False,
clip_shift=True,
freq="day",
align: Optional[str] = None,
):
"""get trading date with shift bias will cur_date """get trading date with shift bias will cur_date
e.g. : shift == 1, return next trading date e.g. : shift == 1, return next trading date
shift == -1, return previous trading date shift == -1, return previous trading date
@@ -569,7 +596,38 @@ def exists_qlib_data(qlib_dir):
# check instruments # check instruments
code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir())) code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir()))
_instrument = instruments_dir.joinpath("all.txt") _instrument = instruments_dir.joinpath("all.txt")
miss_code = set(pd.read_csv(_instrument, sep="\t", header=None).loc[:, 0].apply(str.lower)) - set(code_names) # Removed two possible ticker names "NA" and "NULL" from the default na_values list for column 0
miss_code = set(
pd.read_csv(
_instrument,
sep="\t",
header=None,
keep_default_na=False,
na_values={
0: [
" ",
"#N/A",
"#N/A N/A",
"#NA",
"-1.#IND",
"-1.#QNAN",
"-NaN",
"-nan",
"1.#IND",
"1.#QNAN",
"<NA>",
"N/A",
"NaN",
"None",
"n/a",
"nan",
"null ",
]
},
)
.loc[:, 0]
.apply(str.lower)
) - set(code_names)
if miss_code and any(map(lambda x: "sht" not in x, miss_code)): if miss_code and any(map(lambda x: "sht" not in x, miss_code)):
return False return False

View File

@@ -3,7 +3,7 @@
""" """
TODO: TODO:
- A more well-designed PIT database is required. - A more well-designed PIT database is required.
- seperated insert, delete, update, query operations are required. - separated insert, delete, update, query operations are required.
""" """
import shutil import shutil