1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-07 06:21:35 +08:00

Compare commits

..

8 Commits

Author SHA1 Message Date
Linlang Lv (iSoftStone Information)
091f542b42 Merge branch 'main' of github.com:microsoft/qlib into fix_docs 2024-05-17 17:42:15 +08:00
Ikko Eltociear Ashimine
917e3a725e Update dump_pit.py (#1759)
seperated -> separated

Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-10 14:42:41 +08:00
Chuan Xu
b1e0e77c97 Fix the bug of reading string NA as NaN in the function exists_qlib_data. (#1736)
* Fix the bug of reading NA string as NaN in exists_qlib_data.

* Fix the .gitignore file.

* Update the fix and add some comments.

* format with black

---------

Co-authored-by: Chuan Xu <chuan.xu@sas.com>
Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-10 13:09:39 +08:00
Linlang
ea245f5435 Fix issue 1729 (#1776)
* fix issue 1729

* fix issue 1729

* fix issue 1729

---------

Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-10 11:04:59 +08:00
Linlang
3779b5186a bump version (#1784)
Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-08 13:50:55 +08:00
Linlang Lv (iSoftStone Information)
77d34e080b modify file extension 2024-04-10 14:29:34 +08:00
Linlang Lv (iSoftStone Information)
69daea0adc modify file extension 2024-04-10 14:24:05 +08:00
Linlang
975aeb7a99 fix docs 2024-01-05 11:43:54 +08:00
5 changed files with 72 additions and 16 deletions

2
.gitignore vendored
View File

@@ -48,4 +48,4 @@ tags
*.swp
./pretrain
.idea/
.idea/

View File

@@ -5,6 +5,12 @@
# Required
version: 2
# Set the version of Python and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.7"
# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/conf.py
@@ -14,7 +20,6 @@ formats: all
# Optionally set the version of Python and requirements required to build your docs
python:
version: 3.7
install:
- requirements: docs/requirements.txt
- method: pip

View File

@@ -25,7 +25,12 @@ import pandas as pd
from pathlib import Path
from typing import List, Union, Optional, Callable
from packaging import version
from .file import get_or_create_path, save_multiple_parts_file, unpack_archive_with_buffer, get_tmp_file_with_buffer
from .file import (
get_or_create_path,
save_multiple_parts_file,
unpack_archive_with_buffer,
get_tmp_file_with_buffer,
)
from ..config import C
from ..log import get_module_logger, set_log_with_config
@@ -37,7 +42,12 @@ is_deprecated_lexsorted_pandas = version.parse(pd.__version__) > version.parse("
#################### Server ####################
def get_redis_connection():
"""get redis connection instance."""
return redis.StrictRedis(host=C.redis_host, port=C.redis_port, db=C.redis_task_db, password=C.redis_password)
return redis.StrictRedis(
host=C.redis_host,
port=C.redis_port,
db=C.redis_task_db,
password=C.redis_password,
)
#################### Data ####################
@@ -96,7 +106,14 @@ def get_period_offset(first_year, period, quarterly):
return offset
def read_period_data(index_path, data_path, period, cur_date_int: int, quarterly, last_period_index: int = None):
def read_period_data(
index_path,
data_path,
period,
cur_date_int: int,
quarterly,
last_period_index: int = None,
):
"""
At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803).
Only the updating info before cur_date or at cur_date will be used.
@@ -273,7 +290,10 @@ def parse_field(field):
# \uff09 -> )
chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09"
for pattern, new in [
(rf"\$\$([\w{chinese_punctuation_regex}]+)", r'PFeature("\1")'), # $$ must be before $
(
rf"\$\$([\w{chinese_punctuation_regex}]+)",
r'PFeature("\1")',
), # $$ must be before $
(rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'),
(r"(\w+\s*)\(", r"Operators.\1("),
]: # Features # Operators
@@ -383,7 +403,14 @@ def get_date_range(trading_date, left_shift=0, right_shift=0, future=False):
return calendar
def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None):
def get_date_by_shift(
trading_date,
shift,
future=False,
clip_shift=True,
freq="day",
align: Optional[str] = None,
):
"""get trading date with shift bias will cur_date
e.g. : shift == 1, return next trading date
shift == -1, return previous trading date
@@ -569,7 +596,38 @@ def exists_qlib_data(qlib_dir):
# check instruments
code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir()))
_instrument = instruments_dir.joinpath("all.txt")
miss_code = set(pd.read_csv(_instrument, sep="\t", header=None).loc[:, 0].apply(str.lower)) - set(code_names)
# Removed two possible ticker names "NA" and "NULL" from the default na_values list for column 0
miss_code = set(
pd.read_csv(
_instrument,
sep="\t",
header=None,
keep_default_na=False,
na_values={
0: [
" ",
"#N/A",
"#N/A N/A",
"#NA",
"-1.#IND",
"-1.#QNAN",
"-NaN",
"-nan",
"1.#IND",
"1.#QNAN",
"<NA>",
"N/A",
"NaN",
"None",
"n/a",
"nan",
"null ",
]
},
)
.loc[:, 0]
.apply(str.lower)
) - set(code_names)
if miss_code and any(map(lambda x: "sht" not in x, miss_code)):
return False

View File

@@ -396,14 +396,7 @@ class CSI500Index(CSIIndex):
today = pd.Timestamp.now()
date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date
ret_list = []
col = ["date", "symbol", "code_name"]
for date in tqdm(date_range, desc="Download CSI500"):
rs = bs.query_zz500_stocks(date=str(date))
zz500_stocks = []
while (rs.error_code == "0") & rs.next():
zz500_stocks.append(rs.get_row_data())
result = pd.DataFrame(zz500_stocks, columns=col)
result["symbol"] = result["symbol"].apply(lambda x: x.replace(".", "").upper())
result = self.get_data_from_baostock(date)
ret_list.append(result[["date", "symbol"]])
bs.logout()

View File

@@ -3,7 +3,7 @@
"""
TODO:
- A more well-designed PIT database is required.
- seperated insert, delete, update, query operations are required.
- separated insert, delete, update, query operations are required.
"""
import shutil