1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-30 09:31:18 +08:00

Compare commits

..

1 Commits

Author SHA1 Message Date
Linlang Lv (iSoftStone Information)
6ce41c583c bump version 2024-05-07 18:26:08 +08:00
8 changed files with 17 additions and 75 deletions

2
.gitignore vendored
View File

@@ -48,4 +48,4 @@ tags
*.swp *.swp
./pretrain ./pretrain
.idea/ .idea/

View File

@@ -5,12 +5,6 @@
# Required # Required
version: 2 version: 2
# Set the version of Python and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.7"
# Build documentation in the docs/ directory with Sphinx # Build documentation in the docs/ directory with Sphinx
sphinx: sphinx:
configuration: docs/conf.py configuration: docs/conf.py
@@ -20,6 +14,7 @@ formats: all
# Optionally set the version of Python and requirements required to build your docs # Optionally set the version of Python and requirements required to build your docs
python: python:
version: 3.7
install: install:
- requirements: docs/requirements.txt - requirements: docs/requirements.txt
- method: pip - method: pip

View File

@@ -5,4 +5,3 @@ scipy
scikit-learn scikit-learn
pandas pandas
tianshou tianshou
sphinx_rtd_theme

View File

@@ -536,6 +536,7 @@ class DatasetProvider(abc.ABC):
""" """
if len(fields) == 0: if len(fields) == 0:
raise ValueError("fields cannot be empty") raise ValueError("fields cannot be empty")
fields = fields.copy()
column_names = [str(f) for f in fields] column_names = [str(f) for f in fields]
return column_names return column_names

View File

@@ -25,12 +25,7 @@ import pandas as pd
from pathlib import Path from pathlib import Path
from typing import List, Union, Optional, Callable from typing import List, Union, Optional, Callable
from packaging import version from packaging import version
from .file import ( from .file import get_or_create_path, save_multiple_parts_file, unpack_archive_with_buffer, get_tmp_file_with_buffer
get_or_create_path,
save_multiple_parts_file,
unpack_archive_with_buffer,
get_tmp_file_with_buffer,
)
from ..config import C from ..config import C
from ..log import get_module_logger, set_log_with_config from ..log import get_module_logger, set_log_with_config
@@ -42,12 +37,7 @@ is_deprecated_lexsorted_pandas = version.parse(pd.__version__) > version.parse("
#################### Server #################### #################### Server ####################
def get_redis_connection(): def get_redis_connection():
"""get redis connection instance.""" """get redis connection instance."""
return redis.StrictRedis( return redis.StrictRedis(host=C.redis_host, port=C.redis_port, db=C.redis_task_db, password=C.redis_password)
host=C.redis_host,
port=C.redis_port,
db=C.redis_task_db,
password=C.redis_password,
)
#################### Data #################### #################### Data ####################
@@ -106,14 +96,7 @@ def get_period_offset(first_year, period, quarterly):
return offset return offset
def read_period_data( def read_period_data(index_path, data_path, period, cur_date_int: int, quarterly, last_period_index: int = None):
index_path,
data_path,
period,
cur_date_int: int,
quarterly,
last_period_index: int = None,
):
""" """
At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803). At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803).
Only the updating info before cur_date or at cur_date will be used. Only the updating info before cur_date or at cur_date will be used.
@@ -290,10 +273,7 @@ def parse_field(field):
# \uff09 -> ) # \uff09 -> )
chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09" chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09"
for pattern, new in [ for pattern, new in [
( (rf"\$\$([\w{chinese_punctuation_regex}]+)", r'PFeature("\1")'), # $$ must be before $
rf"\$\$([\w{chinese_punctuation_regex}]+)",
r'PFeature("\1")',
), # $$ must be before $
(rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'), (rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'),
(r"(\w+\s*)\(", r"Operators.\1("), (r"(\w+\s*)\(", r"Operators.\1("),
]: # Features # Operators ]: # Features # Operators
@@ -403,14 +383,7 @@ def get_date_range(trading_date, left_shift=0, right_shift=0, future=False):
return calendar return calendar
def get_date_by_shift( def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None):
trading_date,
shift,
future=False,
clip_shift=True,
freq="day",
align: Optional[str] = None,
):
"""get trading date with shift bias will cur_date """get trading date with shift bias will cur_date
e.g. : shift == 1, return next trading date e.g. : shift == 1, return next trading date
shift == -1, return previous trading date shift == -1, return previous trading date
@@ -596,38 +569,7 @@ def exists_qlib_data(qlib_dir):
# check instruments # check instruments
code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir())) code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir()))
_instrument = instruments_dir.joinpath("all.txt") _instrument = instruments_dir.joinpath("all.txt")
# Removed two possible ticker names "NA" and "NULL" from the default na_values list for column 0 miss_code = set(pd.read_csv(_instrument, sep="\t", header=None).loc[:, 0].apply(str.lower)) - set(code_names)
miss_code = set(
pd.read_csv(
_instrument,
sep="\t",
header=None,
keep_default_na=False,
na_values={
0: [
" ",
"#N/A",
"#N/A N/A",
"#NA",
"-1.#IND",
"-1.#QNAN",
"-NaN",
"-nan",
"1.#IND",
"1.#QNAN",
"<NA>",
"N/A",
"NaN",
"None",
"n/a",
"nan",
"null ",
]
},
)
.loc[:, 0]
.apply(str.lower)
) - set(code_names)
if miss_code and any(map(lambda x: "sht" not in x, miss_code)): if miss_code and any(map(lambda x: "sht" not in x, miss_code)):
return False return False

View File

@@ -396,7 +396,14 @@ class CSI500Index(CSIIndex):
today = pd.Timestamp.now() today = pd.Timestamp.now()
date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date
ret_list = [] ret_list = []
col = ["date", "symbol", "code_name"]
for date in tqdm(date_range, desc="Download CSI500"): for date in tqdm(date_range, desc="Download CSI500"):
rs = bs.query_zz500_stocks(date=str(date))
zz500_stocks = []
while (rs.error_code == "0") & rs.next():
zz500_stocks.append(rs.get_row_data())
result = pd.DataFrame(zz500_stocks, columns=col)
result["symbol"] = result["symbol"].apply(lambda x: x.replace(".", "").upper())
result = self.get_data_from_baostock(date) result = self.get_data_from_baostock(date)
ret_list.append(result[["date", "symbol"]]) ret_list.append(result[["date", "symbol"]])
bs.logout() bs.logout()

View File

@@ -5,5 +5,3 @@ pandas
lxml lxml
loguru loguru
tqdm tqdm
yahooquery
openpyxl

View File

@@ -3,7 +3,7 @@
""" """
TODO: TODO:
- A more well-designed PIT database is required. - A more well-designed PIT database is required.
- separated insert, delete, update, query operations are required. - seperated insert, delete, update, query operations are required.
""" """
import shutil import shutil