1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-30 09:31:18 +08:00

Compare commits

..

8 Commits

Author SHA1 Message Date
Linlang Lv (iSoftStone Information)
5fafba36f2 fix docs 2024-05-21 04:12:44 +08:00
Linlang
8a087d0db9 fix docs (#1721)
* fix docs

* modify file extension

* modify file extension

---------

Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-17 19:19:45 +08:00
playfund
2ae4be426a Delete redundant copy() code to speed up (#1732)
Delete redundant copy() code to speed up

Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-17 18:45:07 +08:00
fei long
6ed83f7c04 data_collector: cn_index: fix missing dependencies package in requirements.txt (#1770)
add yahooquery and openpyxl in requirements.txt

Signed-off-by: YuLong Yao <feilongphone@gmail.com>
Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-17 18:43:12 +08:00
Ikko Eltociear Ashimine
917e3a725e Update dump_pit.py (#1759)
seperated -> separated

Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-10 14:42:41 +08:00
Chuan Xu
b1e0e77c97 Fix the bug of reading string NA as NaN in the function exists_qlib_data. (#1736)
* Fix the bug of reading NA string as NaN in exists_qlib_data.

* Fix the .gitignore file.

* Update the fix and add some comments.

* format with black

---------

Co-authored-by: Chuan Xu <chuan.xu@sas.com>
Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-10 13:09:39 +08:00
Linlang
ea245f5435 Fix issue 1729 (#1776)
* fix issue 1729

* fix issue 1729

* fix issue 1729

---------

Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-10 11:04:59 +08:00
Linlang
3779b5186a bump version (#1784)
Co-authored-by: Linlang Lv (iSoftStone Information) <v-lvlinlang@microsoft.com>
2024-05-08 13:50:55 +08:00
8 changed files with 75 additions and 17 deletions

2
.gitignore vendored
View File

@@ -48,4 +48,4 @@ tags
*.swp *.swp
./pretrain ./pretrain
.idea/ .idea/

View File

@@ -5,6 +5,12 @@
# Required # Required
version: 2 version: 2
# Set the version of Python and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.7"
# Build documentation in the docs/ directory with Sphinx # Build documentation in the docs/ directory with Sphinx
sphinx: sphinx:
configuration: docs/conf.py configuration: docs/conf.py
@@ -14,7 +20,6 @@ formats: all
# Optionally set the version of Python and requirements required to build your docs # Optionally set the version of Python and requirements required to build your docs
python: python:
version: 3.7
install: install:
- requirements: docs/requirements.txt - requirements: docs/requirements.txt
- method: pip - method: pip

View File

@@ -5,3 +5,4 @@ scipy
scikit-learn scikit-learn
pandas pandas
tianshou tianshou
sphinx_rtd_theme

View File

@@ -536,7 +536,6 @@ class DatasetProvider(abc.ABC):
""" """
if len(fields) == 0: if len(fields) == 0:
raise ValueError("fields cannot be empty") raise ValueError("fields cannot be empty")
fields = fields.copy()
column_names = [str(f) for f in fields] column_names = [str(f) for f in fields]
return column_names return column_names

View File

@@ -25,7 +25,12 @@ import pandas as pd
from pathlib import Path from pathlib import Path
from typing import List, Union, Optional, Callable from typing import List, Union, Optional, Callable
from packaging import version from packaging import version
from .file import get_or_create_path, save_multiple_parts_file, unpack_archive_with_buffer, get_tmp_file_with_buffer from .file import (
get_or_create_path,
save_multiple_parts_file,
unpack_archive_with_buffer,
get_tmp_file_with_buffer,
)
from ..config import C from ..config import C
from ..log import get_module_logger, set_log_with_config from ..log import get_module_logger, set_log_with_config
@@ -37,7 +42,12 @@ is_deprecated_lexsorted_pandas = version.parse(pd.__version__) > version.parse("
#################### Server #################### #################### Server ####################
def get_redis_connection(): def get_redis_connection():
"""get redis connection instance.""" """get redis connection instance."""
return redis.StrictRedis(host=C.redis_host, port=C.redis_port, db=C.redis_task_db, password=C.redis_password) return redis.StrictRedis(
host=C.redis_host,
port=C.redis_port,
db=C.redis_task_db,
password=C.redis_password,
)
#################### Data #################### #################### Data ####################
@@ -96,7 +106,14 @@ def get_period_offset(first_year, period, quarterly):
return offset return offset
def read_period_data(index_path, data_path, period, cur_date_int: int, quarterly, last_period_index: int = None): def read_period_data(
index_path,
data_path,
period,
cur_date_int: int,
quarterly,
last_period_index: int = None,
):
""" """
At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803). At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803).
Only the updating info before cur_date or at cur_date will be used. Only the updating info before cur_date or at cur_date will be used.
@@ -273,7 +290,10 @@ def parse_field(field):
# \uff09 -> ) # \uff09 -> )
chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09" chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09"
for pattern, new in [ for pattern, new in [
(rf"\$\$([\w{chinese_punctuation_regex}]+)", r'PFeature("\1")'), # $$ must be before $ (
rf"\$\$([\w{chinese_punctuation_regex}]+)",
r'PFeature("\1")',
), # $$ must be before $
(rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'), (rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'),
(r"(\w+\s*)\(", r"Operators.\1("), (r"(\w+\s*)\(", r"Operators.\1("),
]: # Features # Operators ]: # Features # Operators
@@ -383,7 +403,14 @@ def get_date_range(trading_date, left_shift=0, right_shift=0, future=False):
return calendar return calendar
def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None): def get_date_by_shift(
trading_date,
shift,
future=False,
clip_shift=True,
freq="day",
align: Optional[str] = None,
):
"""get trading date with shift bias will cur_date """get trading date with shift bias will cur_date
e.g. : shift == 1, return next trading date e.g. : shift == 1, return next trading date
shift == -1, return previous trading date shift == -1, return previous trading date
@@ -569,7 +596,38 @@ def exists_qlib_data(qlib_dir):
# check instruments # check instruments
code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir())) code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir()))
_instrument = instruments_dir.joinpath("all.txt") _instrument = instruments_dir.joinpath("all.txt")
miss_code = set(pd.read_csv(_instrument, sep="\t", header=None).loc[:, 0].apply(str.lower)) - set(code_names) # Removed two possible ticker names "NA" and "NULL" from the default na_values list for column 0
miss_code = set(
pd.read_csv(
_instrument,
sep="\t",
header=None,
keep_default_na=False,
na_values={
0: [
" ",
"#N/A",
"#N/A N/A",
"#NA",
"-1.#IND",
"-1.#QNAN",
"-NaN",
"-nan",
"1.#IND",
"1.#QNAN",
"<NA>",
"N/A",
"NaN",
"None",
"n/a",
"nan",
"null ",
]
},
)
.loc[:, 0]
.apply(str.lower)
) - set(code_names)
if miss_code and any(map(lambda x: "sht" not in x, miss_code)): if miss_code and any(map(lambda x: "sht" not in x, miss_code)):
return False return False

View File

@@ -396,14 +396,7 @@ class CSI500Index(CSIIndex):
today = pd.Timestamp.now() today = pd.Timestamp.now()
date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date
ret_list = [] ret_list = []
col = ["date", "symbol", "code_name"]
for date in tqdm(date_range, desc="Download CSI500"): for date in tqdm(date_range, desc="Download CSI500"):
rs = bs.query_zz500_stocks(date=str(date))
zz500_stocks = []
while (rs.error_code == "0") & rs.next():
zz500_stocks.append(rs.get_row_data())
result = pd.DataFrame(zz500_stocks, columns=col)
result["symbol"] = result["symbol"].apply(lambda x: x.replace(".", "").upper())
result = self.get_data_from_baostock(date) result = self.get_data_from_baostock(date)
ret_list.append(result[["date", "symbol"]]) ret_list.append(result[["date", "symbol"]])
bs.logout() bs.logout()

View File

@@ -5,3 +5,5 @@ pandas
lxml lxml
loguru loguru
tqdm tqdm
yahooquery
openpyxl

View File

@@ -3,7 +3,7 @@
""" """
TODO: TODO:
- A more well-designed PIT database is required. - A more well-designed PIT database is required.
- seperated insert, delete, update, query operations are required. - separated insert, delete, update, query operations are required.
""" """
import shutil import shutil