mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-30 09:31:18 +08:00
Compare commits
1 Commits
fix_docume
...
bump_versi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6ce41c583c |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -48,4 +48,4 @@ tags
|
|||||||
*.swp
|
*.swp
|
||||||
|
|
||||||
./pretrain
|
./pretrain
|
||||||
.idea/
|
.idea/
|
||||||
|
|||||||
@@ -5,12 +5,6 @@
|
|||||||
# Required
|
# Required
|
||||||
version: 2
|
version: 2
|
||||||
|
|
||||||
# Set the version of Python and other tools you might need
|
|
||||||
build:
|
|
||||||
os: ubuntu-22.04
|
|
||||||
tools:
|
|
||||||
python: "3.7"
|
|
||||||
|
|
||||||
# Build documentation in the docs/ directory with Sphinx
|
# Build documentation in the docs/ directory with Sphinx
|
||||||
sphinx:
|
sphinx:
|
||||||
configuration: docs/conf.py
|
configuration: docs/conf.py
|
||||||
@@ -20,6 +14,7 @@ formats: all
|
|||||||
|
|
||||||
# Optionally set the version of Python and requirements required to build your docs
|
# Optionally set the version of Python and requirements required to build your docs
|
||||||
python:
|
python:
|
||||||
|
version: 3.7
|
||||||
install:
|
install:
|
||||||
- requirements: docs/requirements.txt
|
- requirements: docs/requirements.txt
|
||||||
- method: pip
|
- method: pip
|
||||||
@@ -5,4 +5,3 @@ scipy
|
|||||||
scikit-learn
|
scikit-learn
|
||||||
pandas
|
pandas
|
||||||
tianshou
|
tianshou
|
||||||
sphinx_rtd_theme
|
|
||||||
|
|||||||
@@ -536,6 +536,7 @@ class DatasetProvider(abc.ABC):
|
|||||||
"""
|
"""
|
||||||
if len(fields) == 0:
|
if len(fields) == 0:
|
||||||
raise ValueError("fields cannot be empty")
|
raise ValueError("fields cannot be empty")
|
||||||
|
fields = fields.copy()
|
||||||
column_names = [str(f) for f in fields]
|
column_names = [str(f) for f in fields]
|
||||||
return column_names
|
return column_names
|
||||||
|
|
||||||
|
|||||||
@@ -25,12 +25,7 @@ import pandas as pd
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Union, Optional, Callable
|
from typing import List, Union, Optional, Callable
|
||||||
from packaging import version
|
from packaging import version
|
||||||
from .file import (
|
from .file import get_or_create_path, save_multiple_parts_file, unpack_archive_with_buffer, get_tmp_file_with_buffer
|
||||||
get_or_create_path,
|
|
||||||
save_multiple_parts_file,
|
|
||||||
unpack_archive_with_buffer,
|
|
||||||
get_tmp_file_with_buffer,
|
|
||||||
)
|
|
||||||
from ..config import C
|
from ..config import C
|
||||||
from ..log import get_module_logger, set_log_with_config
|
from ..log import get_module_logger, set_log_with_config
|
||||||
|
|
||||||
@@ -42,12 +37,7 @@ is_deprecated_lexsorted_pandas = version.parse(pd.__version__) > version.parse("
|
|||||||
#################### Server ####################
|
#################### Server ####################
|
||||||
def get_redis_connection():
|
def get_redis_connection():
|
||||||
"""get redis connection instance."""
|
"""get redis connection instance."""
|
||||||
return redis.StrictRedis(
|
return redis.StrictRedis(host=C.redis_host, port=C.redis_port, db=C.redis_task_db, password=C.redis_password)
|
||||||
host=C.redis_host,
|
|
||||||
port=C.redis_port,
|
|
||||||
db=C.redis_task_db,
|
|
||||||
password=C.redis_password,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
#################### Data ####################
|
#################### Data ####################
|
||||||
@@ -106,14 +96,7 @@ def get_period_offset(first_year, period, quarterly):
|
|||||||
return offset
|
return offset
|
||||||
|
|
||||||
|
|
||||||
def read_period_data(
|
def read_period_data(index_path, data_path, period, cur_date_int: int, quarterly, last_period_index: int = None):
|
||||||
index_path,
|
|
||||||
data_path,
|
|
||||||
period,
|
|
||||||
cur_date_int: int,
|
|
||||||
quarterly,
|
|
||||||
last_period_index: int = None,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803).
|
At `cur_date`(e.g. 20190102), read the information at `period`(e.g. 201803).
|
||||||
Only the updating info before cur_date or at cur_date will be used.
|
Only the updating info before cur_date or at cur_date will be used.
|
||||||
@@ -290,10 +273,7 @@ def parse_field(field):
|
|||||||
# \uff09 -> )
|
# \uff09 -> )
|
||||||
chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09"
|
chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09"
|
||||||
for pattern, new in [
|
for pattern, new in [
|
||||||
(
|
(rf"\$\$([\w{chinese_punctuation_regex}]+)", r'PFeature("\1")'), # $$ must be before $
|
||||||
rf"\$\$([\w{chinese_punctuation_regex}]+)",
|
|
||||||
r'PFeature("\1")',
|
|
||||||
), # $$ must be before $
|
|
||||||
(rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'),
|
(rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'),
|
||||||
(r"(\w+\s*)\(", r"Operators.\1("),
|
(r"(\w+\s*)\(", r"Operators.\1("),
|
||||||
]: # Features # Operators
|
]: # Features # Operators
|
||||||
@@ -403,14 +383,7 @@ def get_date_range(trading_date, left_shift=0, right_shift=0, future=False):
|
|||||||
return calendar
|
return calendar
|
||||||
|
|
||||||
|
|
||||||
def get_date_by_shift(
|
def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None):
|
||||||
trading_date,
|
|
||||||
shift,
|
|
||||||
future=False,
|
|
||||||
clip_shift=True,
|
|
||||||
freq="day",
|
|
||||||
align: Optional[str] = None,
|
|
||||||
):
|
|
||||||
"""get trading date with shift bias relative to cur_date
|
"""get trading date with shift bias relative to cur_date
|
||||||
e.g. : shift == 1, return next trading date
|
e.g. : shift == 1, return next trading date
|
||||||
shift == -1, return previous trading date
|
shift == -1, return previous trading date
|
||||||
@@ -596,38 +569,7 @@ def exists_qlib_data(qlib_dir):
|
|||||||
# check instruments
|
# check instruments
|
||||||
code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir()))
|
code_names = set(map(lambda x: fname_to_code(x.name.lower()), features_dir.iterdir()))
|
||||||
_instrument = instruments_dir.joinpath("all.txt")
|
_instrument = instruments_dir.joinpath("all.txt")
|
||||||
# Removed two possible ticker names "NA" and "NULL" from the default na_values list for column 0
|
miss_code = set(pd.read_csv(_instrument, sep="\t", header=None).loc[:, 0].apply(str.lower)) - set(code_names)
|
||||||
miss_code = set(
|
|
||||||
pd.read_csv(
|
|
||||||
_instrument,
|
|
||||||
sep="\t",
|
|
||||||
header=None,
|
|
||||||
keep_default_na=False,
|
|
||||||
na_values={
|
|
||||||
0: [
|
|
||||||
" ",
|
|
||||||
"#N/A",
|
|
||||||
"#N/A N/A",
|
|
||||||
"#NA",
|
|
||||||
"-1.#IND",
|
|
||||||
"-1.#QNAN",
|
|
||||||
"-NaN",
|
|
||||||
"-nan",
|
|
||||||
"1.#IND",
|
|
||||||
"1.#QNAN",
|
|
||||||
"<NA>",
|
|
||||||
"N/A",
|
|
||||||
"NaN",
|
|
||||||
"None",
|
|
||||||
"n/a",
|
|
||||||
"nan",
|
|
||||||
"null ",
|
|
||||||
]
|
|
||||||
},
|
|
||||||
)
|
|
||||||
.loc[:, 0]
|
|
||||||
.apply(str.lower)
|
|
||||||
) - set(code_names)
|
|
||||||
if miss_code and any(map(lambda x: "sht" not in x, miss_code)):
|
if miss_code and any(map(lambda x: "sht" not in x, miss_code)):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
@@ -396,7 +396,14 @@ class CSI500Index(CSIIndex):
|
|||||||
today = pd.Timestamp.now()
|
today = pd.Timestamp.now()
|
||||||
date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date
|
date_range = pd.DataFrame(pd.date_range(start="2007-01-15", end=today, freq="7D"))[0].dt.date
|
||||||
ret_list = []
|
ret_list = []
|
||||||
|
col = ["date", "symbol", "code_name"]
|
||||||
for date in tqdm(date_range, desc="Download CSI500"):
|
for date in tqdm(date_range, desc="Download CSI500"):
|
||||||
|
rs = bs.query_zz500_stocks(date=str(date))
|
||||||
|
zz500_stocks = []
|
||||||
|
while (rs.error_code == "0") & rs.next():
|
||||||
|
zz500_stocks.append(rs.get_row_data())
|
||||||
|
result = pd.DataFrame(zz500_stocks, columns=col)
|
||||||
|
result["symbol"] = result["symbol"].apply(lambda x: x.replace(".", "").upper())
|
||||||
result = self.get_data_from_baostock(date)
|
result = self.get_data_from_baostock(date)
|
||||||
ret_list.append(result[["date", "symbol"]])
|
ret_list.append(result[["date", "symbol"]])
|
||||||
bs.logout()
|
bs.logout()
|
||||||
|
|||||||
@@ -5,5 +5,3 @@ pandas
|
|||||||
lxml
|
lxml
|
||||||
loguru
|
loguru
|
||||||
tqdm
|
tqdm
|
||||||
yahooquery
|
|
||||||
openpyxl
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
"""
|
"""
|
||||||
TODO:
|
TODO:
|
||||||
- A more well-designed PIT database is required.
|
- A more well-designed PIT database is required.
|
||||||
- separated insert, delete, update, query operations are required.
|
- seperated insert, delete, update, query operations are required.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import shutil
|
import shutil
|
||||||
|
|||||||
Reference in New Issue
Block a user