1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-03 19:10:58 +08:00

fix: possible bug causing missing calendar_list data

This commit is contained in:
Linlang
2026-01-28 16:59:40 +08:00
parent 1ace03cf77
commit 34a2372f01

View File

@@ -25,7 +25,6 @@ from bs4 import BeautifulSoup
HS_SYMBOLS_URL = "http://app.finance.ifeng.com/hq/list.php?type=stock_a&class={s_type}"
# CALENDAR_URL_BASE = "http://push2his.eastmoney.com/api/qt/stock/kline/get?secid={market}.{bench_code}&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58&klt=101&fqt=0&beg={start}&end={end}"
CALENDAR_URL_BASE = "http://push2his.eastmoney.com/api/qt/stock/kline/get?secid={market}.{bench_code}&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58&klt=101&fqt=0"
SZSE_CALENDAR_URL = "http://www.szse.cn/api/report/exchange/onepersistenthour/monthList?month={month}&random={random}"
@@ -97,37 +96,39 @@ def get_calendar_list(bench_code="CSI300") -> List[pd.Timestamp]:
logger.info(f"get calendar list: {bench_code}......")
def _get_calendar(url, max_retry=3):
    """Collect trading days from the kline endpoint, one request per year.

    The endpoint is queried separately for every year from 2000 up to the
    current year by appending ``&beg=YYYY0101&end=YYYY1231`` to ``url``.
    Each yearly request is attempted up to ``max_retry`` times; a year is
    recorded as failed only when every attempt fails.

    Parameters
    ----------
    url : str
        Base kline URL without the ``beg``/``end`` query parameters.
    max_retry : int
        Maximum number of attempts per year.

    Returns
    -------
    list of pd.Timestamp
        Sorted, de-duplicated dates parsed from the first comma-separated
        field of every returned kline row. The list may be incomplete; in
        that case a warning naming the failed years is logged.
    """
    res_list = []
    failed_years = []
    cur_year = datetime.datetime.now().year
    # Use the session as a context manager so pooled connections are released
    # even if something unexpected propagates out of the loop.
    with requests.Session() as session:
        session.headers.update(build_headers())
        for year in range(2000, cur_year + 1):
            formatted_url = f"{url}&beg={year}0101&end={year}1231"
            for attempt in range(1, max_retry + 1):
                try:
                    resp = session.get(formatted_url, timeout=10)
                    resp.raise_for_status()
                    data = resp.json().get("data")
                    if not data or "klines" not in data:
                        raise ValueError("missing klines")
                    # Parse the whole year before touching res_list so a
                    # malformed row cannot leave a half-added year behind.
                    year_days = [pd.Timestamp(x.split(",")[0]) for x in data["klines"]]
                except Exception as e:
                    # Deliberately broad: this is a best-effort crawl, so any
                    # failure (network, HTTP status, JSON, payload shape) just
                    # triggers a retry. Record why instead of dropping it.
                    logger.debug(f"get calendar failed: year={year}, attempt={attempt}/{max_retry}, error={e!r}")
                    # Back off only when another attempt will follow.
                    if attempt < max_retry:
                        time.sleep(random.uniform(0.8, 1.5))
                else:
                    res_list.extend(year_days)
                    break
            else:
                # The retry loop finished without a successful `break`.
                failed_years.append(year)
    if failed_years:
        logger.warning(f"Calendar incomplete, failed years: {failed_years}")
    return sorted(set(res_list))
# _value_list = requests.get(url, timeout=None).json()["data"]["klines"]
# return sorted(map(lambda x: pd.Timestamp(x.split(",")[0]), _value_list))
calendar = _CALENDAR_MAP.get(bench_code, None)
if calendar is None:
if bench_code.startswith("US_") or bench_code.startswith("IN_") or bench_code.startswith("BR_"):