mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-03 19:10:58 +08:00
fix: possible bug causing missing calendar_list data
This commit is contained in:
@@ -25,7 +25,6 @@ from bs4 import BeautifulSoup
|
||||
|
||||
HS_SYMBOLS_URL = "http://app.finance.ifeng.com/hq/list.php?type=stock_a&class={s_type}"
|
||||
|
||||
# CALENDAR_URL_BASE = "http://push2his.eastmoney.com/api/qt/stock/kline/get?secid={market}.{bench_code}&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58&klt=101&fqt=0&beg={start}&end={end}"
|
||||
CALENDAR_URL_BASE = "http://push2his.eastmoney.com/api/qt/stock/kline/get?secid={market}.{bench_code}&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58&klt=101&fqt=0"
|
||||
SZSE_CALENDAR_URL = "http://www.szse.cn/api/report/exchange/onepersistenthour/monthList?month={month}&random={random}"
|
||||
|
||||
@@ -97,37 +96,39 @@ def get_calendar_list(bench_code="CSI300") -> List[pd.Timestamp]:
|
||||
|
||||
logger.info(f"get calendar list: {bench_code}......")
|
||||
|
||||
def _get_calendar(url):
|
||||
def _get_calendar(url, max_retry=3):
|
||||
session = requests.Session()
|
||||
session.headers.update(build_headers())
|
||||
current_datetime = datetime.datetime.now()
|
||||
cur_year = current_datetime.year
|
||||
res_list = []
|
||||
for per_year in range(2000, cur_year + 1):
|
||||
start = f"{per_year}0101"
|
||||
end = f"{per_year}1231"
|
||||
failed_years = []
|
||||
for year in range(2000, cur_year + 1):
|
||||
start = f"{year}0101"
|
||||
end = f"{year}1231"
|
||||
formatted_url = url + f"&beg={start}&end={end}".format(start=start, end=end)
|
||||
try:
|
||||
resp = session.get(formatted_url, timeout=10)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
data = payload.get("data")
|
||||
if not data or "klines" not in data:
|
||||
continue
|
||||
for attempt in range(max_retry):
|
||||
try:
|
||||
resp = session.get(formatted_url, timeout=10)
|
||||
resp.raise_for_status()
|
||||
|
||||
klines = data["klines"]
|
||||
res_list.extend(pd.Timestamp(x.split(",")[0]) for x in klines)
|
||||
data = resp.json().get("data")
|
||||
if not data or "klines" not in data:
|
||||
raise ValueError("missing klines")
|
||||
|
||||
except requests.RequestException as e:
|
||||
continue
|
||||
res_list.extend(pd.Timestamp(x.split(",")[0]) for x in data["klines"])
|
||||
break
|
||||
|
||||
time.sleep(random.uniform(0.5, 1.2))
|
||||
except Exception as e:
|
||||
time.sleep(random.uniform(0.8, 1.5))
|
||||
else:
|
||||
failed_years.append(year)
|
||||
|
||||
if failed_years:
|
||||
logger.warning(f"Calendar incomplete, failed years: {failed_years}")
|
||||
|
||||
return sorted(set(res_list))
|
||||
|
||||
# _value_list = requests.get(url, timeout=None).json()["data"]["klines"]
|
||||
# return sorted(map(lambda x: pd.Timestamp(x.split(",")[0]), _value_list))
|
||||
|
||||
calendar = _CALENDAR_MAP.get(bench_code, None)
|
||||
if calendar is None:
|
||||
if bench_code.startswith("US_") or bench_code.startswith("IN_") or bench_code.startswith("BR_"):
|
||||
|
||||
Reference in New Issue
Block a user