1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Fix issue 1892 (#1916)

* fix: resolve #1892 by retrieving the data page by page

* fix: resolve #1892 by retrieving the data page by page

* reformat with black

---------

Co-authored-by: shengyuhong <shengyuhong@bytedance.com>
Co-authored-by: fibers <yu8582@126.com>
This commit is contained in:
Linlang
2025-04-27 13:58:10 +08:00
committed by GitHub
parent 320bd65e19
commit 950408ef46

View File

@@ -202,18 +202,59 @@ def get_hs_stock_symbols() -> list:
------- -------
{600000.ss, 600001.ss, 600002.ss, 600003.ss, ...} {600000.ss, 600001.ss, 600002.ss, 600003.ss, ...}
""" """
url = "http://99.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&po=1&np=1&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f12" # url = "http://99.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&po=1&np=1&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f12"
try:
resp = requests.get(url, timeout=None)
resp.raise_for_status()
except requests.exceptions.HTTPError as e:
raise requests.exceptions.HTTPError(f"Request to {url} failed with status code {resp.status_code}") from e
try: base_url = "http://99.push2.eastmoney.com/api/qt/clist/get"
_symbols = [_v["f12"] for _v in resp.json()["data"]["diff"]] params = {
except Exception as e: "pn": 1, # page number
logger.warning("An error occurred while extracting data from the response.") "pz": 100, # page size, default to 100
raise "po": 1,
"np": 1,
"fs": "m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048",
"fields": "f12",
}
_symbols = []
page = 1
while True:
params["pn"] = page
try:
resp = requests.get(base_url, params=params, timeout=None)
resp.raise_for_status()
data = resp.json()
# Check if response contains valid data
if not data or "data" not in data or not data["data"] or "diff" not in data["data"]:
logger.warning(f"Invalid response structure on page {page}")
break
# fetch the current page data
current_symbols = [_v["f12"] for _v in data["data"]["diff"]]
if not current_symbols: # It's the last page if there is no data in current page
logger.info(f"Last page reached: {page - 1}")
break
_symbols.extend(current_symbols)
# show progress
logger.info(
f"Page {page}: fetch {len(current_symbols)} stocks:[{current_symbols[0]} ... {current_symbols[-1]}]"
)
page += 1
# sleep time to avoid overloading the server
time.sleep(0.5)
except requests.exceptions.HTTPError as e:
raise requests.exceptions.HTTPError(
f"Request to {base_url} failed with status code {resp.status_code}"
) from e
except Exception as e:
logger.warning("An error occurred while extracting data from the response.")
raise
if len(_symbols) < 3900: if len(_symbols) < 3900:
raise ValueError("The complete list of stocks is not available.") raise ValueError("The complete list of stocks is not available.")