mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
Fix issue 1892 (#1916)
* fix: resolve #1892 by retrieving the data page by page * fix: resolve #1892 by retrieving the data page by page * reformat with black --------- Co-authored-by: shengyuhong <shengyuhong@bytedance.com> Co-authored-by: fibers <yu8582@126.com>
This commit is contained in:
@@ -202,15 +202,56 @@ def get_hs_stock_symbols() -> list:
|
||||
-------
|
||||
{600000.ss, 600001.ss, 600002.ss, 600003.ss, ...}
|
||||
"""
|
||||
url = "http://99.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&po=1&np=1&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f12"
|
||||
try:
|
||||
resp = requests.get(url, timeout=None)
|
||||
resp.raise_for_status()
|
||||
except requests.exceptions.HTTPError as e:
|
||||
raise requests.exceptions.HTTPError(f"Request to {url} failed with status code {resp.status_code}") from e
|
||||
# url = "http://99.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&po=1&np=1&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f12"
|
||||
|
||||
base_url = "http://99.push2.eastmoney.com/api/qt/clist/get"
|
||||
params = {
|
||||
"pn": 1, # page number
|
||||
"pz": 100, # page size, default to 100
|
||||
"po": 1,
|
||||
"np": 1,
|
||||
"fs": "m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048",
|
||||
"fields": "f12",
|
||||
}
|
||||
|
||||
_symbols = []
|
||||
page = 1
|
||||
|
||||
while True:
|
||||
params["pn"] = page
|
||||
try:
|
||||
_symbols = [_v["f12"] for _v in resp.json()["data"]["diff"]]
|
||||
resp = requests.get(base_url, params=params, timeout=None)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
# Check if response contains valid data
|
||||
if not data or "data" not in data or not data["data"] or "diff" not in data["data"]:
|
||||
logger.warning(f"Invalid response structure on page {page}")
|
||||
break
|
||||
|
||||
# fetch the current page data
|
||||
current_symbols = [_v["f12"] for _v in data["data"]["diff"]]
|
||||
|
||||
if not current_symbols: # It's the last page if there is no data in current page
|
||||
logger.info(f"Last page reached: {page - 1}")
|
||||
break
|
||||
|
||||
_symbols.extend(current_symbols)
|
||||
|
||||
# show progress
|
||||
logger.info(
|
||||
f"Page {page}: fetch {len(current_symbols)} stocks:[{current_symbols[0]} ... {current_symbols[-1]}]"
|
||||
)
|
||||
|
||||
page += 1
|
||||
|
||||
# sleep time to avoid overloading the server
|
||||
time.sleep(0.5)
|
||||
|
||||
except requests.exceptions.HTTPError as e:
|
||||
raise requests.exceptions.HTTPError(
|
||||
f"Request to {base_url} failed with status code {resp.status_code}"
|
||||
) from e
|
||||
except Exception as e:
|
||||
logger.warning("An error occurred while extracting data from the response.")
|
||||
raise
|
||||
|
||||
Reference in New Issue
Block a user