1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

fix the bug that the HS_SYMBOLS_URL is 404 (#1758)

* fix the bug that the HS_SYMBOLS_URL is 404

* fix bug

* format with black

* fix pylint error

* change error code

* fix ci error

* fix ci error

* optimize code

* optimize code

* add comments

---------

Co-authored-by: Linlang <Lv.Linlang@hotmail.com>
This commit is contained in:
Hao Zhao
2024-06-01 08:07:34 +08:00
committed by GitHub
parent 598017f634
commit 35e0fdd1c0
3 changed files with 43 additions and 12 deletions

View File

@@ -45,6 +45,9 @@ jobs:
- name: Qlib installation test - name: Qlib installation test
run: | run: |
# 2024-05-30: scs released a new version, 3.2.4.post2, which causes the CI to fail,
# so the scs version is capped at 3.2.4 for now.
python -m pip install "scs<=3.2.4"
python -m pip install pyqlib python -m pip install pyqlib
- name: Install Lightgbm for MacOS - name: Install Lightgbm for MacOS

View File

@@ -15,7 +15,6 @@ from typing import Iterable, Tuple, List
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from lxml import etree
from loguru import logger from loguru import logger
from yahooquery import Ticker from yahooquery import Ticker
from tqdm import tqdm from tqdm import tqdm
@@ -190,17 +189,43 @@ def get_hs_stock_symbols() -> list:
global _HS_SYMBOLS # pylint: disable=W0603 global _HS_SYMBOLS # pylint: disable=W0603
def _get_symbol(): def _get_symbol():
_res = set() """
for _k, _v in (("ha", "ss"), ("sa", "sz"), ("gem", "sz")): Get the stock pool from a web page and process it into the format required by yahooquery.
resp = requests.get(HS_SYMBOLS_URL.format(s_type=_k), timeout=None) Format of data retrieved from the web page: 600519, 000001
_res |= set( The data format required by yahooquery: 600519.ss, 000001.sz
map(
lambda x: "{}.{}".format(re.findall(r"\d+", x)[0], _v), # pylint: disable=W0640 Returns
etree.HTML(resp.text).xpath("//div[@class='result']/ul//li/a/text()"), # pylint: disable=I1101 -------
) set: Returns the set of symbol codes.
)
time.sleep(3) Examples:
return _res -------
{600000.ss, 600001.ss, 600002.ss, 600003.ss, ...}
"""
url = "http://99.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&po=1&np=1&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f12"
try:
resp = requests.get(url, timeout=None)
resp.raise_for_status()
except requests.exceptions.HTTPError as e:
raise requests.exceptions.HTTPError(f"Request to {url} failed with status code {resp.status_code}") from e
try:
_symbols = [_v["f12"] for _v in resp.json()["data"]["diff"]]
except Exception as e:
logger.warning("An error occurred while extracting data from the response.")
raise
if len(_symbols) < 3900:
raise ValueError("The complete list of stocks is not available.")
# Append the exchange suffix to each stock code to match the format yahooquery expects; without it, no data is fetched.
_symbols = [
_symbol + ".ss" if _symbol.startswith("6") else _symbol + ".sz" if _symbol.startswith(("0", "3")) else None
for _symbol in _symbols
]
_symbols = [_symbol for _symbol in _symbols if _symbol is not None]
return set(_symbols)
if _HS_SYMBOLS is None: if _HS_SYMBOLS is None:
symbols = set() symbols = set()

View File

@@ -166,6 +166,9 @@ setup(
"lxml", "lxml",
"baostock", "baostock",
"yahooquery", "yahooquery",
# 2024-05-30: scs released a new version, 3.2.4.post2;
# that version causes the qlib installation to fail, so the scs version is capped at 3.2.4 for now.
"scs<=3.2.4",
"beautifulsoup4", "beautifulsoup4",
# In version 0.4.11 of tianshou, the code: # In version 0.4.11 of tianshou, the code:
# logits, hidden = self.actor(batch.obs, state=state, info=batch.info) # logits, hidden = self.actor(batch.obs, state=state, info=batch.info)