mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
black format
This commit is contained in:
@@ -28,6 +28,7 @@ from data_collector.utils import get_en_fund_symbols
|
||||
INDEX_BENCH_URL = "http://api.fund.eastmoney.com/f10/lsjz?callback=jQuery_&fundCode={index_code}&pageIndex=1&pageSize={numberOfHistoricalDaysToCrawl}&startDate={startDate}&endDate={endDate}"
|
||||
REGION_CN = "CN"
|
||||
|
||||
|
||||
class FundData:
|
||||
START_DATETIME = pd.Timestamp("2000-01-01")
|
||||
END_DATETIME = pd.Timestamp(datetime.datetime.now() + pd.Timedelta(days=1))
|
||||
@@ -85,12 +86,14 @@ class FundData:
|
||||
|
||||
try:
|
||||
# TODO: numberOfHistoricalDaysToCrawl should be big enough
|
||||
url = INDEX_BENCH_URL.format(index_code=symbol, numberOfHistoricalDaysToCrawl=10000, startDate=start, endDate=end)
|
||||
url = INDEX_BENCH_URL.format(
|
||||
index_code=symbol, numberOfHistoricalDaysToCrawl=10000, startDate=start, endDate=end
|
||||
)
|
||||
resp = requests.get(url, headers={"referer": "http://fund.eastmoney.com/110022.html"})
|
||||
|
||||
if resp.status_code != 200:
|
||||
raise ValueError("request error")
|
||||
|
||||
|
||||
data = json.loads(resp.text.split("(")[-1].split(")")[0])
|
||||
|
||||
# Some funds don't show the net value, example: http://fundf10.eastmoney.com/jjjz_010288.html
|
||||
@@ -280,6 +283,7 @@ class FundCollector:
|
||||
logger.warning(f"less than {self.min_numbers_trading} fund list: {list(self._mini_symbol_map.keys())}")
|
||||
logger.info(f"total {len(self.fund_list)}, error: {len(set(fund_list))}")
|
||||
|
||||
|
||||
class FundollectorCN(FundCollector, ABC):
|
||||
def get_fund_list(self):
|
||||
logger.info("get cn fund symbols......")
|
||||
@@ -297,6 +301,7 @@ class FundCollectorCN1d(FundollectorCN):
|
||||
def min_numbers_trading(self):
|
||||
return 252 / 4
|
||||
|
||||
|
||||
class Run:
|
||||
def __init__(self, source_dir=None, max_workers=4, region=REGION_CN):
|
||||
"""
|
||||
@@ -354,9 +359,7 @@ class Run:
|
||||
$ python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_1d --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
|
||||
"""
|
||||
|
||||
_class = getattr(
|
||||
self._cur_module, f"FundCollector{self.region.upper()}{interval}"
|
||||
) # type: Type[FundCollector]
|
||||
_class = getattr(self._cur_module, f"FundCollector{self.region.upper()}{interval}") # type: Type[FundCollector]
|
||||
_class(
|
||||
self.source_dir,
|
||||
max_workers=self.max_workers,
|
||||
@@ -369,5 +372,6 @@ class Run:
|
||||
limit_nums=limit_nums,
|
||||
).collector_data()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
fire.Fire(Run)
|
||||
|
||||
@@ -239,7 +239,7 @@ def get_en_fund_symbols(qlib_data_path: [str, Path] = None) -> list:
|
||||
try:
|
||||
_symbols = []
|
||||
for sub_data in re.findall(r"[\[](.*?)[\]]", resp.content.decode().split("= [")[-1].replace("];", "")):
|
||||
data = sub_data.replace("\"","").replace("'","")
|
||||
data = sub_data.replace('"', "").replace("'", "")
|
||||
# TODO: do we need other information, like fund_name from ['000001', 'HXCZHH', '华夏成长混合', '混合型', 'HUAXIACHENGZHANGHUNHE']?
|
||||
_symbols.append(data.split(",")[0])
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user