1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

black format

This commit is contained in:
wangershi
2021-03-01 21:10:46 +08:00
parent 51baf57b40
commit 82353b20e1
2 changed files with 10 additions and 6 deletions

View File

@@ -28,6 +28,7 @@ from data_collector.utils import get_en_fund_symbols
INDEX_BENCH_URL = "http://api.fund.eastmoney.com/f10/lsjz?callback=jQuery_&fundCode={index_code}&pageIndex=1&pageSize={numberOfHistoricalDaysToCrawl}&startDate={startDate}&endDate={endDate}"
REGION_CN = "CN"
class FundData:
START_DATETIME = pd.Timestamp("2000-01-01")
END_DATETIME = pd.Timestamp(datetime.datetime.now() + pd.Timedelta(days=1))
@@ -85,12 +86,14 @@ class FundData:
try:
# TODO: numberOfHistoricalDaysToCrawl should be big enough
url = INDEX_BENCH_URL.format(index_code=symbol, numberOfHistoricalDaysToCrawl=10000, startDate=start, endDate=end)
url = INDEX_BENCH_URL.format(
index_code=symbol, numberOfHistoricalDaysToCrawl=10000, startDate=start, endDate=end
)
resp = requests.get(url, headers={"referer": "http://fund.eastmoney.com/110022.html"})
if resp.status_code != 200:
raise ValueError("request error")
data = json.loads(resp.text.split("(")[-1].split(")")[0])
# Some funds don't show the net value, example: http://fundf10.eastmoney.com/jjjz_010288.html
@@ -280,6 +283,7 @@ class FundCollector:
logger.warning(f"less than {self.min_numbers_trading} fund list: {list(self._mini_symbol_map.keys())}")
logger.info(f"total {len(self.fund_list)}, error: {len(set(fund_list))}")
class FundollectorCN(FundCollector, ABC):
def get_fund_list(self):
logger.info("get cn fund symbols......")
@@ -297,6 +301,7 @@ class FundCollectorCN1d(FundollectorCN):
def min_numbers_trading(self):
return 252 / 4
class Run:
def __init__(self, source_dir=None, max_workers=4, region=REGION_CN):
"""
@@ -354,9 +359,7 @@ class Run:
$ python collector.py download_data --source_dir ~/.qlib/fund_data/source/cn_1d --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
"""
_class = getattr(
self._cur_module, f"FundCollector{self.region.upper()}{interval}"
) # type: Type[FundCollector]
_class = getattr(self._cur_module, f"FundCollector{self.region.upper()}{interval}") # type: Type[FundCollector]
_class(
self.source_dir,
max_workers=self.max_workers,
@@ -369,5 +372,6 @@ class Run:
limit_nums=limit_nums,
).collector_data()
if __name__ == "__main__":
fire.Fire(Run)

View File

@@ -239,7 +239,7 @@ def get_en_fund_symbols(qlib_data_path: [str, Path] = None) -> list:
try:
_symbols = []
for sub_data in re.findall(r"[\[](.*?)[\]]", resp.content.decode().split("= [")[-1].replace("];", "")):
data = sub_data.replace("\"","").replace("'","")
data = sub_data.replace('"', "").replace("'", "")
# TODO: do we need other information, such as fund_name, from ['000001', 'HXCZHH', '华夏成长混合', '混合型', 'HUAXIACHENGZHANGHUNHE']
_symbols.append(data.split(",")[0])
except Exception as e: