mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 14:01:28 +08:00
* init commit * change the version number * rich the docs&fix cache docs * update index readme * Modify cache class name * Modify sharpe to information_ratio * Modify Group- to Group * add the description of graphical results & fix the backtest docs * fix docs in details * update docs * Update introduction.rst * Update README.md * Update introduction.rst * Update introduction.rst * Update introduction.rst * Update installation.rst * Update installation.rst * Update initialization.rst * Update getdata.rst * Update integration.rst * Update initialization.rst * Update getdata.rst * Update estimator.rst Modify some typos. * Update README.md Modify the typos. * Update initialization.rst * Update data.rst * Update report.rst * Update estimator.rst * Update cumulative_return.py * Update model.rst * Update rank_label.py * Update cumulative_return.py * Update strategy.rst * Update getdata.rst * Update backtest.rst * Update integration.rst * Update getdata.rst * Update introduction.rst * Update introduction.rst * Update README.md * Update report.rst * Update integration.rst Fix typos * Update installation.rst Fix typos * Update getdata.rst * Update initialization.rst Fix typos. 
* add quick start docs&fix details * fix estimator docs & fix strategy docs * fix the cache in data.rst * update documents * Fix Corr && Rsquare * fix data retrieval example to csi300 & fix a data bug * fix filter bug * Fix data collector * Modify model args * add the log & fix README.md\quick.rst * add environment dependencies & add introduction of qlib-server online mode * fix image center format & set log_only of docs is True * fix README.md format * update data preparation & readme logo image * get_data support version * Modify analysis names * Modify analysis graph * update report.rst & data.rst * commit estimator for merge * minimal requirements * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update READEME.md * Update READEME.md * update estimator * Fix doc urls * fix get_data.py docstring * update test_get_data.py * Update docs * Update docs * Update docs Co-authored-by: bxdd <bxddream@gmail.com> Co-authored-by: zhupr <zhu.pengrong@foxmail.com> Co-authored-by: Wendi Li <wendili.academic@qq.com> Co-authored-by: Dingsu Wang <dingsu.wang@gmail.com> Co-authored-by: bxdd <45119470+bxdd@users.noreply.github.com> Co-authored-by: cslwqxx <cslwqxx@users.noreply.github.com>
94 lines
2.7 KiB
Python
94 lines
2.7 KiB
Python
# Copyright (c) Microsoft Corporation.
|
|
# Licensed under the MIT License.
|
|
|
|
import zipfile
from pathlib import Path
from typing import Union

import fire
import requests
from loguru import logger
from tqdm import tqdm
|
|
|
|
|
|
class GetData:
    """Download and unpack the Qlib data archives hosted at ``REMOTE_URL``.

    The class is handed to ``fire.Fire`` at the bottom of this file, so each
    public method doubles as a command-line sub-command.
    """

    # Base URL that every archive name is appended to.
    REMOTE_URL = "http://fintech.msra.cn/stock_data/downloads"

    def __init__(self, delete_zip_file=False):
        """Create a downloader.

        Parameters
        ----------
        delete_zip_file : bool, optional
            Whether to delete the downloaded zip file once it has been
            extracted, by default False
        """
        self.delete_zip_file = delete_zip_file

    def _download_data(self, file_name: str, target_dir: Union[Path, str]):
        """Download ``file_name`` from ``REMOTE_URL`` into ``target_dir`` and unzip it there.

        Parameters
        ----------
        file_name : str
            name of the archive on the remote server; also used as the local file name
        target_dir : Path or str
            directory the archive is saved to and extracted into; ``~`` is
            expanded and missing parent directories are created

        Raises
        ------
        requests.exceptions.HTTPError
            if the server answers with any status code other than 200
        """
        target_dir = Path(target_dir).expanduser()
        target_dir.mkdir(exist_ok=True, parents=True)

        url = f"{self.REMOTE_URL}/{file_name}"
        target_path = target_dir.joinpath(file_name)

        chunk_size = 1024
        # The context manager releases the streamed connection even if the
        # status check or the file write fails.
        with requests.get(url, stream=True) as resp:
            if resp.status_code != 200:
                raise requests.exceptions.HTTPError(
                    f"failed to download {url}: HTTP status {resp.status_code}",
                    response=resp,
                )

            logger.info(f"{file_name} downloading......")
            # A missing Content-Length becomes None so tqdm shows a plain
            # counter instead of a bar with a zero total.
            total_size = int(resp.headers.get("Content-Length", 0)) or None
            with tqdm(total=total_size) as p_bar, target_path.open("wb") as fp:
                for chunk in resp.iter_content(chunk_size=chunk_size):
                    fp.write(chunk)
                    # Advance by the bytes actually received; the last chunk
                    # is usually shorter than chunk_size.
                    p_bar.update(len(chunk))

        self._unzip(target_path, target_dir)
        if self.delete_zip_file:
            target_path.unlink()

    @staticmethod
    def _unzip(file_path: Path, target_dir: Path):
        """Extract every member of the zip archive ``file_path`` into ``target_dir``.

        Members are extracted one at a time so that tqdm can report progress.
        """
        logger.info(f"{file_path} unzipping......")
        destination = str(target_dir.resolve())
        with zipfile.ZipFile(str(file_path.resolve()), "r") as zp:
            for member in tqdm(zp.namelist()):
                zp.extract(member, destination)

    def qlib_data_cn(self, target_dir="~/.qlib/qlib_data/cn_data", version="v1"):
        """download cn qlib data from remote

        Parameters
        ----------
        target_dir: str
            data save directory
        version: str
            data version, value from [v0, v1], by default v1

        Examples
        --------
        python get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data --version v1
        """
        file_name = f"qlib_data_cn_{version}.zip"
        self._download_data(file_name, target_dir)

    def csv_data_cn(self, target_dir="~/.qlib/csv_data/cn_data"):
        """download cn csv data from remote

        Parameters
        ----------
        target_dir: str
            data save directory

        Examples
        --------
        python get_data.py csv_data_cn --target_dir ~/.qlib/csv_data/cn_data
        """
        file_name = "csv_data_cn.zip"
        self._download_data(file_name, target_dir)
|
|
|
|
|
|
if __name__ == "__main__":
    # Hand the class to python-fire so that each public method becomes a
    # command-line sub-command, e.g. `python get_data.py qlib_data_cn`.
    fire.Fire(component=GetData)
|