1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 14:01:28 +08:00
Files
qlib/scripts/get_data.py
you-n-g de9e13b171 release-0.5.0 (#1)
* init commit

* change the version number

* enrich the docs & fix cache docs

* update index readme

* Modify cache class name

* Modify sharpe to information_ratio

* Modify Group- to Group

* add the description of graphical results & fix the backtest docs

* fix docs in details

* update docs

* Update introduction.rst

* Update README.md

* Update introduction.rst

* Update introduction.rst

* Update introduction.rst

* Update installation.rst

* Update installation.rst

* Update initialization.rst

* Update getdata.rst

* Update integration.rst

* Update initialization.rst

* Update getdata.rst

* Update estimator.rst

Modify some typos.

* Update README.md

Modify the typos.

* Update initialization.rst

* Update data.rst

* Update report.rst

* Update estimator.rst

* Update cumulative_return.py

* Update model.rst

* Update rank_label.py

* Update cumulative_return.py

* Update strategy.rst

* Update getdata.rst

* Update backtest.rst

* Update integration.rst

* Update getdata.rst

* Update introduction.rst

* Update introduction.rst

* Update README.md

* Update report.rst

* Update integration.rst

Fix typos

* Update installation.rst

Fix typos

* Update getdata.rst

* Update initialization.rst

Fix typos.

* add quick start docs & fix details

* fix estimator docs & fix strategy docs

* fix the cache in data.rst

* update documents

* Fix Corr && Rsquare

* fix data retrieval example to csi300 & fix a data bug

* fix filter bug

* Fix data collector

* Modify model args

* add the log & fix README.md\quick.rst

* add environment dependencies & add introduction of qlib-server online mode

* fix image center format & set log_only of docs to True

* fix README.md format

* update data preparation & readme logo image

* get_data support version

* Modify analysis names

* Modify analysis graph

* update report.rst & data.rst

* commit estimator for merge

* minimal requirements

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* update estimator

* Fix doc urls

* fix get_data.py docstring

* update test_get_data.py

* Update docs

* Update docs

* Update docs

Co-authored-by: bxdd <bxddream@gmail.com>
Co-authored-by: zhupr <zhu.pengrong@foxmail.com>
Co-authored-by: Wendi Li <wendili.academic@qq.com>
Co-authored-by: Dingsu Wang <dingsu.wang@gmail.com>
Co-authored-by: bxdd <45119470+bxdd@users.noreply.github.com>
Co-authored-by: cslwqxx <cslwqxx@users.noreply.github.com>
2020-09-24 12:01:39 +08:00

94 lines
2.7 KiB
Python

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import zipfile
from pathlib import Path
from typing import Union

import fire
import requests
from loguru import logger
from tqdm import tqdm
class GetData:
    """Download and unpack the Qlib data archives published under ``REMOTE_URL``.

    Each public method fetches one zip archive, extracts it into the
    requested directory and optionally removes the archive afterwards.
    """

    # Base URL; the archive file name is appended to it to form the download URL.
    REMOTE_URL = "http://fintech.msra.cn/stock_data/downloads"

    def __init__(self, delete_zip_file=False):
        """
        Parameters
        ----------
        delete_zip_file : bool, optional
            Whether to delete the zip file after it has been extracted, by default False
        """
        self.delete_zip_file = delete_zip_file

    def _download_data(self, file_name: str, target_dir: Union[Path, str]):
        """Download ``file_name`` from ``REMOTE_URL`` into ``target_dir`` and unzip it there.

        Parameters
        ----------
        file_name : str
            name of the zip archive on the remote server
        target_dir : Path or str
            directory receiving the archive and its extracted content;
            ``~`` is expanded and missing directories are created

        Raises
        ------
        requests.exceptions.HTTPError
            if the server answers with any status code other than 200
        """
        target_dir = Path(target_dir).expanduser()
        target_dir.mkdir(exist_ok=True, parents=True)

        url = f"{self.REMOTE_URL}/{file_name}"
        target_path = target_dir.joinpath(file_name)

        chunk_size = 1024
        # The context manager releases the streamed connection even when the
        # status check or the file write fails.
        with requests.get(url, stream=True) as resp:
            if resp.status_code != 200:
                raise requests.exceptions.HTTPError(
                    f"failed to download {url}: HTTP status {resp.status_code}"
                )

            logger.info(f"{file_name} downloading......")
            with tqdm(total=int(resp.headers.get("Content-Length", 0))) as p_bar:
                with target_path.open("wb") as fp:
                    for chunk in resp.iter_content(chunk_size=chunk_size):
                        fp.write(chunk)
                        # Advance by the bytes actually received: the last
                        # chunk is usually shorter than chunk_size.
                        p_bar.update(len(chunk))

        self._unzip(target_path, target_dir)
        if self.delete_zip_file:
            target_path.unlink()

    @staticmethod
    def _unzip(file_path: Path, target_dir: Path):
        """Extract every member of the zip archive ``file_path`` into ``target_dir``.

        Parameters
        ----------
        file_path : Path
            zip archive to read
        target_dir : Path
            directory the members are extracted into
        """
        logger.info(f"{file_path} unzipping......")
        destination = str(target_dir.resolve())
        with zipfile.ZipFile(str(file_path.resolve()), "r") as zp:
            # Extract member by member so tqdm can report progress.
            for _file in tqdm(zp.namelist()):
                zp.extract(_file, destination)

    def qlib_data_cn(self, target_dir="~/.qlib/qlib_data/cn_data", version="v1"):
        """download cn qlib data from remote

        Parameters
        ----------
        target_dir: str
            data save directory
        version: str
            data version, value from [v0, v1], by default v1

        Examples
        ---------
        python get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data --version v1
        """
        file_name = f"qlib_data_cn_{version}.zip"
        self._download_data(file_name, target_dir)

    def csv_data_cn(self, target_dir="~/.qlib/csv_data/cn_data"):
        """download cn csv data from remote

        Parameters
        ----------
        target_dir: str
            data save directory

        Examples
        ---------
        python get_data.py csv_data_cn --target_dir ~/.qlib/csv_data/cn_data
        """
        file_name = "csv_data_cn.zip"
        self._download_data(file_name, target_dir)
if __name__ == "__main__":
    # Hand the class to python-fire, which builds the command line from it,
    # e.g. `python get_data.py qlib_data_cn --target_dir <dir> --version v1`.
    fire.Fire(GetData)