diff --git a/README.md b/README.md index 787075d6a..6f416d420 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,12 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor ## Data Preparation Load and prepare data by running the following code: ```bash + # get 1d data python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn + + # get 1min data + python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min + ``` This dataset is created by public data collected by [crawler scripts](scripts/data_collector/), which have been released in diff --git a/docs/component/data.rst b/docs/component/data.rst index dd32c5cd8..2d57fee32 100644 --- a/docs/component/data.rst +++ b/docs/component/data.rst @@ -48,8 +48,12 @@ Qlib Format Dataset .. code-block:: bash + # download 1d python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn + # download 1min + python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min + In addition to China-Stock data, ``Qlib`` also includes a US-Stock dataset, which can be downloaded with the following command: .. 
code-block:: bash diff --git a/qlib/tests/data.py b/qlib/tests/data.py index 6f8a0c9e0..3bf6a2c96 100644 --- a/qlib/tests/data.py +++ b/qlib/tests/data.py @@ -86,7 +86,6 @@ class GetData: @staticmethod def _delete_qlib_data(file_dir: Path): - logger.info(f"delete {file_dir}") rm_dirs = [] for _name in ["features", "calendars", "instruments", "features_cache", "dataset_cache"]: _p = file_dir.joinpath(_name) @@ -133,7 +132,11 @@ class GetData: Examples --------- + # get 1d data python get_data.py qlib_data --name qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + + # get 1min data + python get_data.py qlib_data --name qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --interval 1min --region cn ------- """ diff --git a/scripts/data_collector/yahoo/README.md b/scripts/data_collector/yahoo/README.md index d4c2d2c9c..ec233150c 100644 --- a/scripts/data_collector/yahoo/README.md +++ b/scripts/data_collector/yahoo/README.md @@ -21,7 +21,7 @@ pip install -r requirements.txt ### CN Data -#### 1d +#### 1d from yahoo ```bash @@ -33,18 +33,26 @@ python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_1d # dump data cd qlib/scripts -python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/stock_data/source/qlib_cn_1d --freq day --exclude_fields date,adjclose,dividends,splits,symbol - -# using -import qlib -from qlib.data import D - -qlib.init(provider_uri="~/.qlib/stock_data/source/qlib_cn_1d", region="CN") -df = D.features(D.instruments("all"), ["$close"], freq="day") +python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/qlib_data/qlib_cn_1d --freq day --exclude_fields date,adjclose,dividends,splits,symbol ``` -#### 1min +#### 1d from qlib +```bash +python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1d --region cn +``` + +#### using data + +```python +import qlib +from qlib.data import D + 
+qlib.init(provider_uri="~/.qlib/qlib_data/qlib_cn_1d", region="CN") +df = D.features(D.instruments("all"), ["$close"], freq="day") +``` + +#### 1min from yahoo ```bash @@ -56,20 +64,28 @@ python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_1mi # dump data cd qlib/scripts -python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1min_nor --qlib_dir ~/.qlib/stock_data/source/qlib_cn_1min --freq 1min --exclude_fields date,adjclose,dividends,splits,symbol +python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1min_nor --qlib_dir ~/.qlib/qlib_data/qlib_cn_1min --freq 1min --exclude_fields date,adjclose,dividends,splits,symbol +``` -# using +#### 1min from qlib +```bash +python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --interval 1min --region cn +``` + +#### using data + +```python import qlib from qlib.data import D -qlib.init(provider_uri="~/.qlib/stock_data/source/qlib_cn_1min", region="CN") +qlib.init(provider_uri="~/.qlib/qlib_data/qlib_cn_1min", region="CN") df = D.features(D.instruments("all"), ["$close"], freq="1min") ``` ### US Data -#### 1d +#### 1d from yahoo ```bash @@ -82,12 +98,22 @@ python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/us_1d # dump data cd qlib/scripts python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/stock_data/source/qlib_us_1d --freq day --exclude_fields date,adjclose,dividends,splits,symbol +``` +#### 1d from qlib + +```bash +python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_us_1d --region us +``` + +#### using data + +```python # using import qlib from qlib.data import D -qlib.init(provider_uri="~/.qlib/stock_data/source/qlib_us_1d", region="US") +qlib.init(provider_uri="~/.qlib/qlib_data/qlib_us_1d", region="US") df = D.features(D.instruments("all"), ["$close"], freq="day") ```