1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

optimize get_data code

This commit is contained in:
Linlang
2024-03-06 15:17:04 +08:00
parent 0c14952136
commit fd0863b0bb
3 changed files with 44 additions and 1 deletions

View File

@@ -27,7 +27,7 @@ pip install arctic # NOTE: pip may fail to resolve the right package dependency
2. Please follow the steps below to download the example data
```bash
cd examples/orderbook_data/
python scripts/get_data.py qlib_data --target_dir "~/.qlib/orderbook_data" --name orderbook_data
python get_data.py other_data --target_dir ~/.qlib/other_data/orderbook_data --name highfreq_orderbook_example_data.zip
```
3. Please import the example data into your MongoDB instance

View File

@@ -130,6 +130,47 @@ class GetData:
logger.warning(f"delete: {_p}")
shutil.rmtree(_p)
def other_data(
    self,
    target_dir="~/.qlib/other_data",
    name=None,
    version=None,
    delete_old=True,
):
    """Download a single file, identified by name, from the remote location.

    If ``name`` is missing, or ``check_dataset`` reports that the file is
    not available, a warning is logged and nothing is downloaded.

    Parameters
    ----------
    target_dir: str
        data save directory
    name: str
        filename, by default None
    version: str
        data version, value from [v1, ...], by default None; when given,
        the remote path becomes ``{version}/{name}``, otherwise ``name``
        is used on its own
    delete_old: bool
        delete an existing directory, by default True
        (forwarded unchanged to ``download_data``)

    Returns
    -------
    None

    Examples
    --------
    # get orderbook data
    python get_data.py other_data --target_dir ~/.qlib/other_data/orderbook_data --name highfreq_orderbook_example_data.zip

    When this command is run, the data will be downloaded from this link: https://qlibpublic.blob.core.windows.net/data/default/stock_data/highfreq_orderbook_example_data.zip?{token}
    """
    # Identity comparison is the correct test for the None singleton.
    if name is None:
        logger.warning("Specify the name of the file to be downloaded.")
        return

    # Only prefix the version when the caller actually supplied one.
    file_name = name if version is None else f"{version}/{name}"

    if not self.check_dataset(file_name):
        logger.warning("The file you specified does not exist in the remote repository.")
        return

    # NOTE(review): the existence check above uses the name as given, while
    # the download uses the lower-cased name - confirm this is intended.
    self.download_data(file_name.lower(), target_dir, delete_old)
def qlib_data(
self,
name="qlib_data",

View File

@@ -9,6 +9,8 @@ from qlib.tests import TestAutoData
class WorkflowTest(TestAutoData):
# Creating the directory manually doesn't work with mlflow,
# so we add a subfolder named .trash when we create the directory.
TMP_PATH = Path("./.mlruns_tmp/.trash")
def tearDown(self) -> None: