1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00
Files
qlib/tests/test_dump_data.py
you-n-g de9e13b171 release-0.5.0 (#1)
* init commit

* change the version number

* enrich the docs & fix cache docs

* update index readme

* Modify cache class name

* Modify sharpe to information_ratio

* Modify Group- to Group

* add the description of graphical results & fix the backtest docs

* fix docs in details

* update docs

* Update introduction.rst

* Update README.md

* Update introduction.rst

* Update introduction.rst

* Update introduction.rst

* Update installation.rst

* Update installation.rst

* Update initialization.rst

* Update getdata.rst

* Update integration.rst

* Update initialization.rst

* Update getdata.rst

* Update estimator.rst

Modify some typos.

* Update README.md

Modify the typos.

* Update initialization.rst

* Update data.rst

* Update report.rst

* Update estimator.rst

* Update cumulative_return.py

* Update model.rst

* Update rank_label.py

* Update cumulative_return.py

* Update strategy.rst

* Update getdata.rst

* Update backtest.rst

* Update integration.rst

* Update getdata.rst

* Update introduction.rst

* Update introduction.rst

* Update README.md

* Update report.rst

* Update integration.rst

Fix typos

* Update installation.rst

Fix typos

* Update getdata.rst

* Update initialization.rst

Fix typos.

* add quick start docs & fix details

* fix estimator docs & fix strategy docs

* fix the cache in data.rst

* update documents

* Fix Corr && Rsquare

* fix data retrieval example to csi300 & fix a data bug

* fix filter bug

* Fix data collector

* Modify model args

* add the log & fix README.md\quick.rst

* add environment dependencies & add introduction of qlib-server online mode

* fix image center format & set log_only of docs to True

* fix README.md format

* update data preparation & readme logo image

* get_data support version

* Modify analysis names

* Modify analysis graph

* update report.rst & data.rst

* commit estimator for merge

* minimal requirements

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* update estimator

* Fix doc urls

* fix get_data.py docstring

* update test_get_data.py

* Update docs

* Update docs

* Update docs

Co-authored-by: bxdd <bxddream@gmail.com>
Co-authored-by: zhupr <zhu.pengrong@foxmail.com>
Co-authored-by: Wendi Li <wendili.academic@qq.com>
Co-authored-by: Dingsu Wang <dingsu.wang@gmail.com>
Co-authored-by: bxdd <45119470+bxdd@users.noreply.github.com>
Co-authored-by: cslwqxx <cslwqxx@users.noreply.github.com>
2020-09-24 12:01:39 +08:00

89 lines
3.1 KiB
Python

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import sys
import shutil
import unittest
from pathlib import Path
import qlib
import numpy as np
import pandas as pd
from qlib.data import D
sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
from get_data import GetData
from dump_bin import DumpData
# Scratch directory tree for this test module, rooted next to this file.
# SOURCE_DIR receives the downloaded CSV files and QLIB_DIR receives the dumped
# qlib data; the whole DATA_DIR tree is deleted again in tearDownClass.
DATA_DIR = Path(__file__).parent / "test_data"
SOURCE_DIR = DATA_DIR / "source"
QLIB_DIR = DATA_DIR / "qlib"

# Both directories are created at import time so they exist before any test runs.
SOURCE_DIR.mkdir(parents=True, exist_ok=True)
QLIB_DIR.mkdir(parents=True, exist_ok=True)
class TestDumpData(unittest.TestCase):
    """Checks that CSV source data dumped with ``DumpData`` is readable through qlib.

    The test methods carry numeric prefixes because they share state and
    unittest runs methods in alphabetical order: ``test_3`` compares against
    ``SIMPLE_DATA`` stored by ``test_2`` and passes the ``calendars/day.txt``
    file that ``test_0`` reads after calling ``dump_calendars()``.
    """

    FIELDS = "open,close,high,low,volume,factor,change".split(",")
    # The same fields with the "$" prefix used when querying through ``D.features``.
    QLIB_FIELDS = [f"${name}" for name in FIELDS]
    # Shared DumpData instance, created once in setUpClass.
    DUMP_DATA = None
    # Upper-cased instrument names derived from the CSV file names in SOURCE_DIR.
    STOCK_NAMES = None
    # simple data: features of the first stock, stored by test_2 for use in test_3
    SIMPLE_DATA = None

    @classmethod
    def setUpClass(cls) -> None:
        """Fetch the CSV source data, build the dumper and initialise qlib."""
        GetData().csv_data_cn(SOURCE_DIR)
        TestDumpData.DUMP_DATA = DumpData(csv_path=SOURCE_DIR, qlib_dir=QLIB_DIR)
        TestDumpData.STOCK_NAMES = [csv_file.stem.upper() for csv_file in SOURCE_DIR.glob("*.csv")]
        provider_uri = str(QLIB_DIR.resolve())
        # Both caches are disabled (None) for this test run.
        qlib.init(
            provider_uri=provider_uri,
            expression_cache=None,
            dataset_cache=None,
        )

    @classmethod
    def tearDownClass(cls) -> None:
        """Delete the whole test data directory tree."""
        shutil.rmtree(str(DATA_DIR.resolve()))

    def test_0_dump_calendars(self):
        """The calendar file on disk and ``D.calendar()`` hold the same dates."""
        self.DUMP_DATA.dump_calendars()
        calendar_file = QLIB_DIR.joinpath("calendars", "day.txt")
        ori_calendars = set(
            map(
                pd.Timestamp,
                pd.read_csv(calendar_file, header=None).loc[:, 0].values,
            )
        )
        res_calendars = set(D.calendar())
        self.assertSetEqual(ori_calendars, res_calendars, "dump calendars failed")

    def test_1_dump_instruments(self):
        """The dumped instruments are exactly the CSV file names, upper-cased."""
        self.DUMP_DATA.dump_instruments()
        ori_ins = {csv_file.stem.upper() for csv_file in SOURCE_DIR.glob("*.csv")}
        res_ins = set(D.list_instruments(D.instruments("all"), as_list=True))
        # Compare in both directions: the previous check evaluated
        # ``ori_ins - res_ins`` twice, so instruments present only in the
        # result were never detected.
        self.assertSetEqual(ori_ins, res_ins, "dump instruments failed")

    def test_2_dump_features(self):
        """Dumped features are non-empty and have the expected columns."""
        self.DUMP_DATA.dump_features(include_fields=self.FIELDS)
        df = D.features(self.STOCK_NAMES, self.QLIB_FIELDS)
        # Keep the first stock's rows so test_3 can compare against them.
        TestDumpData.SIMPLE_DATA = df.loc(axis=0)[self.STOCK_NAMES[0], :]
        self.assertFalse(df.dropna().empty, "features data failed")
        self.assertListEqual(list(df.columns), self.QLIB_FIELDS, "features columns failed")

    def test_3_dump_features_simple(self):
        """Dumping a single CSV file gives the same data as the full dump did for that stock."""
        stock = self.STOCK_NAMES[0]
        dump_data = DumpData(csv_path=SOURCE_DIR.joinpath(f"{stock.lower()}.csv"), qlib_dir=QLIB_DIR)
        dump_data.dump_features(include_fields=self.FIELDS, calendar_path=QLIB_DIR.joinpath("calendars", "day.txt"))
        df = D.features([stock], self.QLIB_FIELDS)
        self.assertEqual(len(df), len(TestDumpData.SIMPLE_DATA), "dump features simple failed")
        self.assertTrue(np.isclose(df.dropna(), self.SIMPLE_DATA.dropna()).all(), "dump features simple failed")
# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()