mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 14:01:28 +08:00
* Intermediate version * Fix yaml template & Successfully run rolling * Be compatible with benchmark * Get same results with previous linear model * Black formatting * Update black * Update the placeholder mechanism * Update CI * Update CI * Upgrade Black * Fix CI and simplify code * Fix CI * Move the data processing caching mechanism into utils. * Adjusting DDG-DA * Organize import
171 lines
6.1 KiB
Python
171 lines
6.1 KiB
Python
# Copyright (c) Microsoft Corporation.
|
|
# Licensed under the MIT License.
|
|
|
|
|
|
from pathlib import Path
|
|
from collections.abc import Iterable
|
|
|
|
import numpy as np
|
|
from qlib.tests import TestAutoData
|
|
|
|
from qlib.data.storage.file_storage import (
|
|
FileCalendarStorage as CalendarStorage,
|
|
FileInstrumentStorage as InstrumentStorage,
|
|
FileFeatureStorage as FeatureStorage,
|
|
)
|
|
|
|
# Base name of this module: everything before the first "." in the file name.
_file_name = Path(__file__).name.partition(".")[0]

# Per-module scratch directory that sits next to this file ("<module>_data").
DATA_DIR = Path(__file__).parent / f"{_file_name}_data"

# "qlib" sub-directory of the scratch directory; created at import time
# (together with any missing parents) and left alone if it already exists.
QLIB_DIR = DATA_DIR / "qlib"
QLIB_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
class TestStorage(TestAutoData):
    """Exercise the file-based calendar, instrument and feature storages.

    Each test first reads from ``self.provider_uri`` (supplied by the
    ``TestAutoData`` base class) and then checks that a storage pointed at a
    provider URI that does not exist raises ``ValueError`` on access.
    """

    def test_calendar_storage(self):
        """Check slicing/indexing of the calendar storage and the missing-provider error."""
        calendar = CalendarStorage(freq="day", future=False, provider_uri=self.provider_uri)
        assert isinstance(calendar[:], Iterable), f"{calendar.__class__.__name__}.__getitem__(s: slice) is not Iterable"
        assert isinstance(calendar.data, Iterable), f"{calendar.__class__.__name__}.data is not Iterable"

        print(f"calendar[1: 5]: {calendar[1:5]}")
        print(f"calendar[0]: {calendar[0]}")
        print(f"calendar[-1]: {calendar[-1]}")

        # A provider URI that does not exist: every access path must raise ValueError.
        calendar = CalendarStorage(freq="1min", future=False, provider_uri="not_found")
        with self.assertRaises(ValueError):
            print(calendar.data)

        with self.assertRaises(ValueError):
            print(calendar[:])

        with self.assertRaises(ValueError):
            print(calendar[0])

    def test_instrument_storage(self):
        """
        The meaning of instrument, such as CSI500:

            CSI500 composition changes:

                date            add         remove
                2005-01-01      SH600000
                2005-01-01      SH600001
                2005-01-01      SH600002
                2005-02-01      SH600003    SH600000
                2005-02-15      SH600000    SH600002

            Calendar:
                pd.date_range(start="2005-01-01", end="2005-03-01", freq="1D")

            Instrument:
                symbol      start_time      end_time
                SH600000    2005-01-01      2005-01-31 (2005-02-01 Last trading day)
                SH600000    2005-02-15      2005-03-01
                SH600001    2005-01-01      2005-03-01
                SH600002    2005-01-01      2005-02-14 (2005-02-15 Last trading day)
                SH600003    2005-02-01      2005-03-01

            InstrumentStorage:
                {
                    "SH600000": [(2005-01-01, 2005-01-31), (2005-02-15, 2005-03-01)],
                    "SH600001": [(2005-01-01, 2005-03-01)],
                    "SH600002": [(2005-01-01, 2005-02-14)],
                    "SH600003": [(2005-02-01, 2005-03-01)],
                }

        """

        instrument = InstrumentStorage(market="csi300", provider_uri=self.provider_uri, freq="day")

        for inst, spans in instrument.data.items():
            # Key and value are checked separately so that a failure names the
            # part of the mapping that is actually wrong.
            assert isinstance(inst, str), f"{instrument.__class__.__name__} key is not str"
            assert isinstance(spans, Iterable), f"{instrument.__class__.__name__} value is not Iterable"
            for s_e in spans:
                # Every span is expected to be a (start, end) pair.
                assert (
                    isinstance(s_e, tuple) and len(s_e) == 2
                ), f"{instrument.__class__.__name__}.__getitem__(k) TypeError"

        print(f"instrument['SH600000']: {instrument['SH600000']}")

        # A provider URI that does not exist: both access paths must raise ValueError.
        instrument = InstrumentStorage(market="csi300", provider_uri="not_found", freq="day")
        with self.assertRaises(ValueError):
            print(instrument.data)

        with self.assertRaises(ValueError):
            # NOTE(review): the error is presumably raised because the storage
            # is missing, independent of the key that is looked up - confirm.
            print(instrument["SH600000"])

    def test_feature_storage(self):
        """
        Calendar:
            pd.date_range(start="2005-01-01", end="2005-03-01", freq="1D")

        Instrument:
            {
                "SH600000": [(2005-01-01, 2005-01-31), (2005-02-15, 2005-03-01)],
                "SH600001": [(2005-01-01, 2005-03-01)],
                "SH600002": [(2005-01-01, 2005-02-14)],
                "SH600003": [(2005-02-01, 2005-03-01)],
            }

        Feature:
            Stock data(close):
                            2005-01-01  ...   2005-02-01   ...   2005-02-14  2005-02-15  ...  2005-03-01
                SH600000     1          ...      3         ...      4           5               6
                SH600001     1          ...      4         ...      5           6               7
                SH600002     1          ...      5         ...      6           nan             nan
                SH600003     nan        ...      1         ...      2           3               4

            FeatureStorage(SH600000, close):

                [
                    (calendar.index("2005-01-01"), 1),
                    ...,
                    (calendar.index("2005-03-01"), 6)
                ]

                ====> [(0, 1), ..., (59, 6)]


            FeatureStorage(SH600002, close):

                [
                    (calendar.index("2005-01-01"), 1),
                    ...,
                    (calendar.index("2005-02-14"), 6)
                ]

                ===> [(0, 1), ..., (44, 6)]

            FeatureStorage(SH600003, close):

                [
                    (calendar.index("2005-02-01"), 1),
                    ...,
                    (calendar.index("2005-03-01"), 4)
                ]

                ===> [(31, 1), ..., (59, 4)]

        """

        feature = FeatureStorage(instrument="SZ300677", field="close", freq="day", provider_uri=self.provider_uri)

        # NOTE(review): index 0 is expected to be out of range for this
        # instrument - presumably its stored data starts later in the
        # calendar; confirm against the test data set.
        with self.assertRaises(IndexError):
            print(feature[0])
        # Integer indexing yields a pair whose second item is the value.
        assert isinstance(
            feature[3049][1], (float, np.float32)
        ), f"{feature.__class__.__name__}.__getitem__(i: int) error"
        assert len(feature[3049:3052]) == 3, f"{feature.__class__.__name__}.__getitem__(s: slice) error"
        print(f"feature[3049: 3052]: \n{feature[3049: 3052]}")

        print(f"feature[:].tail(): \n{feature[:].tail()}")

        # A provider URI that does not exist: every access path must raise ValueError.
        feature = FeatureStorage(instrument="SH600004", field="close", freq="day", provider_uri="not_found")

        with self.assertRaises(ValueError):
            print(feature[0])
        with self.assertRaises(ValueError):
            print(feature[:].empty)
        with self.assertRaises(ValueError):
            print(feature.data.empty)
|