diff --git a/qlib/storage/__init__.py b/qlib/storage/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/qlib/storage/storage.py b/qlib/storage/storage.py new file mode 100644 index 000000000..dac0e167d --- /dev/null +++ b/qlib/storage/storage.py @@ -0,0 +1,154 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + + +import abc + +from typing import ( + Iterable, + overload, + TypeVar, + Tuple, + List, + Text, + Optional, + AbstractSet, + Mapping, + Iterator, +) + + +# calendar value type +CalVT = TypeVar("CalVT") + +# instrument value +InstVT = List[Tuple[CalVT, CalVT]] +# instrument key +InstKT = Text + + +FeatureVT = Tuple[int, float] + + +class CalendarStorage: + def __init__(self, uri: str): + self._uri = uri + + def append(self, obj: CalVT) -> None: + """ Append object to the end of the CalendarStorage. """ + raise NotImplementedError("Subclass of CalendarStorage must implement `append` method") + + def clear(self): + """ Remove all items from CalendarStorage. """ + raise NotImplementedError("Subclass of CalendarStorage must implement `clear` method") + + def extend(self, iterable: Iterable[CalVT]): + """ Extend list by appending elements from the iterable. """ + raise NotImplementedError("Subclass of CalendarStorage must implement `extend` method") + + @overload + @abc.abstractmethod + def __getitem__(self, s: slice) -> Iterable[CalVT]: + """x.__getitem__(slice(start: int, stop: int, step: int)) <==> x[start:stop:step]""" + raise NotImplementedError("Subclass of CalendarStorage must implement `__getitem__(s: slice)` method") + + @abc.abstractmethod + def __getitem__(self, i: int) -> CalVT: + """x.__getitem__(y) <==> x[y]""" + + raise NotImplementedError("Subclass of CalendarStorage must implement `__getitem__(i: int)` method") + + @abc.abstractmethod + def __iter__(self) -> Iterator[CalVT]: + """ Implement iter(self). """ + raise NotImplementedError("Subclass of CalendarStorage must implement `__iter__` method") + + def __len__(self) -> int: + raise NotImplementedError("Subclass of CalendarStorage must implement `__len__` method") + + +class InstrumentStorage: + def __init__(self, uri: str): + self._uri = uri + + def clear(self) -> None: + """ D.clear() -> None. Remove all items from D. """ + raise NotImplementedError("Subclass of InstrumentStorage must implement `clear` method") + + @abc.abstractmethod + def get(self, k: InstKT) -> Optional[InstVT]: + """D.get(k) -> InstV or None""" + raise NotImplementedError("Subclass of InstrumentStorage must implement `get` method") + + @abc.abstractmethod + def items(self) -> AbstractSet[Tuple[InstKT, InstVT]]: + """ D.items() -> a set-like object providing a view on D's items """ + raise NotImplementedError("Subclass of InstrumentStorage must implement `items` method") + + @abc.abstractmethod + def keys(self) -> AbstractSet[InstKT]: + """ D.keys() -> a set-like object providing a view on D's keys """ + raise NotImplementedError("Subclass of InstrumentStorage must implement `keys` method") + + def update(self, e: Mapping[InstKT, InstVT] = None, **f: InstVT) -> None: + """ + D.update([e, ]**f) -> None. Update D from dict/iterable e and f. + If e is present and has a .keys() method, then does: for k in e: D[k] = e[k] + If e is present and lacks a .keys() method, then does: for k, v in e: D[k] = v + In either case, this is followed by: for k in f: D[k] = f[k] + """ + raise NotImplementedError("Subclass of InstrumentStorage must implement `update` method") + + def __setitem__(self, k: InstKT, v: InstVT) -> None: + """ Set self[key] to value. """ + raise NotImplementedError("Subclass of InstrumentStorage must implement `__setitem__` method") + + def __delitem__(self, k: InstKT) -> None: + """ Delete self[key]. """ + raise NotImplementedError("Subclass of InstrumentStorage must implement `__delitem__` method") + + @abc.abstractmethod + def __getitem__(self, k: InstKT) -> InstVT: + """ x.__getitem__(y) <==> x[y] """ + raise NotImplementedError("Subclass of InstrumentStorage must implement `__getitem__` method") + + def __len__(self) -> int: + """ Return len(self). """ + raise NotImplementedError("Subclass of InstrumentStorage must implement `__len__` method") + + +class FeatureStorage: + def __init__(self, uri: str): + self._uri = uri + + def append(self, obj: FeatureVT) -> None: + """ Append object to the end of the FeatureStorage. """ + raise NotImplementedError("Subclass of FeatureStorage must implement `append` method") + + def clear(self): + """ Remove all items from FeatureStorage. """ + raise NotImplementedError("Subclass of FeatureStorage must implement `clear` method") + + def extend(self, iterable: Iterable[FeatureVT]): + """ Extend list by appending elements from the iterable. """ + raise NotImplementedError("Subclass of FeatureStorage must implement `extend` method") + + @overload + @abc.abstractmethod + def __getitem__(self, s: slice) -> Iterable[FeatureVT]: + """x.__getitem__(slice(start: int, stop: int, step: int)) <==> x[start:stop:step]""" + raise NotImplementedError("Subclass of FeatureStorage must implement `__getitem__(s: slice)` method") + + @abc.abstractmethod + def __getitem__(self, i: int) -> float: + """x.__getitem__(y) <==> x[y]""" + + raise NotImplementedError("Subclass of FeatureStorage must implement `__getitem__(i: int)` method") + + def __len__(self) -> int: + raise NotImplementedError("Subclass of FeatureStorage must implement `__len__` method") + + @abc.abstractmethod + def __iter__(self) -> Iterator[FeatureVT]: + """ Implement iter(self). """ + raise NotImplementedError("Subclass of FeatureStorage must implement `__iter__` method") diff --git a/tests/storage_tests/test_storage.py b/tests/storage_tests/test_storage.py new file mode 100644 index 000000000..d4b37be77 --- /dev/null +++ b/tests/storage_tests/test_storage.py @@ -0,0 +1,174 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from pathlib import Path +from importlib.util import spec_from_file_location, module_from_spec + +import pandas as pd + + +# TODO: set STORAGE_NAME +STORAGE_NAME = "" +STORAGE_FILE_PATH = Path("") +# TODO: set value +CALENDAR_URI = "" +INSTRUMENT_URI = "" +FEATURE_URI = "" + + +def get_module(module_path: Path): + module_spec = spec_from_file_location("", module_path) + module = module_from_spec(module_spec) + module_spec.loader.exec_module(module) + return module + + +STORAGE_MODULE = get_module(STORAGE_FILE_PATH) + + +CalendarStorage = getattr(STORAGE_MODULE, f"{STORAGE_NAME.title()}CalendarStorage") +InstrumentStorage = getattr(STORAGE_MODULE, f"{STORAGE_NAME.title()}InstrumentStorage") +FeatureStorage = getattr(STORAGE_MODULE, f"{STORAGE_NAME.title()}FeatureStorage") + + +class TestCalendarStorage: + def test_calendar_storage(self): + # calendar value: pd.date_range(start="2005-01-01", stop="2005-03-01", freq="1D") + start_date = "2005-01-01" + end_date = "2005-03-01" + values = pd.date_range(start_date, end_date, freq="1D") + + calendar = CalendarStorage(uri=CALENDAR_URI) + # test `__iter__` + for _s, _t in zip(calendar, values): + assert pd.Timestamp(_s) == pd.Timestamp(_t), f"{calendar.__name__}.__iter__ error" + + # test `__getitem__(self, s: slice)` + for _s, _t in zip(calendar[1:3], values[1:3]): + assert pd.Timestamp(_s) == pd.Timestamp(_t), f"{calendar.__name__}.__getitem__(s: slice) error" + + # test `__getitem__(self, i)` + assert pd.Timestamp(calendar[0]) == pd.Timestamp(values[0]), f"{calendar.__name__}.__getitem__(i: int) error" + + def test_instrument_storage(self): + """ + The meaning of instrument, such as CSI500: + + CSI500 composition changes: + + date add remove + 2005-01-01 SH600000 + 2005-01-01 SH600001 + 2005-01-01 SH600002 + 2005-02-01 SH600003 SH600000 + 2005-02-15 SH600000 SH600002 + + Calendar: + pd.date_range(start="2020-01-01", stop="2020-03-01", freq="1D") + + Instrument: + symbol start_time end_time + SH600000 2005-01-01 2005-01-31 (2005-02-01 Last trading day) + SH600000 2005-02-15 2005-03-01 + SH600001 2005-01-01 2005-03-01 + SH600002 2005-01-01 2005-02-14 (2005-02-15 Last trading day) + SH600003 2005-02-01 2005-03-01 + + InstrumentStorage: + { + "SH600000": [(2005-01-01, 2005-01-31), (2005-02-15, 2005-03-01)], + "SH600001": [(2005-01-01, 2005-03-01)], + "SH600002": [(2005-01-01, 2005-02-14)], + "SH600003": [(2005-02-01, 2005-03-01)], + } + + """ + base_instrument = { + "SH600000": [("2005-01-01", "2005-01-31"), ("2005-02-15", "2005-03-01")], + "SH600001": [("2005-01-01", "2005-03-01")], + "SH600002": [("2005-01-01", "2005-02-14")], + "SH600003": [("2005-02-01", "2005-03-01")], + } + instrument = InstrumentStorage(uri=INSTRUMENT_URI) + + # test `keys` + assert sorted(instrument.keys()) == sorted(base_instrument.keys()), f"{instrument.__name__}.keys error" + # test `__getitem__` + assert instrument["SH600000"] == base_instrument["SH600000"], f"{instrument.__name__}.__getitem__ error" + # test `get` + assert instrument.get("SH600001") == base_instrument.get("SH600001"), f"{instrument.__name__}.get error" + # test `items` + for _item in instrument.items(): + assert base_instrument[_item[0]] == _item[1] + assert len(instrument.items()) == len(instrument) == len(base_instrument), f"{instrument.__name__}.items error" + + def test_feature_storage(self): + """ + Calendar: + pd.date_range(start="2005-01-01", stop="2005-03-01", freq="1D") + + Instrument: + { + "SH600000": [(2005-01-01, 2005-01-31), (2005-02-15, 2005-03-01)], + "SH600001": [(2005-01-01, 2005-03-01)], + "SH600002": [(2005-01-01, 2005-02-14)], + "SH600003": [(2005-02-01, 2005-03-01)], + } + + Feature: + Stock data(close): + 2005-01-01 ... 2005-02-01 ... 2005-02-14 2005-02-15 ... 2005-03-01 + SH600000 1 ... 3 ... 4 5 6 + SH600001 1 ... 4 ... 5 6 7 + SH600002 1 ... 5 ... 6 nan nan + SH600003 nan ... 1 ... 2 3 4 + + FeatureStorage(SH600000, close): + + [ + (calendar.index("2005-01-01"), 1), + ..., + (calendar.index("2005-03-01"), 6) + ] + + ====> [(0, 1), ..., (59, 6)] + + + FeatureStorage(SH600002, close): + + [ + (calendar.index("2005-01-01"), 1), + ..., + (calendar.index("2005-02-14"), 6) + ] + + ===> [(0, 1), ..., (44, 6)] + + FeatureStorage(SH600003, close): + + [ + (calendar.index("2005-02-01"), 1), + ..., + (calendar.index("2005-03-01"), 4) + ] + + ===> [(31, 1), ..., (59, 4)] + + """ + + # FeatureStorage(SH600003, close) + feature = FeatureStorage(uri=FEATURE_URI) + # 2005-02-01 and 2005-03-01 + assert feature[31] == 1 and feature[59] == 4, f"{feature.__name__}.__getitem__(i: int) error" + + # 2005-02-01, 2005-02-02, 2005-02-03 + # close_items: [(31, 1), ..., (33, )] + close_items = feature[31:34] + + # 2005-02-01, ..., 2005-03-01 + # feature: [(31, 1), ..., (59, 4)] + print(feature) + + assert ( + len(feature) == len(feature[:]) == len(feature[31:60]) == 29 + ), f"{feature.__name__}.items/__getitem__(s: slice) error"