mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
remove uri parameter from storage && modify file_storage
This commit is contained in:
@@ -53,6 +53,34 @@ Cache
|
||||
.. autoclass:: qlib.data.cache.DiskDatasetCache
|
||||
:members:
|
||||
|
||||
|
||||
Storage
|
||||
-------------
|
||||
.. autoclass:: qlib.data.storage.storage.BaseStorage
|
||||
:members:
|
||||
|
||||
.. autoclass:: qlib.data.storage.storage.CalendarStorage
|
||||
:members:
|
||||
|
||||
.. autoclass:: qlib.data.storage.storage.InstrumentStorage
|
||||
:members:
|
||||
|
||||
.. autoclass:: qlib.data.storage.storage.FeatureStorage
|
||||
:members:
|
||||
|
||||
.. autoclass:: qlib.data.storage.file_storage.FileStorageMixin
|
||||
:members:
|
||||
|
||||
.. autoclass:: qlib.data.storage.file_storage.FileCalendarStorage
|
||||
:members:
|
||||
|
||||
.. autoclass:: qlib.data.storage.file_storage.FileInstrumentStorage
|
||||
:members:
|
||||
|
||||
.. autoclass:: qlib.data.storage.file_storage.FileFeatureStorage
|
||||
:members:
|
||||
|
||||
|
||||
Dataset
|
||||
---------------
|
||||
|
||||
|
||||
@@ -45,12 +45,12 @@ class ProviderBackendMixin:
|
||||
|
||||
# set default storage kwargs
|
||||
backend_kwargs = backend.setdefault("kwargs", {})
|
||||
# default uri map
|
||||
if "uri" not in backend_kwargs:
|
||||
# default provider_uri map
|
||||
if "provider_uri" not in backend_kwargs:
|
||||
# if the user has no uri configured, use: uri = uri_map[freq]
|
||||
freq = kwargs.get("freq", "day")
|
||||
uri_map = backend_kwargs.setdefault("uri_map", {freq: C.get_data_path()})
|
||||
backend_kwargs["uri"] = uri_map[freq]
|
||||
provider_uri_map = backend_kwargs.setdefault("provider_uri_map", {freq: C.get_data_path()})
|
||||
backend_kwargs["provider_uri"] = provider_uri_map[freq]
|
||||
backend.setdefault("kwargs", {}).update(**kwargs)
|
||||
return init_instance_by_config(backend)
|
||||
|
||||
@@ -556,17 +556,21 @@ class LocalCalendarProvider(CalendarProvider):
|
||||
list of timestamps
|
||||
"""
|
||||
|
||||
backend_obj = self.backend_obj(freq=freq, future=future)
|
||||
if future and not backend_obj.check_exists():
|
||||
get_module_logger("data").warning(
|
||||
f"load calendar error: freq={freq}, future={future}; return current calendar!"
|
||||
)
|
||||
get_module_logger("data").warning(
|
||||
"You can get future calendar by referring to the following document: https://github.com/microsoft/qlib/blob/main/scripts/data_collector/contrib/README.md"
|
||||
)
|
||||
backend_obj = self.backend_obj(freq=freq, future=False)
|
||||
try:
|
||||
backend_obj = self.backend_obj(freq=freq, future=future).data
|
||||
except ValueError:
|
||||
if future:
|
||||
get_module_logger("data").warning(
|
||||
f"load calendar error: freq={freq}, future={future}; return current calendar!"
|
||||
)
|
||||
get_module_logger("data").warning(
|
||||
"You can get future calendar by referring to the following document: https://github.com/microsoft/qlib/blob/main/scripts/data_collector/contrib/README.md"
|
||||
)
|
||||
backend_obj = self.backend_obj(freq=freq, future=False).data
|
||||
else:
|
||||
raise
|
||||
|
||||
return [pd.Timestamp(x) for x in backend_obj.data]
|
||||
return [pd.Timestamp(x) for x in backend_obj]
|
||||
|
||||
def calendar(self, start_time=None, end_time=None, freq="day", future=False):
|
||||
_calendar, _calendar_index = self._get_calendar(freq, future)
|
||||
@@ -659,14 +663,7 @@ class LocalFeatureProvider(FeatureProvider):
|
||||
# validate
|
||||
field = str(field).lower()[1:]
|
||||
instrument = code_to_fname(instrument)
|
||||
try:
|
||||
data = self.backend_obj(instrument=instrument, field=field, freq=freq)[start_index : end_index + 1]
|
||||
except Exception as e:
|
||||
get_module_logger("data").warning(
|
||||
f"WARN: data not found for {instrument}.{field}\n\tFeature exception info: {str(e)}"
|
||||
)
|
||||
data = pd.Series(dtype=np.float32)
|
||||
return data
|
||||
return self.backend_obj(instrument=instrument, field=field, freq=freq)[start_index : end_index + 1]
|
||||
|
||||
|
||||
class LocalExpressionProvider(ExpressionProvider):
|
||||
|
||||
@@ -14,19 +14,35 @@ from qlib.data.storage import CalendarStorage, InstrumentStorage, FeatureStorage
|
||||
logger = get_module_logger("file_storage")
|
||||
|
||||
|
||||
class FileStorage:
|
||||
def check_exists(self):
|
||||
return self.uri.exists()
|
||||
class FileStorageMixin:
    """Mixin that resolves the on-disk location of a file-backed storage.

    The host class is expected to provide ``self.kwargs`` (a mapping that may
    contain ``provider_uri``), ``self.storage_name`` and ``self.file_name``;
    this mixin only reads them.
    """

    @property
    def uri(self) -> Path:
        """Build the path ``<provider_uri>/<storage_name>s/<file_name>``.

        Raises
        ------
        ValueError
            If ``provider_uri`` is absent from ``self.kwargs`` (or is ``None``).
        """
        root = self.kwargs.get("provider_uri", None)
        if root is not None:
            # `~` in the configured root is expanded before the sub-path is appended
            return Path(root).expanduser() / f"{self.storage_name}s" / self.file_name
        raise ValueError(
            f"The `provider_uri` parameter is not found in {self.__class__.__name__}, "
            f'please specify `provider_uri` in the "provider\'s backend"'
        )

    def check(self):
        """Verify that ``self.uri`` exists on disk.

        Raises
        -------
        ValueError
            If the path returned by ``self.uri`` does not exist.
        """
        location = self.uri
        if location.exists():
            return
        raise ValueError(f"{self.storage_name} not exists: {location}")
|
||||
|
||||
|
||||
class FileCalendarStorage(FileStorage, CalendarStorage):
|
||||
def __init__(self, freq: str, future: bool, uri: str, **kwargs):
|
||||
super(FileCalendarStorage, self).__init__(freq, future, uri, **kwargs)
|
||||
_file_name = f"{freq}_future.txt" if future else f"{freq}.txt"
|
||||
self.uri = Path(self.uri).expanduser().joinpath("calendars", _file_name.lower())
|
||||
class FileCalendarStorage(FileStorageMixin, CalendarStorage):
|
||||
def __init__(self, freq: str, future: bool, **kwargs):
    """Initialise the calendar storage and derive its file name.

    Parameters
    ----------
    freq : str
        Calendar frequency; used as the stem of the file name.
    future : bool
        When true the ``<freq>_future.txt`` file is used, otherwise
        ``<freq>.txt``.
    **kwargs
        Forwarded unchanged to the parent initialiser.
    """
    super(FileCalendarStorage, self).__init__(freq, future, **kwargs)
    # The conditional expression binds looser than the attribute call, so
    # without parentheses `.lower()` would only apply to the non-future name.
    # Parenthesise so both variants are lower-cased consistently.
    self.file_name = (f"{freq}_future.txt" if future else f"{freq}.txt").lower()
|
||||
|
||||
def _read_calendar(self, skip_rows: int = 0, n_rows: int = None) -> Iterable[CalVT]:
|
||||
if not self.check_exists():
|
||||
def _read_calendar(self, skip_rows: int = 0, n_rows: int = None) -> List[CalVT]:
|
||||
if not self.uri.exists():
|
||||
self._write_calendar(values=[])
|
||||
with self.uri.open("rb") as fp:
|
||||
return [
|
||||
@@ -39,7 +55,8 @@ class FileCalendarStorage(FileStorage, CalendarStorage):
|
||||
np.savetxt(fp, values, fmt="%s", encoding="utf-8")
|
||||
|
||||
@property
|
||||
def data(self) -> Iterable[CalVT]:
|
||||
def data(self) -> List[CalVT]:
|
||||
self.check()
|
||||
return self._read_calendar()
|
||||
|
||||
def extend(self, values: Iterable[CalVT]) -> None:
|
||||
@@ -49,6 +66,7 @@ class FileCalendarStorage(FileStorage, CalendarStorage):
|
||||
self._write_calendar(values=[])
|
||||
|
||||
def index(self, value: CalVT) -> int:
    """Return the position of the first calendar entry equal to ``value``.

    Parameters
    ----------
    value : CalVT
        The calendar entry to look up.

    Returns
    -------
    int
        Zero-based position of the first match.

    Raises
    ------
    ValueError
        Raised by ``self.check()`` when the storage does not exist.
    IndexError
        If ``value`` is not present in the calendar.
    """
    self.check()
    # `_read_calendar` returns a plain list; comparing a list to a scalar
    # yields a single bool, so convert to an array for element-wise matching.
    calendar = np.asarray(self._read_calendar())
    positions = np.flatnonzero(calendar == value)
    if positions.size == 0:
        raise IndexError(f"{value} is not in calendar")
    # flatnonzero gives a 1-D result, so positions[0] is a scalar and the
    # int() conversion does not rely on deprecated array-to-scalar coercion.
    return int(positions[0])
|
||||
|
||||
@@ -58,6 +76,7 @@ class FileCalendarStorage(FileStorage, CalendarStorage):
|
||||
self._write_calendar(values=calendar)
|
||||
|
||||
def remove(self, value: CalVT) -> None:
|
||||
self.check()
|
||||
index = self.index(value)
|
||||
calendar = self._read_calendar()
|
||||
calendar = np.delete(calendar, index)
|
||||
@@ -69,24 +88,29 @@ class FileCalendarStorage(FileStorage, CalendarStorage):
|
||||
self._write_calendar(values=calendar)
|
||||
|
||||
def __delitem__(self, i: Union[int, slice]) -> None:
|
||||
self.check()
|
||||
calendar = self._read_calendar()
|
||||
calendar = np.delete(calendar, i)
|
||||
self._write_calendar(values=calendar)
|
||||
|
||||
def __getitem__(self, i: Union[int, slice]) -> Union[CalVT, Iterable[CalVT]]:
|
||||
def __getitem__(self, i: Union[int, slice]) -> Union[CalVT, List[CalVT]]:
|
||||
self.check()
|
||||
return self._read_calendar()[i]
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.data)
|
||||
|
||||
class FileInstrumentStorage(FileStorage, InstrumentStorage):
|
||||
|
||||
class FileInstrumentStorage(FileStorageMixin, InstrumentStorage):
|
||||
|
||||
INSTRUMENT_SEP = "\t"
|
||||
INSTRUMENT_START_FIELD = "start_datetime"
|
||||
INSTRUMENT_END_FIELD = "end_datetime"
|
||||
SYMBOL_FIELD_NAME = "instrument"
|
||||
|
||||
def __init__(self, market: str, uri: str, **kwargs):
|
||||
super(FileInstrumentStorage, self).__init__(market, uri, **kwargs)
|
||||
self.uri = Path(self.uri).expanduser().joinpath("instruments", f"{market.lower()}.txt")
|
||||
def __init__(self, market: str, **kwargs):
|
||||
super(FileInstrumentStorage, self).__init__(market, **kwargs)
|
||||
self.file_name = f"{market.lower()}.txt"
|
||||
|
||||
def _read_instrument(self) -> Dict[InstKT, InstVT]:
|
||||
if not self.uri.exists():
|
||||
@@ -128,6 +152,7 @@ class FileInstrumentStorage(FileStorage, InstrumentStorage):
|
||||
|
||||
@property
|
||||
def data(self) -> Dict[InstKT, InstVT]:
|
||||
self.check()
|
||||
return self._read_instrument()
|
||||
|
||||
def __setitem__(self, k: InstKT, v: InstVT) -> None:
|
||||
@@ -136,11 +161,13 @@ class FileInstrumentStorage(FileStorage, InstrumentStorage):
|
||||
self._write_instrument(inst)
|
||||
|
||||
def __delitem__(self, k: InstKT) -> None:
|
||||
self.check()
|
||||
inst = self._read_instrument()
|
||||
del inst[k]
|
||||
self._write_instrument(inst)
|
||||
|
||||
def __getitem__(self, k: InstKT) -> InstVT:
|
||||
self.check()
|
||||
return self._read_instrument()[k]
|
||||
|
||||
def update(self, *args, **kwargs) -> None:
|
||||
@@ -164,13 +191,14 @@ class FileInstrumentStorage(FileStorage, InstrumentStorage):
|
||||
|
||||
self._write_instrument(inst)
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.data)
|
||||
|
||||
class FileFeatureStorage(FileStorage, FeatureStorage):
|
||||
def __init__(self, instrument: str, field: str, freq: str, uri: str, **kwargs):
|
||||
super(FileFeatureStorage, self).__init__(instrument, field, freq, uri, **kwargs)
|
||||
self.uri = (
|
||||
Path(self.uri).expanduser().joinpath("features", instrument.lower(), f"{field.lower()}.{freq.lower()}.bin")
|
||||
)
|
||||
|
||||
class FileFeatureStorage(FileStorageMixin, FeatureStorage):
|
||||
def __init__(self, instrument: str, field: str, freq: str, **kwargs):
|
||||
super(FileFeatureStorage, self).__init__(instrument, field, freq, **kwargs)
|
||||
self.file_name = f"{instrument.lower()}/{field.lower()}.{freq.lower()}.bin"
|
||||
|
||||
def clear(self):
|
||||
with self.uri.open("wb") as _:
|
||||
@@ -214,35 +242,44 @@ class FileFeatureStorage(FileStorage, FeatureStorage):
|
||||
|
||||
@property
|
||||
def start_index(self) -> Union[int, None]:
|
||||
if len(self) == 0:
|
||||
if not self.uri.exists():
|
||||
return None
|
||||
with open(self.uri, "rb") as fp:
|
||||
with self.uri.open("rb") as fp:
|
||||
index = int(np.frombuffer(fp.read(4), dtype="<f")[0])
|
||||
return index
|
||||
|
||||
@property
|
||||
def end_index(self) -> Union[int, None]:
|
||||
if not self.uri.exists():
|
||||
return None
|
||||
# The next data appending index point will be `end_index + 1`
|
||||
return self.start_index + len(self) - 1
|
||||
|
||||
def __getitem__(self, i: Union[int, slice]) -> Union[Tuple[int, float], pd.Series]:
|
||||
if not self.uri.exists():
|
||||
if isinstance(i, int):
|
||||
return None, None
|
||||
elif isinstance(i, slice):
|
||||
return pd.Series()
|
||||
return pd.Series(dtype=np.float32)
|
||||
else:
|
||||
raise TypeError(f"type(i) = {type(i)}")
|
||||
|
||||
with open(self.uri, "rb") as fp:
|
||||
|
||||
storage_start_index = self.start_index
|
||||
storage_end_index = self.end_index
|
||||
with self.uri.open("rb") as fp:
|
||||
if isinstance(i, int):
|
||||
if self.start_index > i:
|
||||
raise IndexError(f"{i}: start index is {self.start_index}")
|
||||
fp.seek(4 * (i - self.start_index) + 4)
|
||||
|
||||
if storage_start_index > i:
|
||||
raise IndexError(f"{i}: start index is {storage_start_index}")
|
||||
fp.seek(4 * (i - storage_start_index) + 4)
|
||||
return i, struct.unpack("f", fp.read(4))[0]
|
||||
elif isinstance(i, slice):
|
||||
start_index = self.start_index if i.start is None else i.start
|
||||
end_index = self.end_index if i.stop is None else i.stop - 1
|
||||
si = max(self.start_index, start_index)
|
||||
start_index = storage_start_index if i.start is None else i.start
|
||||
end_index = storage_end_index if i.stop is None else i.stop - 1
|
||||
si = max(start_index, storage_start_index)
|
||||
if si > end_index:
|
||||
return pd.Series()
|
||||
fp.seek(4 * (si - self.start_index) + 4)
|
||||
return pd.Series(dtype=np.float32)
|
||||
fp.seek(4 * (si - storage_start_index) + 4)
|
||||
# read n bytes
|
||||
count = end_index - si + 1
|
||||
data = np.frombuffer(fp.read(4 * count), dtype="<f")
|
||||
@@ -251,4 +288,5 @@ class FileFeatureStorage(FileStorage, FeatureStorage):
|
||||
raise TypeError(f"type(i) = {type(i)}")
|
||||
|
||||
def __len__(self) -> int:
|
||||
return self.uri.stat().st_size // 4 - 1 if self.check_exists() else 0
|
||||
self.check()
|
||||
return self.uri.stat().st_size // 4 - 1
|
||||
|
||||
@@ -25,24 +25,28 @@ class UserCalendarStorage(CalendarStorage):
|
||||
|
||||
@property
|
||||
def data(self) -> Iterable[CalVT]:
|
||||
'''get all data'''
|
||||
raise NotImplementedError("Subclass of CalendarStorage must implement `data` method")
|
||||
'''get all data
|
||||
|
||||
def check_exists(self) -> bool:
|
||||
'''check if storage(uri) exists, if not exists: return False'''
|
||||
raise NotImplementedError("Subclass of BaseStorage must implement `check_exists` method")
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the data(storage) does not exist, raise ValueError
|
||||
'''
|
||||
raise NotImplementedError("Subclass of CalendarStorage must implement `data` method")
|
||||
|
||||
|
||||
class UserInstrumentStorage(InstrumentStorage):
|
||||
|
||||
@property
|
||||
def data(self) -> Dict[InstKT, InstVT]:
|
||||
'''get all data'''
|
||||
raise NotImplementedError("Subclass of InstrumentStorage must implement `data` method")
|
||||
'''get all data
|
||||
|
||||
def check_exists(self) -> bool:
|
||||
'''check if storage(uri) exists, if not exists: return False'''
|
||||
raise NotImplementedError("Subclass of BaseStorage must implement `check_exists` method")
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the data(storage) does not exist, raise ValueError
|
||||
'''
|
||||
raise NotImplementedError("Subclass of InstrumentStorage must implement `data` method")
|
||||
|
||||
|
||||
class UserFeatureStorage(FeatureStorage):
|
||||
@@ -53,103 +57,64 @@ class UserFeatureStorage(FeatureStorage):
|
||||
Returns
|
||||
-------
|
||||
pd.Series(values, index=pd.RangeIndex(start, len(values))
|
||||
|
||||
Notes
|
||||
-------
|
||||
if data(storage) does not exist:
|
||||
if isinstance(i, int):
|
||||
return (None, None)
|
||||
if isinstance(i, slice):
|
||||
# return empty pd.Series
|
||||
return pd.Series(dtype=np.float32)
|
||||
'''
|
||||
raise NotImplementedError(
|
||||
"Subclass of FeatureStorage must implement `__getitem__(s: slice)` method"
|
||||
)
|
||||
|
||||
def check_exists(self) -> bool:
|
||||
'''check if storage(uri) exists, if not exists: return False'''
|
||||
raise NotImplementedError("Subclass of BaseStorage must implement `check_exists` method")
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class StorageMeta(type):
|
||||
"""unified management of raise when storage is not exists"""
|
||||
|
||||
def __new__(cls, name, bases, dict):
|
||||
class_obj = type.__new__(cls, name, bases, dict)
|
||||
|
||||
# The calls to __iter__ and __getitem__ do not pass through __getattribute__.
|
||||
# In order to throw an exception before calling __getitem__, use the metaclass
|
||||
_getitem_func = getattr(class_obj, "__getitem__")
|
||||
|
||||
def _getitem(obj, item):
|
||||
getattr(obj, "_check")()
|
||||
try:
|
||||
res = _getitem_func(obj, item)
|
||||
except Exception as e:
|
||||
raise ValueError(f"{obj.raise_info}\n\tStorage exception info: {str(e)}")
|
||||
return res
|
||||
|
||||
setattr(class_obj, "__getitem__", _getitem)
|
||||
return class_obj
|
||||
|
||||
|
||||
class BaseStorage(metaclass=StorageMeta):
|
||||
class BaseStorage:
|
||||
@property
|
||||
def storage_name(self) -> str:
|
||||
return re.findall("[A-Z][^A-Z]*", self.__class__.__name__)[-2].lower()
|
||||
|
||||
@property
|
||||
def raise_info(self):
|
||||
parameters_info = [
|
||||
f"{_k}={_v}"
|
||||
for _k, _v in self.__dict__.items()
|
||||
if not isinstance(_v, (dict,)) or (hasattr(_v, "__len__") and len(_v) < 3)
|
||||
]
|
||||
return f"{self.storage_name.lower()} not exists, storage parameters: {parameters_info}"
|
||||
|
||||
def check_exists(self) -> bool:
|
||||
"""check if storage(uri) exists, if not exists: return False"""
|
||||
raise NotImplementedError("Subclass of BaseStorage must implement `check_exists` method")
|
||||
|
||||
def clear(self) -> None:
|
||||
"""clear storage"""
|
||||
raise NotImplementedError("Subclass of BaseStorage must implement `clear` method")
|
||||
|
||||
def __len__(self) -> 0:
|
||||
return len(self.data) if self.check_exists() else 0
|
||||
|
||||
def __getitem__(self, item: Union[slice, Union[int, InstKT]]):
|
||||
raise NotImplementedError(
|
||||
"Subclass of BaseStorage must implement `__getitem__(i: Union[int, InstKT])`/`__getitem__(s: slice)` method"
|
||||
)
|
||||
|
||||
def _check(self):
|
||||
if not self.check_exists():
|
||||
raise ValueError(self.raise_info)
|
||||
|
||||
def __getattribute__(self, item):
|
||||
if item == "data":
|
||||
self._check()
|
||||
try:
|
||||
res = super(BaseStorage, self).__getattribute__(item)
|
||||
except Exception as e:
|
||||
raise ValueError(f"{self.raise_info}\n\tStorage exception info: {str(e)}")
|
||||
return res
|
||||
|
||||
|
||||
class CalendarStorage(BaseStorage):
|
||||
"""
|
||||
The behavior of CalendarStorage's methods and List's methods of the same name remain consistent
|
||||
"""
|
||||
|
||||
def __init__(self, freq: str, future: bool, uri: str, **kwargs):
|
||||
def __init__(self, freq: str, future: bool, **kwargs):
|
||||
self.freq = freq
|
||||
self.future = future
|
||||
self.uri = uri
|
||||
self.kwargs = kwargs
|
||||
|
||||
@property
|
||||
def data(self) -> Iterable[CalVT]:
|
||||
"""get all data"""
|
||||
"""get all data
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the data(storage) does not exist, raise ValueError
|
||||
"""
|
||||
raise NotImplementedError("Subclass of CalendarStorage must implement `data` method")
|
||||
|
||||
def clear(self) -> None:
|
||||
raise NotImplementedError("Subclass of CalendarStorage must implement `clear` method")
|
||||
|
||||
def extend(self, iterable: Iterable[CalVT]) -> None:
|
||||
raise NotImplementedError("Subclass of CalendarStorage must implement `extend` method")
|
||||
|
||||
def index(self, value: CalVT) -> int:
|
||||
"""
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the data(storage) does not exist, raise ValueError
|
||||
"""
|
||||
raise NotImplementedError("Subclass of CalendarStorage must implement `index` method")
|
||||
|
||||
def insert(self, index: int, value: CalVT) -> None:
|
||||
@@ -184,6 +149,12 @@ class CalendarStorage(BaseStorage):
|
||||
...
|
||||
|
||||
def __delitem__(self, i) -> None:
|
||||
"""
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the data(storage) does not exist, raise ValueError
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Subclass of CalendarStorage must implement `__delitem__(i: int)`/`__delitem__(s: slice)` method"
|
||||
)
|
||||
@@ -199,26 +170,60 @@ class CalendarStorage(BaseStorage):
|
||||
...
|
||||
|
||||
def __getitem__(self, i) -> CalVT:
|
||||
"""
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the data(storage) does not exist, raise ValueError
|
||||
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Subclass of CalendarStorage must implement `__getitem__(i: int)`/`__getitem__(s: slice)` method"
|
||||
)
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the data(storage) does not exist, raise ValueError
|
||||
|
||||
"""
|
||||
raise NotImplementedError("Subclass of CalendarStorage must implement `__len__` method")
|
||||
|
||||
|
||||
class InstrumentStorage(BaseStorage):
|
||||
def __init__(self, market: str, uri: str, **kwargs):
|
||||
def __init__(self, market: str, **kwargs):
|
||||
self.market = market
|
||||
self.uri = uri
|
||||
self.kwargs = kwargs
|
||||
|
||||
@property
|
||||
def data(self) -> Dict[InstKT, InstVT]:
|
||||
"""get all data"""
|
||||
"""get all data
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the data(storage) does not exist, raise ValueError
|
||||
"""
|
||||
raise NotImplementedError("Subclass of InstrumentStorage must implement `data` method")
|
||||
|
||||
def clear(self) -> None:
|
||||
raise NotImplementedError("Subclass of InstrumentStorage must implement `clear` method")
|
||||
|
||||
def update(self, *args, **kwargs) -> None:
|
||||
"""D.update([E, ]**F) -> None. Update D from mapping/iterable E and F.
|
||||
If E present and has a .keys() method, does: for k in E: D[k] = E[k]
|
||||
If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v
|
||||
In either case, this is followed by: for k, v in F.items(): D[k] = v
|
||||
|
||||
Notes
|
||||
------
|
||||
If E present and has a .keys() method, does: for k in E: D[k] = E[k]
|
||||
|
||||
If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v
|
||||
|
||||
In either case, this is followed by: for k, v in F.items(): D[k] = v
|
||||
|
||||
"""
|
||||
raise NotImplementedError("Subclass of InstrumentStorage must implement `update` method")
|
||||
|
||||
@@ -227,53 +232,96 @@ class InstrumentStorage(BaseStorage):
|
||||
raise NotImplementedError("Subclass of InstrumentStorage must implement `__setitem__` method")
|
||||
|
||||
def __delitem__(self, k: InstKT) -> None:
|
||||
"""Delete self[key]."""
|
||||
"""Delete self[key].
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the data(storage) does not exist, raise ValueError
|
||||
"""
|
||||
raise NotImplementedError("Subclass of InstrumentStorage must implement `__delitem__` method")
|
||||
|
||||
def __getitem__(self, k: InstKT) -> InstVT:
|
||||
"""x.__getitem__(k) <==> x[k]"""
|
||||
raise NotImplementedError("Subclass of InstrumentStorage must implement `__getitem__` method")
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the data(storage) does not exist, raise ValueError
|
||||
|
||||
"""
|
||||
raise NotImplementedError("Subclass of InstrumentStorage must implement `__len__` method")
|
||||
|
||||
|
||||
class FeatureStorage(BaseStorage):
|
||||
def __init__(self, instrument: str, field: str, freq: str, uri: str, **kwargs):
|
||||
def __init__(self, instrument: str, field: str, freq: str, **kwargs):
|
||||
self.instrument = instrument
|
||||
self.field = field
|
||||
self.freq = freq
|
||||
self.uri = uri
|
||||
self.kwargs = kwargs
|
||||
|
||||
@property
|
||||
def data(self) -> pd.Series:
|
||||
"""get all data"""
|
||||
"""get all data
|
||||
|
||||
Notes
|
||||
------
|
||||
if data(storage) does not exist, return empty pd.Series: `return pd.Series(dtype=np.float32)`
|
||||
"""
|
||||
raise NotImplementedError("Subclass of FeatureStorage must implement `data` method")
|
||||
|
||||
@property
|
||||
def start_index(self) -> Union[int, None]:
|
||||
"""get FeatureStorage start index
|
||||
If len(self) == 0; return None
|
||||
|
||||
Notes
|
||||
-----
|
||||
If the data(storage) does not exist, return None
|
||||
"""
|
||||
raise NotImplementedError("Subclass of FeatureStorage must implement `data` method")
|
||||
raise NotImplementedError("Subclass of FeatureStorage must implement `start_index` method")
|
||||
|
||||
@property
|
||||
def end_index(self) -> Union[int, None]:
|
||||
if len(self) == 0:
|
||||
return None
|
||||
return None if len(self) == 0 else self.start_index + len(self) - 1
|
||||
"""get FeatureStorage end index
|
||||
|
||||
Notes
|
||||
-----
|
||||
The right index of the data range (both sides are closed)
|
||||
|
||||
The next data appending point will be `end_index + 1`
|
||||
|
||||
If the data(storage) does not exist, return None
|
||||
"""
|
||||
raise NotImplementedError("Subclass of FeatureStorage must implement `end_index` method")
|
||||
|
||||
def clear(self) -> None:
|
||||
raise NotImplementedError("Subclass of FeatureStorage must implement `clear` method")
|
||||
|
||||
def write(self, data_array: Union[List, np.ndarray, Tuple], index: int = None):
|
||||
"""Write data_array to FeatureStorage starting from index.
|
||||
If index is None, append data_array to feature.
|
||||
If len(data_array) == 0; return
|
||||
If (index - self.end_index) >= 1, self[end_index+1: index] will be filled with np.nan
|
||||
|
||||
Notes
|
||||
------
|
||||
If index is None, append data_array to feature.
|
||||
|
||||
Examples:
|
||||
If len(data_array) == 0; return
|
||||
|
||||
If (index - self.end_index) >= 1, self[end_index+1: index] will be filled with np.nan
|
||||
|
||||
Examples
|
||||
---------
|
||||
.. code-block::
|
||||
|
||||
feature:
|
||||
3 4
|
||||
4 5
|
||||
5 6
|
||||
|
||||
|
||||
>>> self.write([6, 7], index=6)
|
||||
|
||||
feature:
|
||||
@@ -311,56 +359,70 @@ class FeatureStorage(BaseStorage):
|
||||
def rebase(self, start_index: int = None, end_index: int = None):
|
||||
"""Rebase the start_index and end_index of the FeatureStorage.
|
||||
|
||||
Examples:
|
||||
start_index and end_index are closed intervals: [start_index, end_index]
|
||||
|
||||
feature:
|
||||
3 4
|
||||
4 5
|
||||
5 6
|
||||
Examples
|
||||
---------
|
||||
|
||||
>>> self.rebase(start_index=4)
|
||||
.. code-block::
|
||||
|
||||
feature:
|
||||
4 5
|
||||
5 6
|
||||
feature:
|
||||
3 4
|
||||
4 5
|
||||
5 6
|
||||
|
||||
>>> self.rebase(start_index=3)
|
||||
|
||||
feature:
|
||||
3 np.nan
|
||||
4 5
|
||||
5 6
|
||||
>>> self.rebase(start_index=4)
|
||||
|
||||
>>> self.write([3], index=3)
|
||||
feature:
|
||||
4 5
|
||||
5 6
|
||||
|
||||
feature:
|
||||
3 3
|
||||
4 5
|
||||
5 6
|
||||
>>> self.rebase(start_index=3)
|
||||
|
||||
>>> self.rebase(end_index=4)
|
||||
feature:
|
||||
3 np.nan
|
||||
4 5
|
||||
5 6
|
||||
|
||||
feature:
|
||||
3 3
|
||||
4 5
|
||||
>>> self.write([3], index=3)
|
||||
|
||||
>>> self.write([6, 7, 8], index=4)
|
||||
feature:
|
||||
3 3
|
||||
4 5
|
||||
5 6
|
||||
|
||||
feature:
|
||||
3 3
|
||||
4 6
|
||||
5 7
|
||||
6 8
|
||||
>>> self.rebase(end_index=4)
|
||||
|
||||
>>> self.rebase(start_index=4, end_index=5)
|
||||
feature:
|
||||
3 3
|
||||
4 5
|
||||
|
||||
feature:
|
||||
4 6
|
||||
5 7
|
||||
>>> self.write([6, 7, 8], index=4)
|
||||
|
||||
feature:
|
||||
3 3
|
||||
4 6
|
||||
5 7
|
||||
6 8
|
||||
|
||||
>>> self.rebase(start_index=4, end_index=5)
|
||||
|
||||
feature:
|
||||
4 6
|
||||
5 7
|
||||
|
||||
"""
|
||||
if start_index is None and end_index is None:
|
||||
logger.warning("both start_index and end_index are None, rebase is ignored")
|
||||
storage_si = self.start_index
|
||||
storage_ei = self.end_index
|
||||
if storage_si is None or storage_ei is None:
|
||||
raise ValueError("storage.start_index or storage.end_index is None, storage may not exist")
|
||||
|
||||
start_index = storage_si if start_index is None else start_index
|
||||
end_index = storage_ei if end_index is None else end_index
|
||||
|
||||
if start_index is None or end_index is None:
|
||||
logger.warning("both start_index and end_index are None, or storage does not exist; rebase is ignored")
|
||||
return
|
||||
|
||||
if start_index < 0 or end_index < 0:
|
||||
@@ -373,17 +435,15 @@ class FeatureStorage(BaseStorage):
|
||||
)
|
||||
return
|
||||
|
||||
start_index = self.start_index if start_index is None else end_index
|
||||
end_index = self.end_index if end_index is None else end_index
|
||||
if start_index <= self.start_index:
|
||||
self.write([np.nan] * (self.start_index - start_index), start_index)
|
||||
if start_index <= storage_si:
|
||||
self.write([np.nan] * (storage_si - start_index), start_index)
|
||||
else:
|
||||
self.rewrite(self[start_index:].values, start_index)
|
||||
|
||||
if end_index >= self.end_index:
|
||||
self.write([np.nan] * (end_index - self.end_index))
|
||||
else:
|
||||
self.rewrite(self[: end_index + 1].values, self.start_index)
|
||||
self.rewrite(self[: end_index + 1].values, start_index)
|
||||
|
||||
def rewrite(self, data: Union[List, np.ndarray, Tuple], index: int):
|
||||
"""overwrite all data in FeatureStorage with data
|
||||
@@ -414,7 +474,28 @@ class FeatureStorage(BaseStorage):
|
||||
...
|
||||
|
||||
def __getitem__(self, i) -> Union[Tuple[int, float], pd.Series]:
|
||||
"""x.__getitem__(y) <==> x[y]"""
|
||||
"""x.__getitem__(y) <==> x[y]
|
||||
|
||||
Notes
|
||||
-------
|
||||
if data(storage) does not exist:
|
||||
if isinstance(i, int):
|
||||
return (None, None)
|
||||
if isinstance(i, slice):
|
||||
# return empty pd.Series
|
||||
return pd.Series(dtype=np.float32)
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Subclass of FeatureStorage must implement `__getitem__(i: int)`/`__getitem__(s: slice)` method"
|
||||
)
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the data(storage) does not exist, raise ValueError
|
||||
|
||||
"""
|
||||
raise NotImplementedError("Subclass of FeatureStorage must implement `__len__` method")
|
||||
|
||||
@@ -665,7 +665,10 @@ def exists_qlib_data(qlib_dir):
|
||||
return False
|
||||
# check calendar bin
|
||||
for _calendar in calendars_dir.iterdir():
|
||||
if not list(features_dir.rglob(f"*.{_calendar.name.split('.')[0]}.bin")):
|
||||
|
||||
if ("_future" not in _calendar.name) and (
|
||||
not list(features_dir.rglob(f"*.{_calendar.name.split('.')[0]}.bin"))
|
||||
):
|
||||
return False
|
||||
|
||||
# check instruments
|
||||
|
||||
@@ -120,7 +120,7 @@ class DumpDataBase:
|
||||
else:
|
||||
df = file_or_df
|
||||
if df.empty or self.date_field_name not in df.columns.tolist():
|
||||
_calendars = pd.Series()
|
||||
_calendars = pd.Series(dtype=np.float32)
|
||||
else:
|
||||
_calendars = df[self.date_field_name]
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ QLIB_DIR.mkdir(exist_ok=True, parents=True)
|
||||
class TestStorage(TestAutoData):
|
||||
def test_calendar_storage(self):
|
||||
|
||||
calendar = CalendarStorage(freq="day", future=False, uri=self.provider_uri)
|
||||
calendar = CalendarStorage(freq="day", future=False, provider_uri=self.provider_uri)
|
||||
assert isinstance(calendar[:], Iterable), f"{calendar.__class__.__name__}.__getitem__(s: slice) is not Iterable"
|
||||
assert isinstance(calendar.data, Iterable), f"{calendar.__class__.__name__}.data is not Iterable"
|
||||
|
||||
@@ -32,6 +32,16 @@ class TestStorage(TestAutoData):
|
||||
print(f"calendar[0]: {calendar[0]}")
|
||||
print(f"calendar[-1]: {calendar[-1]}")
|
||||
|
||||
calendar = CalendarStorage(freq="1min", future=False, provider_uri="not_found")
|
||||
with pytest.raises(ValueError):
|
||||
print(calendar.data)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
print(calendar[:])
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
print(calendar[0])
|
||||
|
||||
def test_instrument_storage(self):
|
||||
"""
|
||||
The meaning of instrument, such as CSI500:
|
||||
@@ -66,7 +76,7 @@ class TestStorage(TestAutoData):
|
||||
|
||||
"""
|
||||
|
||||
instrument = InstrumentStorage(market="csi300", uri=self.provider_uri)
|
||||
instrument = InstrumentStorage(market="csi300", provider_uri=self.provider_uri)
|
||||
|
||||
for inst, spans in instrument.data.items():
|
||||
assert isinstance(inst, str) and isinstance(
|
||||
@@ -79,6 +89,13 @@ class TestStorage(TestAutoData):
|
||||
|
||||
print(f"instrument['SH600000']: {instrument['SH600000']}")
|
||||
|
||||
instrument = InstrumentStorage(market="csi300", provider_uri="not_found")
|
||||
with pytest.raises(ValueError):
|
||||
print(instrument.data)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
print(instrument["sSH600000"])
|
||||
|
||||
def test_feature_storage(self):
|
||||
"""
|
||||
Calendar:
|
||||
@@ -133,9 +150,9 @@ class TestStorage(TestAutoData):
|
||||
|
||||
"""
|
||||
|
||||
feature = FeatureStorage(instrument="SH600004", field="close", freq="day", uri=self.provider_uri)
|
||||
feature = FeatureStorage(instrument="SH600004", field="close", freq="day", provider_uri=self.provider_uri)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
with pytest.raises(IndexError):
|
||||
print(feature[0])
|
||||
assert isinstance(
|
||||
feature[815][1], (float, np.float32)
|
||||
@@ -144,3 +161,11 @@ class TestStorage(TestAutoData):
|
||||
print(f"feature[815: 818]: \n{feature[815: 818]}")
|
||||
|
||||
print(f"feature[:].tail(): \n{feature[:].tail()}")
|
||||
|
||||
feature = FeatureStorage(instrument="SH600004", field="close", freq="day", provider_uri="not_fount")
|
||||
|
||||
assert feature[0] == (None, None), "FeatureStorage does not exist, feature[i] should return `(None, None)`"
|
||||
assert feature[:].empty, "FeatureStorage does not exist, feature[:] should return `pd.Series(dtype=np.float32)`"
|
||||
assert (
|
||||
feature.data.empty
|
||||
), "FeatureStorage does not exist, feature.data should return `pd.Series(dtype=np.float32)`"
|
||||
|
||||
Reference in New Issue
Block a user