1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

remove uri parameter from storage && modify file_storage

This commit is contained in:
zhupr
2021-05-26 01:01:36 +08:00
parent 602f78b568
commit 5da33562dd
7 changed files with 373 additions and 201 deletions

View File

@@ -53,6 +53,34 @@ Cache
.. autoclass:: qlib.data.cache.DiskDatasetCache
:members:
Storage
-------------
.. autoclass:: qlib.data.storage.storage.BaseStorage
:members:
.. autoclass:: qlib.data.storage.storage.CalendarStorage
:members:
.. autoclass:: qlib.data.storage.storage.InstrumentStorage
:members:
.. autoclass:: qlib.data.storage.storage.FeatureStorage
:members:
.. autoclass:: qlib.data.storage.file_storage.FileStorageMixin
:members:
.. autoclass:: qlib.data.storage.file_storage.FileCalendarStorage
:members:
.. autoclass:: qlib.data.storage.file_storage.FileInstrumentStorage
:members:
.. autoclass:: qlib.data.storage.file_storage.FileFeatureStorage
:members:
Dataset
---------------

View File

@@ -45,12 +45,12 @@ class ProviderBackendMixin:
# set default storage kwargs
backend_kwargs = backend.setdefault("kwargs", {})
# default uri map
if "uri" not in backend_kwargs:
# default provider_uri map
if "provider_uri" not in backend_kwargs:
# if the user has no uri configured, use: uri = uri_map[freq]
freq = kwargs.get("freq", "day")
uri_map = backend_kwargs.setdefault("uri_map", {freq: C.get_data_path()})
backend_kwargs["uri"] = uri_map[freq]
provider_uri_map = backend_kwargs.setdefault("provider_uri_map", {freq: C.get_data_path()})
backend_kwargs["provider_uri"] = provider_uri_map[freq]
backend.setdefault("kwargs", {}).update(**kwargs)
return init_instance_by_config(backend)
@@ -556,17 +556,21 @@ class LocalCalendarProvider(CalendarProvider):
list of timestamps
"""
backend_obj = self.backend_obj(freq=freq, future=future)
if future and not backend_obj.check_exists():
get_module_logger("data").warning(
f"load calendar error: freq={freq}, future={future}; return current calendar!"
)
get_module_logger("data").warning(
"You can get future calendar by referring to the following document: https://github.com/microsoft/qlib/blob/main/scripts/data_collector/contrib/README.md"
)
backend_obj = self.backend_obj(freq=freq, future=False)
try:
backend_obj = self.backend_obj(freq=freq, future=future).data
except ValueError:
if future:
get_module_logger("data").warning(
f"load calendar error: freq={freq}, future={future}; return current calendar!"
)
get_module_logger("data").warning(
"You can get future calendar by referring to the following document: https://github.com/microsoft/qlib/blob/main/scripts/data_collector/contrib/README.md"
)
backend_obj = self.backend_obj(freq=freq, future=False).data
else:
raise
return [pd.Timestamp(x) for x in backend_obj.data]
return [pd.Timestamp(x) for x in backend_obj]
def calendar(self, start_time=None, end_time=None, freq="day", future=False):
_calendar, _calendar_index = self._get_calendar(freq, future)
@@ -659,14 +663,7 @@ class LocalFeatureProvider(FeatureProvider):
# validate
field = str(field).lower()[1:]
instrument = code_to_fname(instrument)
try:
data = self.backend_obj(instrument=instrument, field=field, freq=freq)[start_index : end_index + 1]
except Exception as e:
get_module_logger("data").warning(
f"WARN: data not found for {instrument}.{field}\n\tFeature exception info: {str(e)}"
)
data = pd.Series(dtype=np.float32)
return data
return self.backend_obj(instrument=instrument, field=field, freq=freq)[start_index : end_index + 1]
class LocalExpressionProvider(ExpressionProvider):

View File

@@ -14,19 +14,35 @@ from qlib.data.storage import CalendarStorage, InstrumentStorage, FeatureStorage
logger = get_module_logger("file_storage")
class FileStorage:
def check_exists(self):
return self.uri.exists()
class FileStorageMixin:
    """Mixin that maps a storage object onto a file below ``provider_uri``.

    The mixin itself defines no state. It reads ``self.kwargs``,
    ``self.storage_name`` and ``self.file_name``, which must be supplied by
    the class it is mixed into.
    """

    @property
    def uri(self) -> Path:
        """Build the path of the backing file.

        The path is ``<provider_uri>/<storage_name>s/<file_name>`` with a
        leading ``~`` in ``provider_uri`` expanded.

        Raises
        -------
        ValueError
            If ``provider_uri`` is missing from ``self.kwargs`` (or is None).
        """
        root = self.kwargs.get("provider_uri")
        if root is not None:
            # e.g. storage_name "calendar" -> sub-directory "calendars"
            sub_dir = f"{self.storage_name}s"
            return Path(root).expanduser() / sub_dir / self.file_name

        owner = self.__class__.__name__
        raise ValueError(
            f"The `provider_uri` parameter is not found in {owner}, "
            'please specify `provider_uri` in the "provider\'s backend"'
        )

    def check(self):
        """Verify that the file behind ``self.uri`` exists.

        Raises
        -------
        ValueError
            If the path does not exist (or ``self.uri`` itself raises).
        """
        target = self.uri
        if target.exists():
            return
        raise ValueError(f"{self.storage_name} not exists: {target}")
class FileCalendarStorage(FileStorage, CalendarStorage):
def __init__(self, freq: str, future: bool, uri: str, **kwargs):
super(FileCalendarStorage, self).__init__(freq, future, uri, **kwargs)
_file_name = f"{freq}_future.txt" if future else f"{freq}.txt"
self.uri = Path(self.uri).expanduser().joinpath("calendars", _file_name.lower())
class FileCalendarStorage(FileStorageMixin, CalendarStorage):
def __init__(self, freq: str, future: bool, **kwargs):
    """Create a file-backed calendar storage.

    Parameters
    ----------
    freq : str
        Calendar frequency; used (lower-cased) as the file-name stem.
    future : bool
        When True the file name gets a ``_future`` suffix before ``.txt``.
    **kwargs
        Forwarded unchanged to the parent ``__init__``.
    """
    super(FileCalendarStorage, self).__init__(freq, future, **kwargs)
    # A conditional expression binds looser than a method call, so
    # `a if cond else b.lower()` lower-cases only `b`. Build the name first
    # and lower-case the whole thing, so both variants are normalised.
    suffix = "_future" if future else ""
    self.file_name = f"{freq}{suffix}.txt".lower()
def _read_calendar(self, skip_rows: int = 0, n_rows: int = None) -> Iterable[CalVT]:
if not self.check_exists():
def _read_calendar(self, skip_rows: int = 0, n_rows: int = None) -> List[CalVT]:
if not self.uri.exists():
self._write_calendar(values=[])
with self.uri.open("rb") as fp:
return [
@@ -39,7 +55,8 @@ class FileCalendarStorage(FileStorage, CalendarStorage):
np.savetxt(fp, values, fmt="%s", encoding="utf-8")
@property
def data(self) -> Iterable[CalVT]:
def data(self) -> List[CalVT]:
self.check()
return self._read_calendar()
def extend(self, values: Iterable[CalVT]) -> None:
@@ -49,6 +66,7 @@ class FileCalendarStorage(FileStorage, CalendarStorage):
self._write_calendar(values=[])
def index(self, value: CalVT) -> int:
    """Return the position of the first calendar entry equal to ``value``.

    Raises
    ------
    ValueError
        Raised by ``self.check()`` when the storage file does not exist.
    IndexError
        If no calendar entry equals ``value``.
    """
    self.check()
    calendar = self._read_calendar()
    # `_read_calendar` returns a plain list, and `list == value` is a single
    # bool rather than an element-wise mask. Convert to an array first so the
    # comparison runs per entry.
    matches = np.flatnonzero(np.asarray(calendar) == value)
    # Indexing an empty result raises IndexError, i.e. "value not found".
    return int(matches[0])
@@ -58,6 +76,7 @@ class FileCalendarStorage(FileStorage, CalendarStorage):
self._write_calendar(values=calendar)
def remove(self, value: CalVT) -> None:
self.check()
index = self.index(value)
calendar = self._read_calendar()
calendar = np.delete(calendar, index)
@@ -69,24 +88,29 @@ class FileCalendarStorage(FileStorage, CalendarStorage):
self._write_calendar(values=calendar)
def __delitem__(self, i: Union[int, slice]) -> None:
self.check()
calendar = self._read_calendar()
calendar = np.delete(calendar, i)
self._write_calendar(values=calendar)
def __getitem__(self, i: Union[int, slice]) -> Union[CalVT, Iterable[CalVT]]:
def __getitem__(self, i: Union[int, slice]) -> Union[CalVT, List[CalVT]]:
self.check()
return self._read_calendar()[i]
def __len__(self) -> int:
return len(self.data)
class FileInstrumentStorage(FileStorage, InstrumentStorage):
class FileInstrumentStorage(FileStorageMixin, InstrumentStorage):
INSTRUMENT_SEP = "\t"
INSTRUMENT_START_FIELD = "start_datetime"
INSTRUMENT_END_FIELD = "end_datetime"
SYMBOL_FIELD_NAME = "instrument"
def __init__(self, market: str, uri: str, **kwargs):
super(FileInstrumentStorage, self).__init__(market, uri, **kwargs)
self.uri = Path(self.uri).expanduser().joinpath("instruments", f"{market.lower()}.txt")
def __init__(self, market: str, **kwargs):
super(FileInstrumentStorage, self).__init__(market, **kwargs)
self.file_name = f"{market.lower()}.txt"
def _read_instrument(self) -> Dict[InstKT, InstVT]:
if not self.uri.exists():
@@ -128,6 +152,7 @@ class FileInstrumentStorage(FileStorage, InstrumentStorage):
@property
def data(self) -> Dict[InstKT, InstVT]:
self.check()
return self._read_instrument()
def __setitem__(self, k: InstKT, v: InstVT) -> None:
@@ -136,11 +161,13 @@ class FileInstrumentStorage(FileStorage, InstrumentStorage):
self._write_instrument(inst)
def __delitem__(self, k: InstKT) -> None:
self.check()
inst = self._read_instrument()
del inst[k]
self._write_instrument(inst)
def __getitem__(self, k: InstKT) -> InstVT:
self.check()
return self._read_instrument()[k]
def update(self, *args, **kwargs) -> None:
@@ -164,13 +191,14 @@ class FileInstrumentStorage(FileStorage, InstrumentStorage):
self._write_instrument(inst)
def __len__(self) -> int:
return len(self.data)
class FileFeatureStorage(FileStorage, FeatureStorage):
def __init__(self, instrument: str, field: str, freq: str, uri: str, **kwargs):
super(FileFeatureStorage, self).__init__(instrument, field, freq, uri, **kwargs)
self.uri = (
Path(self.uri).expanduser().joinpath("features", instrument.lower(), f"{field.lower()}.{freq.lower()}.bin")
)
class FileFeatureStorage(FileStorageMixin, FeatureStorage):
def __init__(self, instrument: str, field: str, freq: str, **kwargs):
super(FileFeatureStorage, self).__init__(instrument, field, freq, **kwargs)
self.file_name = f"{instrument.lower()}/{field.lower()}.{freq.lower()}.bin"
def clear(self):
with self.uri.open("wb") as _:
@@ -214,35 +242,44 @@ class FileFeatureStorage(FileStorage, FeatureStorage):
@property
def start_index(self) -> Union[int, None]:
if len(self) == 0:
if not self.uri.exists():
return None
with open(self.uri, "rb") as fp:
with self.uri.open("rb") as fp:
index = int(np.frombuffer(fp.read(4), dtype="<f")[0])
return index
@property
def end_index(self) -> Union[int, None]:
if not self.uri.exists():
return None
# The next data appending index point will be `end_index + 1`
return self.start_index + len(self) - 1
def __getitem__(self, i: Union[int, slice]) -> Union[Tuple[int, float], pd.Series]:
if not self.uri.exists():
if isinstance(i, int):
return None, None
elif isinstance(i, slice):
return pd.Series()
return pd.Series(dtype=np.float32)
else:
raise TypeError(f"type(i) = {type(i)}")
with open(self.uri, "rb") as fp:
storage_start_index = self.start_index
storage_end_index = self.end_index
with self.uri.open("rb") as fp:
if isinstance(i, int):
if self.start_index > i:
raise IndexError(f"{i}: start index is {self.start_index}")
fp.seek(4 * (i - self.start_index) + 4)
if storage_start_index > i:
raise IndexError(f"{i}: start index is {storage_start_index}")
fp.seek(4 * (i - storage_start_index) + 4)
return i, struct.unpack("f", fp.read(4))[0]
elif isinstance(i, slice):
start_index = self.start_index if i.start is None else i.start
end_index = self.end_index if i.stop is None else i.stop - 1
si = max(self.start_index, start_index)
start_index = storage_start_index if i.start is None else i.start
end_index = storage_end_index if i.stop is None else i.stop - 1
si = max(start_index, storage_start_index)
if si > end_index:
return pd.Series()
fp.seek(4 * (si - self.start_index) + 4)
return pd.Series(dtype=np.float32)
fp.seek(4 * (si - storage_start_index) + 4)
# read n bytes
count = end_index - si + 1
data = np.frombuffer(fp.read(4 * count), dtype="<f")
@@ -251,4 +288,5 @@ class FileFeatureStorage(FileStorage, FeatureStorage):
raise TypeError(f"type(i) = {type(i)}")
def __len__(self) -> int:
return self.uri.stat().st_size // 4 - 1 if self.check_exists() else 0
self.check()
return self.uri.stat().st_size // 4 - 1

View File

@@ -25,24 +25,28 @@ class UserCalendarStorage(CalendarStorage):
@property
def data(self) -> Iterable[CalVT]:
'''get all data'''
raise NotImplementedError("Subclass of CalendarStorage must implement `data` method")
'''get all data
def check_exists(self) -> bool:
'''check if storage(uri) exists, if not exists: return False'''
raise NotImplementedError("Subclass of BaseStorage must implement `check_exists` method")
Raises
------
ValueError
If the data(storage) does not exist, raise ValueError
'''
raise NotImplementedError("Subclass of CalendarStorage must implement `data` method")
class UserInstrumentStorage(InstrumentStorage):
@property
def data(self) -> Dict[InstKT, InstVT]:
'''get all data'''
raise NotImplementedError("Subclass of InstrumentStorage must implement `data` method")
'''get all data
def check_exists(self) -> bool:
'''check if storage(uri) exists, if not exists: return False'''
raise NotImplementedError("Subclass of BaseStorage must implement `check_exists` method")
Raises
------
ValueError
If the data(storage) does not exist, raise ValueError
'''
raise NotImplementedError("Subclass of InstrumentStorage must implement `data` method")
class UserFeatureStorage(FeatureStorage):
@@ -53,103 +57,64 @@ class UserFeatureStorage(FeatureStorage):
Returns
-------
pd.Series(values, index=pd.RangeIndex(start, len(values))
Notes
-------
if data(storage) does not exist:
if isinstance(i, int):
return (None, None)
if isinstance(i, slice):
# return empty pd.Series
return pd.Series(dtype=np.float32)
'''
raise NotImplementedError(
"Subclass of FeatureStorage must implement `__getitem__(s: slice)` method"
)
def check_exists(self) -> bool:
'''check if storage(uri) exists, if not exists: return False'''
raise NotImplementedError("Subclass of BaseStorage must implement `check_exists` method")
"""
class StorageMeta(type):
"""unified management of raise when storage is not exists"""
def __new__(cls, name, bases, dict):
class_obj = type.__new__(cls, name, bases, dict)
# The calls to __iter__ and __getitem__ do not pass through __getattribute__.
# In order to throw an exception before calling __getitem__, use the metaclass
_getitem_func = getattr(class_obj, "__getitem__")
def _getitem(obj, item):
getattr(obj, "_check")()
try:
res = _getitem_func(obj, item)
except Exception as e:
raise ValueError(f"{obj.raise_info}\n\tStorage exception info: {str(e)}")
return res
setattr(class_obj, "__getitem__", _getitem)
return class_obj
class BaseStorage(metaclass=StorageMeta):
class BaseStorage:
@property
def storage_name(self) -> str:
return re.findall("[A-Z][^A-Z]*", self.__class__.__name__)[-2].lower()
@property
def raise_info(self):
parameters_info = [
f"{_k}={_v}"
for _k, _v in self.__dict__.items()
if not isinstance(_v, (dict,)) or (hasattr(_v, "__len__") and len(_v) < 3)
]
return f"{self.storage_name.lower()} not exists, storage parameters: {parameters_info}"
def check_exists(self) -> bool:
"""check if storage(uri) exists, if not exists: return False"""
raise NotImplementedError("Subclass of BaseStorage must implement `check_exists` method")
def clear(self) -> None:
"""clear storage"""
raise NotImplementedError("Subclass of BaseStorage must implement `clear` method")
def __len__(self) -> 0:
return len(self.data) if self.check_exists() else 0
def __getitem__(self, item: Union[slice, Union[int, InstKT]]):
raise NotImplementedError(
"Subclass of BaseStorage must implement `__getitem__(i: Union[int, InstKT])`/`__getitem__(s: slice)` method"
)
def _check(self):
if not self.check_exists():
raise ValueError(self.raise_info)
def __getattribute__(self, item):
if item == "data":
self._check()
try:
res = super(BaseStorage, self).__getattribute__(item)
except Exception as e:
raise ValueError(f"{self.raise_info}\n\tStorage exception info: {str(e)}")
return res
class CalendarStorage(BaseStorage):
"""
The behavior of CalendarStorage's methods and List's methods of the same name remain consistent
"""
def __init__(self, freq: str, future: bool, uri: str, **kwargs):
def __init__(self, freq: str, future: bool, **kwargs):
self.freq = freq
self.future = future
self.uri = uri
self.kwargs = kwargs
@property
def data(self) -> Iterable[CalVT]:
"""get all data"""
"""get all data
Raises
------
ValueError
If the data(storage) does not exist, raise ValueError
"""
raise NotImplementedError("Subclass of CalendarStorage must implement `data` method")
def clear(self) -> None:
raise NotImplementedError("Subclass of CalendarStorage must implement `clear` method")
def extend(self, iterable: Iterable[CalVT]) -> None:
raise NotImplementedError("Subclass of CalendarStorage must implement `extend` method")
def index(self, value: CalVT) -> int:
"""
Raises
------
ValueError
If the data(storage) does not exist, raise ValueError
"""
raise NotImplementedError("Subclass of CalendarStorage must implement `index` method")
def insert(self, index: int, value: CalVT) -> None:
@@ -184,6 +149,12 @@ class CalendarStorage(BaseStorage):
...
def __delitem__(self, i) -> None:
"""
Raises
------
ValueError
If the data(storage) does not exist, raise ValueError
"""
raise NotImplementedError(
"Subclass of CalendarStorage must implement `__delitem__(i: int)`/`__delitem__(s: slice)` method"
)
@@ -199,26 +170,60 @@ class CalendarStorage(BaseStorage):
...
def __getitem__(self, i) -> CalVT:
"""
Raises
------
ValueError
If the data(storage) does not exist, raise ValueError
"""
raise NotImplementedError(
"Subclass of CalendarStorage must implement `__getitem__(i: int)`/`__getitem__(s: slice)` method"
)
def __len__(self) -> int:
"""
Raises
------
ValueError
If the data(storage) does not exist, raise ValueError
"""
raise NotImplementedError("Subclass of CalendarStorage must implement `__len__` method")
class InstrumentStorage(BaseStorage):
def __init__(self, market: str, uri: str, **kwargs):
def __init__(self, market: str, **kwargs):
self.market = market
self.uri = uri
self.kwargs = kwargs
@property
def data(self) -> Dict[InstKT, InstVT]:
"""get all data"""
"""get all data
Raises
------
ValueError
If the data(storage) does not exist, raise ValueError
"""
raise NotImplementedError("Subclass of InstrumentStorage must implement `data` method")
def clear(self) -> None:
raise NotImplementedError("Subclass of InstrumentStorage must implement `clear` method")
def update(self, *args, **kwargs) -> None:
"""D.update([E, ]**F) -> None. Update D from mapping/iterable E and F.
If E present and has a .keys() method, does: for k in E: D[k] = E[k]
If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v
In either case, this is followed by: for k, v in F.items(): D[k] = v
Notes
------
If E present and has a .keys() method, does: for k in E: D[k] = E[k]
If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v
In either case, this is followed by: for k, v in F.items(): D[k] = v
"""
raise NotImplementedError("Subclass of InstrumentStorage must implement `update` method")
@@ -227,53 +232,96 @@ class InstrumentStorage(BaseStorage):
raise NotImplementedError("Subclass of InstrumentStorage must implement `__setitem__` method")
def __delitem__(self, k: InstKT) -> None:
"""Delete self[key]."""
"""Delete self[key].
Raises
------
ValueError
If the data(storage) does not exist, raise ValueError
"""
raise NotImplementedError("Subclass of InstrumentStorage must implement `__delitem__` method")
def __getitem__(self, k: InstKT) -> InstVT:
"""x.__getitem__(k) <==> x[k]"""
raise NotImplementedError("Subclass of InstrumentStorage must implement `__getitem__` method")
def __len__(self) -> int:
"""
Raises
------
ValueError
If the data(storage) does not exist, raise ValueError
"""
raise NotImplementedError("Subclass of InstrumentStorage must implement `__len__` method")
class FeatureStorage(BaseStorage):
def __init__(self, instrument: str, field: str, freq: str, uri: str, **kwargs):
def __init__(self, instrument: str, field: str, freq: str, **kwargs):
self.instrument = instrument
self.field = field
self.freq = freq
self.uri = uri
self.kwargs = kwargs
@property
def data(self) -> pd.Series:
"""get all data"""
"""get all data
Notes
------
if data(storage) does not exist, return empty pd.Series: `return pd.Series(dtype=np.float32)`
"""
raise NotImplementedError("Subclass of FeatureStorage must implement `data` method")
@property
def start_index(self) -> Union[int, None]:
"""get FeatureStorage start index
If len(self) == 0; return None
Notes
-----
If the data(storage) does not exist, return None
"""
raise NotImplementedError("Subclass of FeatureStorage must implement `data` method")
raise NotImplementedError("Subclass of FeatureStorage must implement `start_index` method")
@property
def end_index(self) -> Union[int, None]:
if len(self) == 0:
return None
return None if len(self) == 0 else self.start_index + len(self) - 1
"""get FeatureStorage end index
Notes
-----
The right index of the data range (both sides are closed)
The next data appending point will be `end_index + 1`
If the data(storage) does not exist, return None
"""
raise NotImplementedError("Subclass of FeatureStorage must implement `end_index` method")
def clear(self) -> None:
raise NotImplementedError("Subclass of FeatureStorage must implement `clear` method")
def write(self, data_array: Union[List, np.ndarray, Tuple], index: int = None):
"""Write data_array to FeatureStorage starting from index.
If index is None, append data_array to feature.
If len(data_array) == 0; return
If (index - self.end_index) >= 1, self[end_index+1: index] will be filled with np.nan
Notes
------
If index is None, append data_array to feature.
Examples:
If len(data_array) == 0; return
If (index - self.end_index) >= 1, self[end_index+1: index] will be filled with np.nan
Examples
---------
.. code-block::
feature:
3 4
4 5
5 6
>>> self.write([6, 7], index=6)
feature:
@@ -311,56 +359,70 @@ class FeatureStorage(BaseStorage):
def rebase(self, start_index: int = None, end_index: int = None):
"""Rebase the start_index and end_index of the FeatureStorage.
Examples:
start_index and end_index are closed intervals: [start_index, end_index]
feature:
3 4
4 5
5 6
Examples
---------
>>> self.rebase(start_index=4)
.. code-block::
feature:
4 5
5 6
feature:
3 4
4 5
5 6
>>> self.rebase(start_index=3)
feature:
3 np.nan
4 5
5 6
>>> self.rebase(start_index=4)
>>> self.write([3], index=3)
feature:
4 5
5 6
feature:
3 3
4 5
5 6
>>> self.rebase(start_index=3)
>>> self.rebase(end_index=4)
feature:
3 np.nan
4 5
5 6
feature:
3 3
4 5
>>> self.write([3], index=3)
>>> self.write([6, 7, 8], index=4)
feature:
3 3
4 5
5 6
feature:
3 3
4 6
5 7
6 8
>>> self.rebase(end_index=4)
>>> self.rebase(start_index=4, end_index=5)
feature:
3 3
4 5
feature:
4 6
5 7
>>> self.write([6, 7, 8], index=4)
feature:
3 3
4 6
5 7
6 8
>>> self.rebase(start_index=4, end_index=5)
feature:
4 6
5 7
"""
if start_index is None and end_index is None:
logger.warning("both start_index and end_index are None, rebase is ignored")
storage_si = self.start_index
storage_ei = self.end_index
if storage_si is None or storage_ei is None:
raise ValueError("storage.start_index or storage.end_index is None, storage may not exist")
start_index = storage_si if start_index is None else start_index
end_index = storage_ei if end_index is None else end_index
if start_index is None or end_index is None:
logger.warning("both start_index and end_index are None, or storage does not exist; rebase is ignored")
return
if start_index < 0 or end_index < 0:
@@ -373,17 +435,15 @@ class FeatureStorage(BaseStorage):
)
return
start_index = self.start_index if start_index is None else end_index
end_index = self.end_index if end_index is None else end_index
if start_index <= self.start_index:
self.write([np.nan] * (self.start_index - start_index), start_index)
if start_index <= storage_si:
self.write([np.nan] * (storage_si - start_index), start_index)
else:
self.rewrite(self[start_index:].values, start_index)
if end_index >= self.end_index:
self.write([np.nan] * (end_index - self.end_index))
else:
self.rewrite(self[: end_index + 1].values, self.start_index)
self.rewrite(self[: end_index + 1].values, start_index)
def rewrite(self, data: Union[List, np.ndarray, Tuple], index: int):
"""overwrite all data in FeatureStorage with data
@@ -414,7 +474,28 @@ class FeatureStorage(BaseStorage):
...
def __getitem__(self, i) -> Union[Tuple[int, float], pd.Series]:
"""x.__getitem__(y) <==> x[y]"""
"""x.__getitem__(y) <==> x[y]
Notes
-------
if data(storage) does not exist:
if isinstance(i, int):
return (None, None)
if isinstance(i, slice):
# return empty pd.Series
return pd.Series(dtype=np.float32)
"""
raise NotImplementedError(
"Subclass of FeatureStorage must implement `__getitem__(i: int)`/`__getitem__(s: slice)` method"
)
def __len__(self) -> int:
"""
Raises
------
ValueError
If the data(storage) does not exist, raise ValueError
"""
raise NotImplementedError("Subclass of FeatureStorage must implement `__len__` method")

View File

@@ -665,7 +665,10 @@ def exists_qlib_data(qlib_dir):
return False
# check calendar bin
for _calendar in calendars_dir.iterdir():
if not list(features_dir.rglob(f"*.{_calendar.name.split('.')[0]}.bin")):
if ("_future" not in _calendar.name) and (
not list(features_dir.rglob(f"*.{_calendar.name.split('.')[0]}.bin"))
):
return False
# check instruments

View File

@@ -120,7 +120,7 @@ class DumpDataBase:
else:
df = file_or_df
if df.empty or self.date_field_name not in df.columns.tolist():
_calendars = pd.Series()
_calendars = pd.Series(dtype=np.float32)
else:
_calendars = df[self.date_field_name]

View File

@@ -24,7 +24,7 @@ QLIB_DIR.mkdir(exist_ok=True, parents=True)
class TestStorage(TestAutoData):
def test_calendar_storage(self):
calendar = CalendarStorage(freq="day", future=False, uri=self.provider_uri)
calendar = CalendarStorage(freq="day", future=False, provider_uri=self.provider_uri)
assert isinstance(calendar[:], Iterable), f"{calendar.__class__.__name__}.__getitem__(s: slice) is not Iterable"
assert isinstance(calendar.data, Iterable), f"{calendar.__class__.__name__}.data is not Iterable"
@@ -32,6 +32,16 @@ class TestStorage(TestAutoData):
print(f"calendar[0]: {calendar[0]}")
print(f"calendar[-1]: {calendar[-1]}")
calendar = CalendarStorage(freq="1min", future=False, provider_uri="not_found")
with pytest.raises(ValueError):
print(calendar.data)
with pytest.raises(ValueError):
print(calendar[:])
with pytest.raises(ValueError):
print(calendar[0])
def test_instrument_storage(self):
"""
The meaning of instrument, such as CSI500:
@@ -66,7 +76,7 @@ class TestStorage(TestAutoData):
"""
instrument = InstrumentStorage(market="csi300", uri=self.provider_uri)
instrument = InstrumentStorage(market="csi300", provider_uri=self.provider_uri)
for inst, spans in instrument.data.items():
assert isinstance(inst, str) and isinstance(
@@ -79,6 +89,13 @@ class TestStorage(TestAutoData):
print(f"instrument['SH600000']: {instrument['SH600000']}")
instrument = InstrumentStorage(market="csi300", provider_uri="not_found")
with pytest.raises(ValueError):
print(instrument.data)
with pytest.raises(ValueError):
print(instrument["sSH600000"])
def test_feature_storage(self):
"""
Calendar:
@@ -133,9 +150,9 @@ class TestStorage(TestAutoData):
"""
feature = FeatureStorage(instrument="SH600004", field="close", freq="day", uri=self.provider_uri)
feature = FeatureStorage(instrument="SH600004", field="close", freq="day", provider_uri=self.provider_uri)
with pytest.raises(ValueError):
with pytest.raises(IndexError):
print(feature[0])
assert isinstance(
feature[815][1], (float, np.float32)
@@ -144,3 +161,11 @@ class TestStorage(TestAutoData):
print(f"feature[815: 818]: \n{feature[815: 818]}")
print(f"feature[:].tail(): \n{feature[:].tail()}")
feature = FeatureStorage(instrument="SH600004", field="close", freq="day", provider_uri="not_fount")
assert feature[0] == (None, None), "FeatureStorage does not exist, feature[i] should return `(None, None)`"
assert feature[:].empty, "FeatureStorage does not exist, feature[:] should return `pd.Series(dtype=np.float32)`"
assert (
feature.data.empty
), "FeatureStorage does not exist, feature.data should return `pd.Series(dtype=np.float32)`"