From 9b8acd9a82f451c3a5f3c145020e2e11f66ed05a Mon Sep 17 00:00:00 2001 From: zhupr Date: Sat, 27 Mar 2021 01:15:33 +0800 Subject: [PATCH] Fix FileStorage --- qlib/data/storage/__init__.py | 2 +- qlib/data/storage/file_storage.py | 20 +++-- qlib/data/storage/storage.py | 2 +- tests/storage_tests/test_storage.py | 120 +++++++++++++--------------- 4 files changed, 73 insertions(+), 71 deletions(-) diff --git a/qlib/data/storage/__init__.py b/qlib/data/storage/__init__.py index eb513714b..f42504791 100644 --- a/qlib/data/storage/__init__.py +++ b/qlib/data/storage/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from .storage import CalendarStorage, InstrumentStorage, FeatureStorage \ No newline at end of file +from .storage import CalendarStorage, InstrumentStorage, FeatureStorage diff --git a/qlib/data/storage/file_storage.py b/qlib/data/storage/file_storage.py index 9d98545ce..aadc918c3 100644 --- a/qlib/data/storage/file_storage.py +++ b/qlib/data/storage/file_storage.py @@ -1,14 +1,15 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +import struct from pathlib import Path from typing import Iterator, Iterable, Type, List, Tuple, Text, Union -from data.storage.storage import FeatureVT +from .storage import FeatureVT import numpy as np import pandas as pd -from qlib.data.storage import CalendarStorage, InstrumentStorage, FeatureStorage +from . import CalendarStorage, InstrumentStorage, FeatureStorage CalVT = Type[pd.Timestamp] @@ -70,9 +71,9 @@ class FileFeatureStorage(FeatureStorage): if isinstance(i, int): if ref_start_index > i: - raise IndexError(f"{i}") + raise IndexError(f"{i}: start index is {ref_start_index}") fp.seek(4 * (i - ref_start_index) + 4) - return i, float(fp.read(4)) + return i, struct.unpack("f", fp.read(4)) elif isinstance(i, slice): start_index = i.start end_index = i.stop - 1 @@ -83,9 +84,18 @@ class FileFeatureStorage(FeatureStorage): # read n bytes count = end_index - si + 1 data = np.frombuffer(fp.read(4 * count), dtype=" int: return Path(self._uri).stat().st_size // 4 - 1 + + def __iter__(self): + with open(self._uri, "rb") as fp: + ref_start_index = int(np.frombuffer(fp.read(4), dtype=")] - close_items = feature[31:34] + assert isinstance(feature, Iterable), f"{feature.__class__.__name__} is not Iterable" + with pytest.raises(IndexError): + print(feature[0]) + assert len(feature[815:818]) == 3, f"{feature.__class__.__name__}.__getitem__(s: slice) error" + print(f"feature[815: 818]: {feature[815: 818]}") - # 2005-02-01, ..., 2005-03-01 - # feature: [(31, 1), ..., (59, 4)] - print(feature) - - assert ( - len(feature) == len(feature[:]) == len(feature[31:60]) == 29 - ), f"{feature.__name__}.items/__getitem__(s: slice) error" + for _item in feature: + assert ( + isinstance(_item, tuple) and len(_item) == 2 + ), f"{feature.__class__.__name__}.__iter__ item type error" + assert isinstance(_item[0], int) and isinstance( + _item[1], (float, np.float, np.float32) + ), f"{feature.__class__.__name__}.__iter__ value type error"