diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py index eab889e85..bb1da2754 100644 --- a/qlib/data/dataset/handler.py +++ b/qlib/data/dataset/handler.py @@ -257,7 +257,7 @@ class DataHandler(Serializable): selector=selector, level=level, col_set=col_set, fetch_orig=self.fetch_orig, proc_func=proc_func ) else: - raise TypeError(f"data_storage should be pd.DataFrame|HasingStockStorage, not {type(data_storage)}") + raise TypeError(f"data_storage should be pd.DataFrame|HashingStockStorage, not {type(data_storage)}") if squeeze: # squeeze columns diff --git a/qlib/data/dataset/processor.py b/qlib/data/dataset/processor.py index ec3fa5506..6381937d3 100644 --- a/qlib/data/dataset/processor.py +++ b/qlib/data/dataset/processor.py @@ -349,6 +349,6 @@ class HashStockFormat(Processor): """Process the storage of from df into hasing stock format""" def __call__(self, df: pd.DataFrame): - from .storage import HasingStockStorage # pylint: disable=C0415 + from .storage import HashingStockStorage # pylint: disable=C0415 - return HasingStockStorage.from_df(df) + return HashingStockStorage.from_df(df) diff --git a/qlib/data/dataset/storage.py b/qlib/data/dataset/storage.py index 42f003269..a8ccdadaa 100644 --- a/qlib/data/dataset/storage.py +++ b/qlib/data/dataset/storage.py @@ -59,11 +59,11 @@ class BaseHandlerStorage: raise NotImplementedError("is_proc_func_supported method is not implemented!") -class HasingStockStorage(BaseHandlerStorage): - """Hasing data storage for datahanlder +class HashingStockStorage(BaseHandlerStorage): + """Hashing data storage for datahandler - The default data storage pandas.DataFrame is too slow when randomly accessing one stock's data - - HasingStockStorage hashes the multiple stocks' data(pandas.DataFrame) by the key `stock_id`. 
- - HasingStockStorage hashes the pandas.DataFrame into a dict, whose key is the stock_id(str) and value this stock data(panda.DataFrame), it has the following format: + - HashingStockStorage hashes the multiple stocks' data(pandas.DataFrame) by the key `stock_id`. + - HashingStockStorage hashes the pandas.DataFrame into a dict, whose key is the stock_id(str) and value this stock data(pandas.DataFrame), it has the following format: { stock1_id: stock1_data, stock2_id: stock2_data, @@ -82,7 +82,7 @@ class HasingStockStorage(BaseHandlerStorage): @staticmethod def from_df(df): - return HasingStockStorage(df) + return HashingStockStorage(df) def _fetch_hash_df_by_stock(self, selector, level): """fetch the data with stock selector @@ -153,5 +153,5 @@ class HasingStockStorage(BaseHandlerStorage): return pd.concat(fetch_stock_df_list, sort=False, copy=~fetch_orig) def is_proc_func_supported(self): - """the arg `proc_func` in `fetch` method is not supported in HasingStockStorage""" + """the arg `proc_func` in `fetch` method is not supported in HashingStockStorage""" return False diff --git a/tests/test_handler_storage.py b/tests/test_handler_storage.py index 056595063..0d8ad4d57 100644 --- a/tests/test_handler_storage.py +++ b/tests/test_handler_storage.py @@ -95,7 +95,7 @@ class TestHandlerStorage(TestAutoData): fetch_stocks = [instruments[_index] for _index in random_indexs] data_handler.fetch(selector=(fetch_stocks, slice(fetch_start_time, fetch_end_time)), level=None) - with TimeInspector.logt("random fetch with HasingStock Storage"): + with TimeInspector.logt("random fetch with HashingStock Storage"): # single stock for i in range(100):