1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-04 11:30:57 +08:00

better MemCacheUnit implement

This commit is contained in:
hadrianl
2020-12-21 18:17:20 +08:00
committed by you-n-g
parent 7e37fa710a
commit ae0e0eca3d

View File

@@ -13,6 +13,7 @@ import pickle
import traceback
import redis_lock
import contextlib
import abc
from pathlib import Path
import numpy as np
import pandas as pd
@@ -39,33 +40,127 @@ class QlibCacheException(RuntimeError):
pass
class MemCacheUnit(abc.ABC):
    """Memory Cache Unit.

    An LRU-style cache that wraps an ``OrderedDict`` (``self.od``) and keeps a
    running total (``self._size``) of how much it currently holds. How an
    entry contributes to that total is decided by subclasses through
    ``_adjust_size``, ``popitem`` and ``pop``.

    Keyword Args
    ------------
    size_limit : int
        Maximum total size. ``0`` (the default) means unlimited.
    limit_type : str
        ``"length"`` or ``"sizeof"``; stored for reporting in ``__repr__``.

    Raises
    ------
    ValueError
        If ``limit_type`` is not one of the two accepted values or
        ``size_limit`` is negative.
    """

    # TODO: use min_heap to replace ordereddict for better performance
    def __init__(self, *args, **kwargs):
        # limit_type: check size_limit type, length(call fun: len) or size(call fun: sys.getsizeof)
        self.size_limit = kwargs.pop("size_limit", 0)
        self.limit_type = kwargs.pop("limit_type", "length")

        # Raise explicitly instead of asserting: asserts are stripped under -O
        # and would surface as AssertionError rather than ValueError.
        if self.limit_type not in ("length", "sizeof"):
            raise ValueError("limit_type shoule be one of ['length', 'sizeof']")
        if self.size_limit < 0:
            raise ValueError("size_limit shoule not be negative.The default 0 means unlimited!")

        # limit_flag: whether to popitem or not
        self._limit_flag = 1 if self.size_limit > 0 else 0
        self._size = 0
        self.od = OrderedDict()

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, mark it most recent, then evict if over limit."""
        # TODO: thread safe?__setitem__ failure might cause inconsistent size?

        # The size must be adjusted BEFORE the write: subclasses look at
        # self.od to tell a fresh insert from an overwrite.
        self._adjust_size(key, value)
        self.od[key] = value
        # move the key to end, make it latest
        self.od.move_to_end(key)
        self._check_size_limit()

    def __getitem__(self, key):
        """Return the value for ``key`` and mark it as most recently used."""
        value = self.od[key]
        self.od.move_to_end(key)
        return value

    def _check_size_limit(self):
        """Pop the oldest items until the total size is within ``size_limit``.

        Multiplying by ``_limit_flag`` (0 or 1) turns the check into a no-op
        when limiting is disabled.
        """
        while self._size * self._limit_flag > self.size_limit:
            self.popitem(last=False)

    def __contains__(self, key):
        return key in self.od

    def __len__(self):
        return len(self.od)

    def __repr__(self):
        shown_limit = self.size_limit if self.is_limited else "no limit"
        return (
            f"{self.__class__.__name__}<limit_type:{self.limit_type} "
            f"size_limit:{shown_limit} total_size:{self._size}>\n{self.od.__repr__()}"
        )

    def set_limit_size(self, limit):
        """Replace ``size_limit``; existing entries are not evicted here."""
        self.size_limit = limit

    def set_limit(self, is_limited=True):
        """Turn eviction on or off without changing ``size_limit``."""
        self._limit_flag = 1 if is_limited else 0

    @property
    def is_limited(self):
        """Whether eviction is currently enabled."""
        return bool(self._limit_flag)

    @property
    def total_size(self):
        """Current accumulated size as tracked by the subclass accounting."""
        return self._size

    def clear(self):
        """Drop every entry and reset the tracked size to zero."""
        self._size = 0
        self.od.clear()

    @abc.abstractmethod
    def _adjust_size(self, key, value):
        """Update ``self._size`` for an upcoming write of ``value`` under ``key``."""
        raise NotImplementedError

    @abc.abstractmethod
    def popitem(self, last=True):
        """Remove and return a ``(key, value)`` pair, updating ``self._size``."""
        raise NotImplementedError

    @abc.abstractmethod
    def pop(self, key):
        """Remove ``key`` and return its value, updating ``self._size``."""
        raise NotImplementedError
class MemCacheLengthUnit(MemCacheUnit):
    """Cache unit whose tracked size is the number of stored entries."""

    def __init__(self, size_limit=0):
        super().__init__(size_limit=size_limit, limit_type="length")

    def _adjust_size(self, key, value):
        # Overwriting an existing key does not change the entry count.
        if key in self.od:
            return
        self._size += 1

    def popitem(self, last=True):
        """Remove and return a ``(key, value)`` pair, decrementing the count."""
        item = self.od.popitem(last=last)
        self._size -= 1
        return item

    def pop(self, key):
        """Remove ``key`` and return its value, decrementing the count."""
        removed = self.od.pop(key)
        self._size -= 1
        return removed
class MemCacheSizeofUnit(MemCacheUnit):
    """Cache unit whose tracked size is the sum of ``sys.getsizeof`` over stored values."""

    def __init__(self, size_limit=0):
        super().__init__(size_limit=size_limit, limit_type="sizeof")

    def _adjust_size(self, key, value):
        # On overwrite, swap the old value's size for the new one;
        # on first insert, just add the new value's size.
        if key not in self.od:
            self._size += sys.getsizeof(value)
            return
        previous = self.od[key]
        self._size = self._size - sys.getsizeof(previous) + sys.getsizeof(value)

    def popitem(self, last=True):
        """Remove and return a ``(key, value)`` pair, subtracting the value's size."""
        evicted_key, evicted_value = self.od.popitem(last=last)
        self._size -= sys.getsizeof(evicted_value)
        return evicted_key, evicted_value

    def pop(self, key):
        """Remove ``key`` and return its value, subtracting the value's size."""
        removed = self.od.pop(key)
        self._size -= sys.getsizeof(removed)
        return removed
class MemCache:
@@ -79,22 +174,20 @@ class MemCache:
mem_cache_size_limit: cache max size.
limit_type: length or sizeof; length(call fun: len), size(call fun: sys.getsizeof).
"""
if limit_type not in ["length", "sizeof"]:
size_limit = C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit
if limit_type == "length":
self.__calendar_mem_cache = MemCacheLengthUnit(size_limit)
self.__instrument_mem_cache = MemCacheLengthUnit(size_limit)
self.__feature_mem_cache = MemCacheLengthUnit(size_limit)
elif limit_type == "sizeof":
self.__calendar_mem_cache = MemCacheSizeofUnit(size_limit)
self.__instrument_mem_cache = MemCacheSizeofUnit(size_limit)
self.__feature_mem_cache = MemCacheSizeofUnit(size_limit)
else:
raise ValueError(f"limit_type must be length or sizeof, your limit_type is {limit_type}")
self.__calendar_mem_cache = MemCacheUnit(
size_limit=C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit,
limit_type=limit_type,
)
self.__instrument_mem_cache = MemCacheUnit(
size_limit=C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit,
limit_type=limit_type,
)
self.__feature_mem_cache = MemCacheUnit(
size_limit=C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit,
limit_type=limit_type,
)
def __getitem__(self, key):
if key == "c":
return self.__calendar_mem_cache