mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-03 02:50:58 +08:00
Update docs and fix tabnet
This commit is contained in:
@@ -129,13 +129,13 @@ class Expression(abc.ABC):
|
||||
Parameters
|
||||
----------
|
||||
instrument : str
|
||||
instrument code
|
||||
instrument code.
|
||||
start_index : str
|
||||
feature start index [in calendar]
|
||||
feature start index [in calendar].
|
||||
end_index : str
|
||||
feature end index [in calendar]
|
||||
feature end index [in calendar].
|
||||
freq : str
|
||||
feature frequency
|
||||
feature frequency.
|
||||
|
||||
Returns
|
||||
----------
|
||||
|
||||
@@ -76,8 +76,8 @@ class MemCache(object):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
mem_cache_size_limit: cache max size
|
||||
limit_type: length or sizeof; length(call fun: len), size(call fun: sys.getsizeof)
|
||||
mem_cache_size_limit: cache max size.
|
||||
limit_type: length or sizeof; length(call fun: len), size(call fun: sys.getsizeof).
|
||||
"""
|
||||
if limit_type not in ["length", "sizeof"]:
|
||||
raise ValueError(f"limit_type must be length or sizeof, your limit_type is {limit_type}")
|
||||
@@ -118,9 +118,9 @@ class MemCacheExpire:
|
||||
def set_cache(mem_cache, key, value):
|
||||
"""set cache
|
||||
|
||||
:param mem_cache: MemCache attribute('c'/'i'/'f')
|
||||
:param key: cache key
|
||||
:param value: cache value
|
||||
:param mem_cache: MemCache attribute('c'/'i'/'f').
|
||||
:param key: cache key.
|
||||
:param value: cache value.
|
||||
"""
|
||||
mem_cache[key] = value, time.time()
|
||||
|
||||
@@ -128,9 +128,9 @@ class MemCacheExpire:
|
||||
def get_cache(mem_cache, key):
|
||||
"""get mem cache
|
||||
|
||||
:param mem_cache: MemCache attribute('c'/'i'/'f')
|
||||
:param key: cache key
|
||||
:return: cache value; if cache not exist, return None
|
||||
:param mem_cache: MemCache attribute('c'/'i'/'f').
|
||||
:param key: cache key.
|
||||
:return: cache value; if cache not exist, return None.
|
||||
"""
|
||||
value = None
|
||||
expire = False
|
||||
@@ -275,12 +275,12 @@ class ExpressionCache(BaseProviderCache):
|
||||
Parameters
|
||||
----------
|
||||
cache_uri : str
|
||||
the complete uri of expression cache file (include dir path)
|
||||
the complete uri of expression cache file (include dir path).
|
||||
|
||||
Returns
|
||||
-------
|
||||
int
|
||||
0(successful update)/ 1(no need to update)/ 2(update failure)
|
||||
0(successful update)/ 1(no need to update)/ 2(update failure).
|
||||
"""
|
||||
raise NotImplementedError("Implement this method if you want to make expression cache up to date")
|
||||
|
||||
@@ -348,7 +348,7 @@ class DatasetCache(BaseProviderCache):
|
||||
Parameters
|
||||
----------
|
||||
cache_uri : str
|
||||
the complete uri of dataset cache file (include dir path)
|
||||
the complete uri of dataset cache file (include dir path).
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -361,9 +361,9 @@ class DatasetCache(BaseProviderCache):
|
||||
def cache_to_origin_data(data, fields):
|
||||
"""cache data to origin data
|
||||
|
||||
:param data: pd.DataFrame, cache data
|
||||
:param fields: feature fields
|
||||
:return: pd.DataFrame
|
||||
:param data: pd.DataFrame, cache data.
|
||||
:param fields: feature fields.
|
||||
:return: pd.DataFrame.
|
||||
"""
|
||||
not_space_fields = remove_fields_space(fields)
|
||||
data = data.loc[:, not_space_fields]
|
||||
@@ -583,7 +583,7 @@ class DiskDatasetCache(DatasetCache):
|
||||
:param cache_path:
|
||||
:param start_time:
|
||||
:param end_time:
|
||||
:param fields: The fields order of the dataset cache is sorted. So rearrange the columns to make it consistent
|
||||
:param fields: The fields order of the dataset cache is sorted. So rearrange the columns to make it consistent.
|
||||
:return:
|
||||
"""
|
||||
|
||||
@@ -771,12 +771,12 @@ class DiskDatasetCache(DatasetCache):
|
||||
|
||||
- This is a hdf file sorted by datetime
|
||||
|
||||
:param cache_path: The path to store the cache
|
||||
:param instruments: The instruments to store the cache
|
||||
:param fields: The fields to store the cache
|
||||
:param freq: The freq to store the cache
|
||||
:param cache_path: The path to store the cache.
|
||||
:param instruments: The instruments to store the cache.
|
||||
:param fields: The fields to store the cache.
|
||||
:param freq: The freq to store the cache.
|
||||
|
||||
:return type pd.DataFrame; The fields of the returned DataFrame are consistent with the parameters of the function
|
||||
:return type pd.DataFrame; The fields of the returned DataFrame are consistent with the parameters of the function.
|
||||
"""
|
||||
# get calendar
|
||||
from .data import Cal
|
||||
|
||||
@@ -51,13 +51,13 @@ class Client(object):
|
||||
Parameters
|
||||
----------
|
||||
request_type : str
|
||||
type of proposed request, 'calendar'/'instrument'/'feature'
|
||||
type of proposed request, 'calendar'/'instrument'/'feature'.
|
||||
request_content : dict
|
||||
records the information of the request
|
||||
records the information of the request.
|
||||
msg_proc_func : func
|
||||
the function to process the message when receiving response, should have arg `*args`
|
||||
the function to process the message when receiving response, should have arg `*args`.
|
||||
msg_queue: Queue
|
||||
The queue to pass the message after callback
|
||||
The queue to pass the message after callback.
|
||||
"""
|
||||
head_info = {"version": qlib.__version__}
|
||||
|
||||
|
||||
@@ -41,13 +41,13 @@ class CalendarProvider(abc.ABC):
|
||||
Parameters
|
||||
----------
|
||||
start_time : str
|
||||
start of the time range
|
||||
start of the time range.
|
||||
end_time : str
|
||||
end of the time range
|
||||
end of the time range.
|
||||
freq : str
|
||||
time frequency, available: year/quarter/month/week/day
|
||||
time frequency, available: year/quarter/month/week/day.
|
||||
future : bool
|
||||
whether including future trading day
|
||||
whether including future trading day.
|
||||
|
||||
Returns
|
||||
----------
|
||||
@@ -62,24 +62,24 @@ class CalendarProvider(abc.ABC):
|
||||
Parameters
|
||||
----------
|
||||
start_time : str
|
||||
start of the time range
|
||||
start of the time range.
|
||||
end_time : str
|
||||
end of the time range
|
||||
end of the time range.
|
||||
freq : str
|
||||
time frequency, available: year/quarter/month/week/day
|
||||
time frequency, available: year/quarter/month/week/day.
|
||||
future : bool
|
||||
whether including future trading day
|
||||
whether including future trading day.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pd.Timestamp
|
||||
the real start time
|
||||
the real start time.
|
||||
pd.Timestamp
|
||||
the real end time
|
||||
the real end time.
|
||||
int
|
||||
the index of start time
|
||||
the index of start time.
|
||||
int
|
||||
the index of end time
|
||||
the index of end time.
|
||||
"""
|
||||
start_time = pd.Timestamp(start_time)
|
||||
end_time = pd.Timestamp(end_time)
|
||||
@@ -103,16 +103,16 @@ class CalendarProvider(abc.ABC):
|
||||
Parameters
|
||||
----------
|
||||
freq : str
|
||||
frequency of read calendar file
|
||||
frequency of read calendar file.
|
||||
future : bool
|
||||
whether including future trading day
|
||||
whether including future trading day.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list
|
||||
list of timestamps
|
||||
list of timestamps.
|
||||
dict
|
||||
dict composed by timestamp as key and index as value for fast search
|
||||
dict composed by timestamp as key and index as value for fast search.
|
||||
"""
|
||||
flag = f"{freq}_future_{future}"
|
||||
if flag in H["c"]:
|
||||
@@ -141,14 +141,14 @@ class InstrumentProvider(abc.ABC):
|
||||
Parameters
|
||||
----------
|
||||
market : str
|
||||
market/industry/index shortname, e.g. all/sse/szse/sse50/csi300/csi500
|
||||
market/industry/index shortname, e.g. all/sse/szse/sse50/csi300/csi500.
|
||||
filter_pipe : list
|
||||
the list of dynamic filters
|
||||
the list of dynamic filters.
|
||||
|
||||
Returns
|
||||
----------
|
||||
dict
|
||||
dict of stockpool config
|
||||
dict of stockpool config.
|
||||
{`market`=>base market name, `filter_pipe`=>list of filters}
|
||||
|
||||
example :
|
||||
@@ -182,13 +182,13 @@ class InstrumentProvider(abc.ABC):
|
||||
Parameters
|
||||
----------
|
||||
instruments : dict
|
||||
stockpool config
|
||||
stockpool config.
|
||||
start_time : str
|
||||
start of the time range
|
||||
start of the time range.
|
||||
end_time : str
|
||||
end of the time range
|
||||
end of the time range.
|
||||
as_list : bool
|
||||
return instruments as list or dict
|
||||
return instruments as list or dict.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -243,15 +243,15 @@ class FeatureProvider(abc.ABC):
|
||||
Parameters
|
||||
----------
|
||||
instrument : str
|
||||
a certain instrument
|
||||
a certain instrument.
|
||||
field : str
|
||||
a certain field of feature
|
||||
a certain field of feature.
|
||||
start_time : str
|
||||
start of the time range
|
||||
start of the time range.
|
||||
end_time : str
|
||||
end of the time range
|
||||
end of the time range.
|
||||
freq : str
|
||||
time frequency, available: year/quarter/month/week/day
|
||||
time frequency, available: year/quarter/month/week/day.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -294,15 +294,15 @@ class ExpressionProvider(abc.ABC):
|
||||
Parameters
|
||||
----------
|
||||
instrument : str
|
||||
a certain instrument
|
||||
a certain instrument.
|
||||
field : str
|
||||
a certain field of feature
|
||||
a certain field of feature.
|
||||
start_time : str
|
||||
start of the time range
|
||||
start of the time range.
|
||||
end_time : str
|
||||
end of the time range
|
||||
end of the time range.
|
||||
freq : str
|
||||
time frequency, available: year/quarter/month/week/day
|
||||
time frequency, available: year/quarter/month/week/day.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -325,20 +325,20 @@ class DatasetProvider(abc.ABC):
|
||||
Parameters
|
||||
----------
|
||||
instruments : list or dict
|
||||
list/dict of instruments or dict of stockpool config
|
||||
list/dict of instruments or dict of stockpool config.
|
||||
fields : list
|
||||
list of feature instances
|
||||
list of feature instances.
|
||||
start_time : str
|
||||
start of the time range
|
||||
start of the time range.
|
||||
end_time : str
|
||||
end of the time range
|
||||
end of the time range.
|
||||
freq : str
|
||||
time frequency
|
||||
time frequency.
|
||||
|
||||
Returns
|
||||
----------
|
||||
pd.DataFrame
|
||||
a pandas dataframe with <instrument, datetime> index
|
||||
a pandas dataframe with <instrument, datetime> index.
|
||||
"""
|
||||
raise NotImplementedError("Subclass of DatasetProvider must implement `Dataset` method")
|
||||
|
||||
@@ -357,17 +357,17 @@ class DatasetProvider(abc.ABC):
|
||||
Parameters
|
||||
----------
|
||||
instruments : list or dict
|
||||
list/dict of instruments or dict of stockpool config
|
||||
list/dict of instruments or dict of stockpool config.
|
||||
fields : list
|
||||
list of feature instances
|
||||
list of feature instances.
|
||||
start_time : str
|
||||
start of the time range
|
||||
start of the time range.
|
||||
end_time : str
|
||||
end of the time range
|
||||
end of the time range.
|
||||
freq : str
|
||||
time frequency
|
||||
time frequency.
|
||||
disk_cache : int
|
||||
whether to skip(0)/use(1)/replace(2) disk_cache
|
||||
whether to skip(0)/use(1)/replace(2) disk_cache.
|
||||
|
||||
"""
|
||||
return DiskDatasetCache._uri(instruments, fields, start_time, end_time, freq, disk_cache)
|
||||
@@ -526,7 +526,7 @@ class LocalCalendarProvider(CalendarProvider):
|
||||
Parameters
|
||||
----------
|
||||
freq : str
|
||||
frequency of read calendar file
|
||||
frequency of read calendar file.
|
||||
|
||||
Returns
|
||||
----------
|
||||
|
||||
@@ -17,7 +17,7 @@ class Dataset(Serializable):
|
||||
init is designed to finish following steps:
|
||||
|
||||
- setup data
|
||||
- The data related attributes' names should start with '_' so that it will not be saved on disk when serializing
|
||||
- The data related attributes' names should start with '_' so that it will not be saved on disk when serializing.
|
||||
|
||||
- initialize the state of the dataset(info to prepare the data)
|
||||
- The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing.
|
||||
@@ -29,17 +29,17 @@ class Dataset(Serializable):
|
||||
|
||||
def setup_data(self, *args, **kwargs):
|
||||
"""
|
||||
setup the data
|
||||
Setup the data.
|
||||
|
||||
We split the setup_data function for following situation:
|
||||
|
||||
- User have a Dataset object with learned status on disk
|
||||
- User have a Dataset object with learned status on disk.
|
||||
|
||||
- User loads the Dataset object from the disk (note: the init function is skipped)
|
||||
- User loads the Dataset object from the disk (note: the init function is skipped).
|
||||
|
||||
- User call `setup_data` to load new data
|
||||
- User call `setup_data` to load new data.
|
||||
|
||||
- User prepare data for model based on previous status
|
||||
- User prepare data for model based on previous status.
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -66,9 +66,10 @@ class DatasetH(Dataset):
|
||||
|
||||
User should try to put the data preprocessing functions into handler.
|
||||
Only following data processing functions should be placed in Dataset:
|
||||
|
||||
- The processing is related to specific model.
|
||||
|
||||
- The processing is related to data split
|
||||
- The processing is related to data split.
|
||||
"""
|
||||
|
||||
def __init__(self, handler: Union[dict, DataHandler], segments: list):
|
||||
@@ -76,15 +77,15 @@ class DatasetH(Dataset):
|
||||
Parameters
|
||||
----------
|
||||
handler : Union[dict, DataHandler]
|
||||
handler will be passed into setup_data
|
||||
handler will be passed into setup_data.
|
||||
segments : list
|
||||
segments will be passed into setup_data
|
||||
segments will be passed into setup_data.
|
||||
"""
|
||||
super().__init__(handler, segments)
|
||||
|
||||
def setup_data(self, handler: Union[dict, DataHandler], segments: list):
|
||||
"""
|
||||
setup the underlying data
|
||||
Setup the underlying data.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -121,7 +122,7 @@ class DatasetH(Dataset):
|
||||
**kwargs,
|
||||
) -> Union[List[pd.DataFrame], pd.DataFrame]:
|
||||
"""
|
||||
prepare the data for learning and inference
|
||||
Prepare the data for learning and inference.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -132,11 +133,12 @@ class DatasetH(Dataset):
|
||||
- 'train'
|
||||
|
||||
- ['train', 'valid']
|
||||
|
||||
col_set : str
|
||||
The col_set will be passed to self._handler when fetching data
|
||||
data_key: str
|
||||
The col_set will be passed to self._handler when fetching data.
|
||||
data_key : str
|
||||
The data to fetch: DK_*
|
||||
Default is DK_I, which indicates fetching data for **inference**
|
||||
Default is DK_I, which indicates fetching data for **inference**.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
@@ -29,7 +29,7 @@ class DataHandler(Serializable):
|
||||
"""
|
||||
The steps to using a handler
|
||||
1. initialized data handler (call by `init`).
|
||||
2. use the data
|
||||
2. use the data.
|
||||
|
||||
|
||||
The data handler try to maintain a handler with 2 level.
|
||||
@@ -65,17 +65,17 @@ class DataHandler(Serializable):
|
||||
Parameters
|
||||
----------
|
||||
instruments :
|
||||
The stock list to retrieve
|
||||
The stock list to retrieve.
|
||||
start_time :
|
||||
start_time of the original data
|
||||
start_time of the original data.
|
||||
end_time :
|
||||
end_time of the original data
|
||||
end_time of the original data.
|
||||
data_loader : Tuple[dict, str, DataLoader]
|
||||
data loader to load the data
|
||||
data loader to load the data.
|
||||
init_data :
|
||||
initialize the original data in the constructor
|
||||
initialize the original data in the constructor.
|
||||
fetch_orig : bool
|
||||
Return the original data instead of copy if possible
|
||||
Return the original data instead of copy if possible.
|
||||
"""
|
||||
# Set logger
|
||||
self.logger = get_module_logger("DataHandler")
|
||||
@@ -219,9 +219,9 @@ class DataHandler(Serializable):
|
||||
get a iterator of sliced data with given periods
|
||||
|
||||
Args:
|
||||
periods (int): number of periods
|
||||
min_periods (int): minimum periods for sliced dataframe
|
||||
kwargs (dict): will be passed to `self.fetch`
|
||||
periods (int): number of periods.
|
||||
min_periods (int): minimum periods for sliced dataframe.
|
||||
kwargs (dict): will be passed to `self.fetch`.
|
||||
"""
|
||||
trading_dates = self._data.index.unique(level="datetime")
|
||||
if min_periods is None:
|
||||
@@ -377,7 +377,7 @@ class DataHandlerLP(DataHandler):
|
||||
Parameters
|
||||
----------
|
||||
init_type : str
|
||||
The type `IT_*` listed above
|
||||
The type `IT_*` listed above.
|
||||
enable_cache : bool
|
||||
default value is false:
|
||||
|
||||
@@ -419,13 +419,13 @@ class DataHandlerLP(DataHandler):
|
||||
Parameters
|
||||
----------
|
||||
selector : Union[pd.Timestamp, slice, str]
|
||||
describe how to select data by index
|
||||
describe how to select data by index.
|
||||
level : Union[str, int]
|
||||
which index level to select the data
|
||||
which index level to select the data.
|
||||
col_set : str
|
||||
select a set of meaningful columns.(e.g. features, columns)
|
||||
data_key: str
|
||||
The data to fetch: DK_*
|
||||
select a set of meaningful columns (e.g. features, columns).
|
||||
data_key : str
|
||||
the data to fetch: DK_*.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -443,9 +443,9 @@ class DataHandlerLP(DataHandler):
|
||||
Parameters
|
||||
----------
|
||||
col_set : str
|
||||
select a set of meaningful columns.(e.g. features, columns)
|
||||
data_key: str
|
||||
The data to fetch: DK_*
|
||||
select a set of meaningful columns (e.g. features, columns).
|
||||
data_key : str
|
||||
the data to fetch: DK_*.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
@@ -100,16 +100,16 @@ class DLWParser(DataLoader):
|
||||
Parameters
|
||||
----------
|
||||
instruments :
|
||||
the instruments
|
||||
the instruments.
|
||||
exprs : list
|
||||
The expressions to describe the content of the data
|
||||
the expressions to describe the content of the data.
|
||||
names : list
|
||||
The name of the data
|
||||
the name of the data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pd.DataFrame:
|
||||
the queried dataframe
|
||||
the queried dataframe.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ def get_group_columns(df: pd.DataFrame, group: str):
|
||||
Parameters
|
||||
----------
|
||||
df : pd.DataFrame
|
||||
a DataFrame with multi-level columns
|
||||
a DataFrame with multi-level columns.
|
||||
group : str
|
||||
the name of the feature group, i.e. the first level value of the group index.
|
||||
"""
|
||||
@@ -56,7 +56,7 @@ class Processor(Serializable):
|
||||
Parameters
|
||||
----------
|
||||
df : pd.DataFrame
|
||||
The raw_df of handler or result from previous processor
|
||||
The raw_df of handler or result from previous processor.
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -68,7 +68,7 @@ class Processor(Serializable):
|
||||
Returns
|
||||
-------
|
||||
bool:
|
||||
if it is usable for inference
|
||||
if it is usable for inference.
|
||||
"""
|
||||
return True
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ class BaseDFilter(abc.ABC):
|
||||
Parameters
|
||||
----------
|
||||
config : dict
|
||||
dict of config parameters
|
||||
dict of config parameters.
|
||||
"""
|
||||
raise NotImplementedError("Subclass of BaseDFilter must reimplement `from_config` method")
|
||||
|
||||
@@ -43,7 +43,7 @@ class BaseDFilter(abc.ABC):
|
||||
Returns
|
||||
----------
|
||||
dict
|
||||
return the dict of config parameters
|
||||
return the dict of config parameters.
|
||||
"""
|
||||
raise NotImplementedError("Subclass of BaseDFilter must reimplement `to_config` method")
|
||||
|
||||
@@ -69,9 +69,9 @@ class SeriesDFilter(BaseDFilter):
|
||||
Parameters
|
||||
----------
|
||||
fstart_time: str
|
||||
the time for the filter rule to start filter the instruments
|
||||
the time for the filter rule to start filter the instruments.
|
||||
fend_time: str
|
||||
the time for the filter rule to stop filter the instruments
|
||||
the time for the filter rule to stop filter the instruments.
|
||||
"""
|
||||
super(SeriesDFilter, self).__init__()
|
||||
self.filter_start_time = pd.Timestamp(fstart_time) if fstart_time else None
|
||||
@@ -83,12 +83,12 @@ class SeriesDFilter(BaseDFilter):
|
||||
Parameters
|
||||
----------
|
||||
instruments: dict
|
||||
the dict of instruments in the form {instrument_name => list of timestamp tuple}
|
||||
the dict of instruments in the form {instrument_name => list of timestamp tuple}.
|
||||
|
||||
Returns
|
||||
----------
|
||||
pd.Timestamp, pd.Timestamp
|
||||
the lower time bound and upper time bound of all the instruments
|
||||
the lower time bound and upper time bound of all the instruments.
|
||||
"""
|
||||
trange = Cal.calendar(freq=self.filter_freq)
|
||||
ubound, lbound = trange[0], trange[-1]
|
||||
@@ -105,14 +105,14 @@ class SeriesDFilter(BaseDFilter):
|
||||
Parameters
|
||||
----------
|
||||
time_range : D.calendar
|
||||
the time range of the instruments
|
||||
the time range of the instruments.
|
||||
target_timestamp : list
|
||||
the list of tuple (timestamp, timestamp)
|
||||
the list of tuple (timestamp, timestamp).
|
||||
|
||||
Returns
|
||||
----------
|
||||
pd.Series
|
||||
the series of bool value for an instrument
|
||||
the series of bool value for an instrument.
|
||||
"""
|
||||
# Construct a whole dict of {date => bool}
|
||||
timestamp_series = {timestamp: False for timestamp in time_range}
|
||||
@@ -124,19 +124,19 @@ class SeriesDFilter(BaseDFilter):
|
||||
return timestamp_series
|
||||
|
||||
def _filterSeries(self, timestamp_series, filter_series):
|
||||
"""Filter the timestamp series with filter series by using element-wise AND operation of the two series
|
||||
"""Filter the timestamp series with filter series by using element-wise AND operation of the two series.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
timestamp_series : pd.Series
|
||||
the series of bool value indicating existing time
|
||||
the series of bool value indicating existing time.
|
||||
filter_series : pd.Series
|
||||
the series of bool value indicating filter feature
|
||||
the series of bool value indicating filter feature.
|
||||
|
||||
Returns
|
||||
----------
|
||||
pd.Series
|
||||
the series of bool value indicating whether the date satisfies the filter condition and exists in target timestamp
|
||||
the series of bool value indicating whether the date satisfies the filter condition and exists in target timestamp.
|
||||
"""
|
||||
fstart, fend = list(filter_series.keys())[0], list(filter_series.keys())[-1]
|
||||
filter_series = filter_series.astype("bool") # Make sure the filter_series is boolean
|
||||
@@ -144,17 +144,17 @@ class SeriesDFilter(BaseDFilter):
|
||||
return timestamp_series
|
||||
|
||||
def _toTimestamp(self, timestamp_series):
|
||||
"""Convert the timestamp series to a list of tuple (timestamp, timestamp) indicating a continuous range of TRUE
|
||||
"""Convert the timestamp series to a list of tuple (timestamp, timestamp) indicating a continuous range of TRUE.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
timestamp_series: pd.Series
|
||||
the series of bool value after being filtered
|
||||
the series of bool value after being filtered.
|
||||
|
||||
Returns
|
||||
----------
|
||||
list
|
||||
the list of tuple (timestamp, timestamp)
|
||||
the list of tuple (timestamp, timestamp).
|
||||
"""
|
||||
# sort the timestamp_series according to the timestamps
|
||||
timestamp_series.sort_index()
|
||||
@@ -194,18 +194,18 @@ class SeriesDFilter(BaseDFilter):
|
||||
Parameters
|
||||
----------
|
||||
instruments : dict
|
||||
the dict of instruments to be filtered
|
||||
the dict of instruments to be filtered.
|
||||
fstart : pd.Timestamp
|
||||
start time of filter
|
||||
start time of filter.
|
||||
fend : pd.Timestamp
|
||||
end time of filter
|
||||
end time of filter.
|
||||
|
||||
.. note:: fstart/fend indicates the intersection of instruments start/end time and filter start/end time
|
||||
.. note:: fstart/fend indicates the intersection of instruments start/end time and filter start/end time.
|
||||
|
||||
Returns
|
||||
----------
|
||||
pd.DataFrame
|
||||
a series of {pd.Timestamp => bool}
|
||||
a series of {pd.Timestamp => bool}.
|
||||
"""
|
||||
raise NotImplementedError("Subclass of SeriesDFilter must reimplement `getFilterSeries` method")
|
||||
|
||||
@@ -215,16 +215,16 @@ class SeriesDFilter(BaseDFilter):
|
||||
Parameters
|
||||
----------
|
||||
instruments: dict
|
||||
input instruments to be filtered
|
||||
input instruments to be filtered.
|
||||
start_time: str
|
||||
start of the time range
|
||||
start of the time range.
|
||||
end_time: str
|
||||
end of the time range
|
||||
end of the time range.
|
||||
|
||||
Returns
|
||||
----------
|
||||
dict
|
||||
filtered instruments, same structure as input instruments
|
||||
filtered instruments, same structure as input instruments.
|
||||
"""
|
||||
lbound, ubound = self._getTimeBound(instruments)
|
||||
start_time = pd.Timestamp(start_time or lbound)
|
||||
@@ -272,7 +272,7 @@ class NameDFilter(SeriesDFilter):
|
||||
params:
|
||||
------
|
||||
name_rule_re: str
|
||||
regular expression for the name rule
|
||||
regular expression for the name rule.
|
||||
"""
|
||||
super(NameDFilter, self).__init__(fstart_time, fend_time)
|
||||
self.name_rule_re = name_rule_re
|
||||
@@ -325,13 +325,13 @@ class ExpressionDFilter(SeriesDFilter):
|
||||
params:
|
||||
------
|
||||
fstart_time: str
|
||||
filter the feature starting from this time
|
||||
filter the feature starting from this time.
|
||||
fend_time: str
|
||||
filter the feature ending by this time
|
||||
filter the feature ending by this time.
|
||||
rule_expression: str
|
||||
an input expression for the rule
|
||||
an input expression for the rule.
|
||||
keep: bool
|
||||
whether to keep the instruments of which features don't exist in the filter time span
|
||||
whether to keep the instruments of which features don't exist in the filter time span.
|
||||
"""
|
||||
super(ExpressionDFilter, self).__init__(fstart_time, fend_time)
|
||||
self.rule_expression = rule_expression
|
||||
|
||||
Reference in New Issue
Block a user