1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-04 03:21:00 +08:00

black format

This commit is contained in:
bxdd
2021-01-25 17:59:48 +00:00
parent ffedb6382f
commit 06dbd02b99
8 changed files with 34 additions and 19 deletions

View File

@@ -50,7 +50,6 @@ class HighFreqHandler(DataHandlerLP):
drop_raw=drop_raw,
)
def get_feature_config(self):
fields = []
names = []
@@ -98,7 +97,7 @@ class HighFreqHandler(DataHandlerLP):
)
]
names += ["$open", "$high", "$low", "$close", "$vwap"]
fields += [
"Ref({0}, 240)/Ref(DayLast({1}), 240)".format(
template_if.format(
@@ -205,7 +204,9 @@ class HighFreqBacktestHandler(DataHandler):
template_paused = "Select(Eq($paused, 0.0), {0})"
template_fillnan = "FFillNan({0})"
fields += [template_fillnan.format(template_paused.format("$close")),]
fields += [
template_fillnan.format(template_paused.format("$close")),
]
names += ["$close0"]
fields += [
"If(Eq({1}, np.nan), 0, If(Or(Gt({2}, Mul(1.001, {4})), Lt({2}, Mul(0.999, {3}))), 0, {1}))".format(

View File

@@ -9,7 +9,7 @@ from qlib.data.data import Cal
class DayFirst(ElemOperator):
def __init__(self, feature):
super(DayFirst, self).__init__(feature, "day_first")
def _load_internal(self, instrument, start_index, end_index, freq):
_calendar = Cal.get_calender_day(freq=freq)[0]
series = self.feature.load(instrument, start_index, end_index, freq)
@@ -44,6 +44,7 @@ class Date(ElemOperator):
series = self.feature.load(instrument, start_index, end_index, freq)
return pd.Series(_calendar[series.index], index=series.index)
class Select(PairOperator):
def __init__(self, condition, feature):
super(Select, self).__init__(condition, feature, "select")
@@ -53,10 +54,11 @@ class Select(PairOperator):
series_feature = self.feature_right.load(instrument, start_index, end_index, freq)
return series_feature.loc[series_condition]
class IsNull(ElemOperator):
    """Element-wise operator that flags the missing values of a feature.

    The wrapped ``feature`` is loaded for the requested range and the result
    of calling ``isnull()`` on it is returned.
    """

    def __init__(self, feature):
        """Register ``feature`` with the base operator under the name "isnull"."""
        super(IsNull, self).__init__(feature, "isnull")

    def _load_internal(self, instrument, start_index, end_index, freq):
        """Load the wrapped feature and return its null mask.

        Parameters
        ----------
        instrument, start_index, end_index, freq :
            Forwarded unchanged to ``self.feature.load``.

        Returns
        -------
        The value of ``series.isnull()`` for the loaded series
        (presumably a boolean ``pandas.Series`` -- confirm against
        ``feature.load``).
        """
        series = self.feature.load(instrument, start_index, end_index, freq)
        return series.isnull()

View File

@@ -62,9 +62,9 @@ class HighFreqNorm(Processor):
feat_1 = df_values[:, [5, 6, 7, 8, 9, 11]].reshape(-1, 6 * 240)
df_new_features = pd.DataFrame(
data=np.concatenate((feat, feat_1), axis=1),
index=idx,
columns=["FEATURE_%d" % i for i in range(12 * 240)],
data=np.concatenate((feat, feat_1), axis=1),
index=idx,
columns=["FEATURE_%d" % i for i in range(12 * 240)],
).sort_index()
return df_new_features
return df_new_features

View File

@@ -24,6 +24,7 @@ from qlib.data.data import Cal
from highfreq_ops import DayFirst, DayLast, FFillNan, Date, Select, IsNull
def save_dataset(dataset, path: [Path, str]):
"""
save dataset to path
@@ -35,6 +36,7 @@ def save_dataset(dataset, path: [Path, str]):
"""
dataset.to_pickle(path=path)
def load_dataset(path: [Path, str], init_type=DataHandlerLP.IT_LS):
"""
load dataset from path
@@ -48,7 +50,7 @@ def load_dataset(path: [Path, str], init_type=DataHandlerLP.IT_LS):
- if `init_type` == DataHandlerLP.IT_FIT_SEQ:
the input of `DataHandlerLP.fit` will be the output of the previous processor
- if `init_type` == DataHandlerLP.IT_FIT_IND:
the input of `DataHandlerLP.fit` will be the original df
@@ -57,17 +59,24 @@ def load_dataset(path: [Path, str], init_type=DataHandlerLP.IT_LS):
The state of the object has been loaded by pickle
"""
fd = open(path, 'rb')
fd = open(path, "rb")
dataset = pickle.load(fd)
dataset.init(init_type=init_type)
fd.close()
return dataset
if __name__ == "__main__":
# use default data
provider_uri = "/mnt/v-xiabi/data/qlib/high_freq" # target_dir
qlib.init(provider_uri=provider_uri, custom_ops=[DayFirst, DayLast, FFillNan, Date, Select, IsNull], redis_port=233, region=REG_CN, auto_mount=False)
qlib.init(
provider_uri=provider_uri,
custom_ops=[DayFirst, DayLast, FFillNan, Date, Select, IsNull],
redis_port=233,
region=REG_CN,
auto_mount=False,
)
MARKET = "csi300"
BENCHMARK = "SH000300"
@@ -134,4 +143,3 @@ if __name__ == "__main__":
Cal.get_calender_day(freq="1min") # TO FIX: load the calendar day for cache
dataset = init_instance_by_config(task["dataset"])
dataset_backtest = init_instance_by_config(task["dataset_backtest"])

View File

@@ -132,7 +132,7 @@ class CalendarProvider(abc.ABC):
_calendar_index = {x: i for i, x in enumerate(_calendar)} # for fast search
H["c"][flag] = _calendar, _calendar_index
return _calendar, _calendar_index
def _uri(self, start_time, end_time, freq, future=False):
    """Return the uri identifying a calendar generation task.

    The uri is whatever ``hash_args`` produces for the four arguments,
    passed in the order ``start_time, end_time, freq, future``.
    """
    task_uri = hash_args(start_time, end_time, freq, future)
    return task_uri

View File

@@ -87,7 +87,6 @@ class DatasetH(Dataset):
"""
super().__init__(handler, segments)
def init(self, init_type: str = DataHandlerLP.IT_FIT_SEQ, enable_cache: bool = False):
"""
Initialize the data of Qlib
@@ -98,7 +97,7 @@ class DatasetH(Dataset):
- if `init_type` == DataHandlerLP.IT_FIT_SEQ:
the input of `DataHandlerLP.fit` will be the output of the previous processor
- if `init_type` == DataHandlerLP.IT_FIT_IND:
the input of `DataHandlerLP.fit` will be the original df

View File

@@ -389,6 +389,7 @@ class DataHandlerLP(DataHandler):
if self.drop_raw:
del self._data
# init type
IT_FIT_SEQ = "fit_seq" # the input of `fit` will be the output of the previous processor
IT_FIT_IND = "fit_ind" # the input of `fit` will be the original df
@@ -431,7 +432,7 @@ class DataHandlerLP(DataHandler):
df = getattr(self, {self.DK_R: "_data", self.DK_I: "_infer", self.DK_L: "_learn"}[data_key])
except AttributeError:
print("please set drop_raw = False if you want to use raw data")
raise
raise
return df
def fetch(

View File

@@ -94,7 +94,9 @@ class DLWParser(DataLoader):
return exprs, names
@abc.abstractmethod
def load_group_df(self, instruments, exprs: list, names: list, start_time=None, end_time=None, freq="day") -> pd.DataFrame:
def load_group_df(
self, instruments, exprs: list, names: list, start_time=None, end_time=None, freq="day"
) -> pd.DataFrame:
"""
load the dataframe for specific group
@@ -148,7 +150,9 @@ class QlibDataLoader(DLWParser):
print("swap level", swap_level)
super().__init__(config)
def load_group_df(self, instruments, exprs: list, names: list, start_time=None, end_time=None, freq="day") -> pd.DataFrame:
def load_group_df(
self, instruments, exprs: list, names: list, start_time=None, end_time=None, freq="day"
) -> pd.DataFrame:
if instruments is None:
warnings.warn("`instruments` is not set, will load all stocks")
instruments = "all"