diff --git a/examples/high_freq/highfreq_handler.py b/examples/high_freq/highfreq_handler.py index 32557f768..13d0e8298 100644 --- a/examples/high_freq/highfreq_handler.py +++ b/examples/high_freq/highfreq_handler.py @@ -50,7 +50,6 @@ class HighFreqHandler(DataHandlerLP): drop_raw=drop_raw, ) - def get_feature_config(self): fields = [] names = [] @@ -98,7 +97,7 @@ class HighFreqHandler(DataHandlerLP): ) ] names += ["$open", "$high", "$low", "$close", "$vwap"] - + fields += [ "Ref({0}, 240)/Ref(DayLast({1}), 240)".format( template_if.format( @@ -205,7 +204,9 @@ class HighFreqBacktestHandler(DataHandler): template_paused = "Select(Eq($paused, 0.0), {0})" template_fillnan = "FFillNan({0})" - fields += [template_fillnan.format(template_paused.format("$close")),] + fields += [ + template_fillnan.format(template_paused.format("$close")), + ] names += ["$close0"] fields += [ "If(Eq({1}, np.nan), 0, If(Or(Gt({2}, Mul(1.001, {4})), Lt({2}, Mul(0.999, {3}))), 0, {1}))".format( diff --git a/examples/high_freq/highfreq_ops.py b/examples/high_freq/highfreq_ops.py index f6470d68e..4d35da9c4 100644 --- a/examples/high_freq/highfreq_ops.py +++ b/examples/high_freq/highfreq_ops.py @@ -9,7 +9,7 @@ from qlib.data.data import Cal class DayFirst(ElemOperator): def __init__(self, feature): super(DayFirst, self).__init__(feature, "day_first") - + def _load_internal(self, instrument, start_index, end_index, freq): _calendar = Cal.get_calender_day(freq=freq)[0] series = self.feature.load(instrument, start_index, end_index, freq) @@ -44,6 +44,7 @@ class Date(ElemOperator): series = self.feature.load(instrument, start_index, end_index, freq) return pd.Series(_calendar[series.index], index=series.index) + class Select(PairOperator): def __init__(self, condition, feature): super(Select, self).__init__(condition, feature, "select") @@ -53,10 +54,11 @@ class Select(PairOperator): series_feature = self.feature_right.load(instrument, start_index, end_index, freq) return series_feature.loc[series_condition] + class IsNull(ElemOperator): def __init__(self, feature): super(IsNull, self).__init__(feature, "isnull") def _load_internal(self, instrument, start_index, end_index, freq): series = self.feature.load(instrument, start_index, end_index, freq) - return series.isnull() \ No newline at end of file + return series.isnull() diff --git a/examples/high_freq/highfreq_processor.py b/examples/high_freq/highfreq_processor.py index fc86b1a70..dc8792a57 100644 --- a/examples/high_freq/highfreq_processor.py +++ b/examples/high_freq/highfreq_processor.py @@ -62,9 +62,9 @@ class HighFreqNorm(Processor): feat_1 = df_values[:, [5, 6, 7, 8, 9, 11]].reshape(-1, 6 * 240) df_new_features = pd.DataFrame( - data=np.concatenate((feat, feat_1), axis=1), - index=idx, - columns=["FEATURE_%d" % i for i in range(12 * 240)], + data=np.concatenate((feat, feat_1), axis=1), + index=idx, + columns=["FEATURE_%d" % i for i in range(12 * 240)], ).sort_index() - return df_new_features \ No newline at end of file + return df_new_features diff --git a/examples/high_freq/workflow.py b/examples/high_freq/workflow.py index 83a344b0f..dc13bd245 100644 --- a/examples/high_freq/workflow.py +++ b/examples/high_freq/workflow.py @@ -24,6 +24,7 @@ from qlib.data.data import Cal from highfreq_ops import DayFirst, DayLast, FFillNan, Date, Select, IsNull + def save_dataset(dataset, path: [Path, str]): """ save dataset to path @@ -35,6 +36,7 @@ def save_dataset(dataset, path: [Path, str]): """ dataset.to_pickle(path=path) + def load_dataset(path: [Path, str], init_type=DataHandlerLP.IT_LS): """ load dataset from path @@ -48,7 +50,7 @@ def load_dataset(path: [Path, str], init_type=DataHandlerLP.IT_LS): - if `init_type` == DataHandlerLP.IT_FIT_SEQ: the input of `DataHandlerLP.fit` will be the output of the previous processor - + - if `init_type` == DataHandlerLP.IT_FIT_IND: the input of `DataHandlerLP.fit` will be the original df @@ -57,17 +59,24 @@ def load_dataset(path: [Path, str], init_type=DataHandlerLP.IT_LS): The state of the object has been load by pickle """ - fd = open(path, 'rb') + fd = open(path, "rb") dataset = pickle.load(fd) dataset.init(init_type=init_type) fd.close() return dataset + if __name__ == "__main__": # use default data provider_uri = "/mnt/v-xiabi/data/qlib/high_freq" # target_dir - qlib.init(provider_uri=provider_uri, custom_ops=[DayFirst, DayLast, FFillNan, Date, Select, IsNull], redis_port=233, region=REG_CN, auto_mount=False) + qlib.init( + provider_uri=provider_uri, + custom_ops=[DayFirst, DayLast, FFillNan, Date, Select, IsNull], + redis_port=233, + region=REG_CN, + auto_mount=False, + ) MARKET = "csi300" BENCHMARK = "SH000300" @@ -134,4 +143,3 @@ if __name__ == "__main__": Cal.get_calender_day(freq="1min") # TO FIX: load the calendar day for cache dataset = init_instance_by_config(task["dataset"]) dataset_backtest = init_instance_by_config(task["dataset_backtest"]) - diff --git a/qlib/data/data.py b/qlib/data/data.py index 3021ebe82..030f56494 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -132,7 +132,7 @@ class CalendarProvider(abc.ABC): _calendar_index = {x: i for i, x in enumerate(_calendar)} # for fast search H["c"][flag] = _calendar, _calendar_index return _calendar, _calendar_index - + def _uri(self, start_time, end_time, freq, future=False): """Get the uri of calendar generation task.""" return hash_args(start_time, end_time, freq, future) diff --git a/qlib/data/dataset/__init__.py b/qlib/data/dataset/__init__.py index df7af3f5e..adad6cfb8 100644 --- a/qlib/data/dataset/__init__.py +++ b/qlib/data/dataset/__init__.py @@ -87,7 +87,6 @@ class DatasetH(Dataset): """ super().__init__(handler, segments) - def init(self, init_type: str = DataHandlerLP.IT_FIT_SEQ, enable_cache: bool = False): """ Initialize the data of Qlib @@ -98,7 +97,7 @@ class DatasetH(Dataset): - if `init_type` == DataHandlerLP.IT_FIT_SEQ: the input of `DataHandlerLP.fit` will be the output of the previous processor - + - if `init_type` == DataHandlerLP.IT_FIT_IND: the input of `DataHandlerLP.fit` will be the original df diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py index 9dfc4746a..0e6093bd9 100644 --- a/qlib/data/dataset/handler.py +++ b/qlib/data/dataset/handler.py @@ -389,6 +389,7 @@ class DataHandlerLP(DataHandler): if self.drop_raw: del self._data + # init type IT_FIT_SEQ = "fit_seq" # the input of `fit` will be the output of the previous processor IT_FIT_IND = "fit_ind" # the input of `fit` will be the original df @@ -431,7 +432,7 @@ class DataHandlerLP(DataHandler): df = getattr(self, {self.DK_R: "_data", self.DK_I: "_infer", self.DK_L: "_learn"}[data_key]) except AttributeError: print("please set drop_raw = False if you want to use raw data") - raise + raise return df def fetch( diff --git a/qlib/data/dataset/loader.py b/qlib/data/dataset/loader.py index c6d06b57f..324ff9a4f 100644 --- a/qlib/data/dataset/loader.py +++ b/qlib/data/dataset/loader.py @@ -94,7 +94,9 @@ class DLWParser(DataLoader): return exprs, names @abc.abstractmethod - def load_group_df(self, instruments, exprs: list, names: list, start_time=None, end_time=None, freq="day") -> pd.DataFrame: + def load_group_df( + self, instruments, exprs: list, names: list, start_time=None, end_time=None, freq="day" + ) -> pd.DataFrame: """ load the dataframe for specific group @@ -148,7 +150,9 @@ class QlibDataLoader(DLWParser): print("swap level", swap_level) super().__init__(config) - def load_group_df(self, instruments, exprs: list, names: list, start_time=None, end_time=None, freq="day") -> pd.DataFrame: + def load_group_df( + self, instruments, exprs: list, names: list, start_time=None, end_time=None, freq="day" + ) -> pd.DataFrame: if instruments is None: warnings.warn("`instruments` is not set, will load all stocks") instruments = "all"