diff --git a/examples/highfreq/highfreq_processor.py b/examples/highfreq/highfreq_processor.py index 4ec8f3dd2..6ed68ff38 100644 --- a/examples/highfreq/highfreq_processor.py +++ b/examples/highfreq/highfreq_processor.py @@ -71,7 +71,7 @@ class HighFreqNorm(Processor): ).sort_index() return df_new_features - def config(fit_start_time=None, fit_end_time=None, **kwargs): + def config(self, fit_start_time=None, fit_end_time=None, **kwargs): if fit_start_time: self.fit_start_time = fit_start_time if fit_end_time: diff --git a/examples/highfreq/workflow.py b/examples/highfreq/workflow.py index 0b48b971f..97762f182 100644 --- a/examples/highfreq/workflow.py +++ b/examples/highfreq/workflow.py @@ -31,7 +31,7 @@ class HighfreqWorkflow(object): SPEC_CONF = {"custom_ops": [DayLast, FFillNan, BFillNan, Date, Select, IsNull, Cut], "expression_cache": None} - MARKET = "csi300" + MARKET = "all" start_time = "2020-09-15 00:00:00" end_time = "2021-01-18 16:00:00" diff --git a/examples/rolling_process_data/workflow.py b/examples/rolling_process_data/workflow.py index 0be88dddc..ffdd8329a 100644 --- a/examples/rolling_process_data/workflow.py +++ b/examples/rolling_process_data/workflow.py @@ -101,15 +101,16 @@ class RollingDataWorkflow(object): print(f"===========rolling{rolling_offset} start===========") if rolling_offset: - dataset.init( + dataset.config( handler_kwargs={ - "init_type": DataHandlerLP.IT_FIT_SEQ, "start_time": datetime(train_start_time[0] + rolling_offset, *train_start_time[1:]), "end_time": datetime(test_end_time[0] + rolling_offset, *test_end_time[1:]), - "fit_start_time": datetime(train_start_time[0] + rolling_offset, *train_start_time[1:]), - "fit_end_time": datetime(train_end_time[0] + rolling_offset, *train_end_time[1:]), + "processor_kwargs":{ + "fit_start_time": datetime(train_start_time[0] + rolling_offset, *train_start_time[1:]), + "fit_end_time": datetime(train_end_time[0] + rolling_offset, *train_end_time[1:]), + }, }, - segment_kwargs={ + segments={ "train": ( datetime(train_start_time[0] + rolling_offset, *train_start_time[1:]), datetime(train_end_time[0] + rolling_offset, *train_end_time[1:]), @@ -124,6 +125,9 @@ class RollingDataWorkflow(object): ), }, ) + dataset.setup_data( + handler_kwargs={"init_type": DataHandlerLP.IT_FIT_SEQ,} + ) dtrain, dvalid, dtest = dataset.prepare(["train", "valid", "test"]) print(dtrain, dvalid, dtest) diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py index 712cd6232..4adef23a0 100644 --- a/qlib/data/dataset/handler.py +++ b/qlib/data/dataset/handler.py @@ -407,7 +407,7 @@ class DataHandlerLP(DataHandler): if self.drop_raw: del self._data - def config(self, processors_kwargs:dict = None, **kwargs): + def config(self, processor_kwargs:dict = None, **kwargs): """ configuration of data. # what data to be loaded from data source @@ -417,7 +417,7 @@ class DataHandlerLP(DataHandler): """ super().config(**kwargs) - if processors_kwargs is not None: + if processor_kwargs is not None: for processor in self.get_all_processors(): processor.config(**processor_kwargs) diff --git a/qlib/data/dataset/processor.py b/qlib/data/dataset/processor.py index e14e85831..5be178c5c 100755 --- a/qlib/data/dataset/processor.py +++ b/qlib/data/dataset/processor.py @@ -72,7 +72,7 @@ class Processor(Serializable): """ return True - def config(**kwargs): + def config(self, **kwargs): super().config(kwargs.get("dump_all", None), kwargs.get("exclude", None)) @@ -195,7 +195,7 @@ class MinMaxNorm(Processor): df.loc(axis=1)[self.cols] = normalize(df[self.cols].values) return df - def config(fit_start_time=None, fit_end_time=None, **kwargs): + def config(self, fit_start_time=None, fit_end_time=None, **kwargs): if fit_start_time: self.fit_start_time = fit_start_time if fit_end_time: @@ -230,7 +230,7 @@ class ZScoreNorm(Processor): df.loc(axis=1)[self.cols] = normalize(df[self.cols].values) return df - def config(fit_start_time=None, fit_end_time=None, **kwargs): + def config(self, fit_start_time=None, fit_end_time=None, **kwargs): if fit_start_time: self.fit_start_time = fit_start_time if fit_end_time: @@ -273,7 +273,7 @@ class RobustZScoreNorm(Processor): df.clip(-3, 3, inplace=True) return df - def config(fit_start_time=None, fit_end_time=None, **kwargs): + def config(self, fit_start_time=None, fit_end_time=None, **kwargs): if fit_start_time: self.fit_start_time = fit_start_time if fit_end_time: