update version for releasing

fix contrib data freq
simplify parameters
2026-06-06 14:01:28 +08:00 · 2021-02-02 09:24:33 +00:00 · 2021-02-02 16:52:50 +08:00 · 2021-02-02 16:52:50 +08:00 · 2021-02-02 16:52:50 +08:00 · 2021-02-02 14:48:12 +08:00
18 changed files with 81 additions and 49 deletions
--- a/examples/highfreq/highfreq_handler.py
+++ b/examples/highfreq/highfreq_handler.py
@@ -10,7 +10,6 @@ class HighFreqHandler(DataHandlerLP):
        instruments="csi300",
        start_time=None,
        end_time=None,
-        freq="1min",
        infer_processors=[],
        learn_processors=[],
        fit_start_time=None,
@@ -37,13 +36,13 @@ class HighFreqHandler(DataHandlerLP):
            "kwargs": {
                "config": self.get_feature_config(),
                "swap_level": False,
+                "freq": "1min",
            },
        }
        super().__init__(
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
-            freq=freq,
            data_loader=data_loader,
            infer_processors=infer_processors,
            learn_processors=learn_processors,
@@ -124,20 +123,19 @@ class HighFreqBacktestHandler(DataHandler):
        instruments="csi300",
        start_time=None,
        end_time=None,
-        freq="1min",
    ):
        data_loader = {
            "class": "QlibDataLoader",
            "kwargs": {
                "config": self.get_feature_config(),
                "swap_level": False,
+                "freq": "1min",
            },
        }
        super().__init__(
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
-            freq=freq,
            data_loader=data_loader,
        )

--- a/qlib/init.py
+++ b/qlib/init.py
@@ -2,7 +2,7 @@
 # Licensed under the MIT License.


-__version__ = "0.6.1.99"
+__version__ = "0.6.2"


 import os
--- a/qlib/config.py
+++ b/qlib/config.py
@@ -90,7 +90,6 @@ _default_config = {
    # How many tasks belong to one process. Recommend 1 for high-frequency data and None for daily data.
    "maxtasksperchild": None,
    "default_disk_cache": 1,  # 0:skip/1:use
-    "disable_disk_cache": False,  # disable disk cache; if High-frequency data generally disable_disk_cache=True
    "mem_cache_size_limit": 500,
    # memory cache expiry time in seconds; only used in 'DatasetURICache' and 'client D.calendar'
    # default 1 hour
--- a/qlib/contrib/data/handler.py
+++ b/qlib/contrib/data/handler.py
@@ -54,6 +54,7 @@ class Alpha360(DataHandlerLP):
        learn_processors=_DEFAULT_LEARN_PROCESSORS,
        fit_start_time=None,
        fit_end_time=None,
+        filter_pipe=None,
        **kwargs,
    ):
        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
@@ -66,6 +67,8 @@ class Alpha360(DataHandlerLP):
                    "feature": self.get_feature_config(),
                    "label": kwargs.get("label", self.get_label_config()),
                },
+                "filter_pipe": filter_pipe,
+                "freq": freq,
            },
        }

@@ -73,7 +76,6 @@ class Alpha360(DataHandlerLP):
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
-            freq="day",
            data_loader=data_loader,
            learn_processors=learn_processors,
            infer_processors=infer_processors,
@@ -138,6 +140,7 @@ class Alpha158(DataHandlerLP):
        fit_start_time=None,
        fit_end_time=None,
        process_type=DataHandlerLP.PTYPE_A,
+        filter_pipe=None,
        **kwargs,
    ):
        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
@@ -146,14 +149,18 @@ class Alpha158(DataHandlerLP):
        data_loader = {
            "class": "QlibDataLoader",
            "kwargs": {
-                "config": {"feature": self.get_feature_config(), "label": kwargs.get("label", self.get_label_config())},
+                "config": {
+                    "feature": self.get_feature_config(),
+                    "label": kwargs.get("label", self.get_label_config()),
+                },
+                "filter_pipe": filter_pipe,
+                "freq": freq,
            },
        }
        super().__init__(
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
-            freq=freq,
            data_loader=data_loader,
            infer_processors=infer_processors,
            learn_processors=learn_processors,
--- a/qlib/contrib/model/pytorch_alstm.py
+++ b/qlib/contrib/model/pytorch_alstm.py
@@ -56,7 +56,7 @@ class ALSTM(Model):
        early_stop=20,
        loss="mse",
        optimizer="adam",
-        GPU="0",
+        GPU=0,
        seed=None,
        **kwargs
    ):
--- a/qlib/contrib/model/pytorch_alstm_ts.py
+++ b/qlib/contrib/model/pytorch_alstm_ts.py
@@ -58,7 +58,7 @@ class ALSTM(Model):
        loss="mse",
        optimizer="adam",
        n_jobs=10,
-        GPU="0",
+        GPU=0,
        seed=None,
        **kwargs
    ):
--- a/qlib/contrib/model/pytorch_gats.py
+++ b/qlib/contrib/model/pytorch_gats.py
@@ -61,7 +61,7 @@ class GATs(Model):
        with_pretrain=True,
        model_path=None,
        optimizer="adam",
-        GPU="0",
+        GPU=0,
        seed=None,
        **kwargs
    ):
--- a/qlib/contrib/model/pytorch_gru.py
+++ b/qlib/contrib/model/pytorch_gru.py
@@ -56,7 +56,7 @@ class GRU(Model):
        early_stop=20,
        loss="mse",
        optimizer="adam",
-        GPU="0",
+        GPU=0,
        seed=None,
        **kwargs
    ):
--- a/qlib/contrib/model/pytorch_gru_ts.py
+++ b/qlib/contrib/model/pytorch_gru_ts.py
@@ -58,7 +58,7 @@ class GRU(Model):
        loss="mse",
        optimizer="adam",
        n_jobs=10,
-        GPU="0",
+        GPU=0,
        seed=None,
        **kwargs
    ):
--- a/qlib/contrib/model/pytorch_lstm.py
+++ b/qlib/contrib/model/pytorch_lstm.py
@@ -56,7 +56,7 @@ class LSTM(Model):
        early_stop=20,
        loss="mse",
        optimizer="adam",
-        GPU="0",
+        GPU=0,
        seed=None,
        **kwargs
    ):
--- a/qlib/contrib/model/pytorch_lstm_ts.py
+++ b/qlib/contrib/model/pytorch_lstm_ts.py
@@ -58,7 +58,7 @@ class LSTM(Model):
        loss="mse",
        optimizer="adam",
        n_jobs=10,
-        GPU="0",
+        GPU=0,
        seed=None,
        **kwargs
    ):
--- a/qlib/contrib/model/pytorch_nn.py
+++ b/qlib/contrib/model/pytorch_nn.py
@@ -60,7 +60,7 @@ class DNNModelPytorch(Model):
        lr_decay_steps=100,
        optimizer="gd",
        loss="mse",
-        GPU="0",
+        GPU=0,
        seed=None,
        weight_decay=0.0,
        **kwargs
--- a/qlib/data/data.py
+++ b/qlib/data/data.py
@@ -961,8 +961,7 @@ class BaseProvider:
        is a provider class.
        """
        disk_cache = C.default_disk_cache if disk_cache is None else disk_cache
-        if C.disable_disk_cache:
-            disk_cache = False
+        fields = list(fields)  # In case of tuple.
        try:
            return DatasetD.dataset(instruments, fields, start_time, end_time, freq, disk_cache)
        except TypeError:
--- a/qlib/data/dataset/init.py
+++ b/qlib/data/dataset/init.py
@@ -76,13 +76,13 @@ class DatasetH(Dataset):
    - The processing is related to data split.
    """

-    def __init__(self, handler: Union[dict, DataHandler], segments: list):
+    def __init__(self, handler: Union[dict, DataHandler], segments: dict):
        """
        Parameters
        ----------
        handler : Union[dict, DataHandler]
            handler will be passed into setup_data.
-        segments : list
+        segments : dict
            segments will be passed into setup_data.
        """
        super().__init__(handler, segments)
@@ -91,7 +91,7 @@ class DatasetH(Dataset):
        """Initialize the DatasetH, Only parameters belonging to handler.init will be passed in"""
        self.handler.init(**kwargs)

-    def setup_data(self, handler: Union[dict, DataHandler], segments: list):
+    def setup_data(self, handler: Union[dict, DataHandler], segments: dict):
        """
        Setup the underlying data.

@@ -104,7 +104,7 @@ class DatasetH(Dataset):

            - config of `DataHandler`.  Please refer to `DataHandler`

-        segments : list
+        segments : dict
            Describe the options to segment the data.
            Here are some examples:

--- a/qlib/data/dataset/handler.py
+++ b/qlib/data/dataset/handler.py
@@ -57,7 +57,6 @@ class DataHandler(Serializable):
        instruments=None,
        start_time=None,
        end_time=None,
-        freq="day",
        data_loader: Tuple[dict, str, DataLoader] = None,
        init_data=True,
        fetch_orig=True,
@@ -71,8 +70,6 @@ class DataHandler(Serializable):
            start_time of the original data.
        end_time :
            end_time of the original data.
-        freq :
-            frequency of data
        data_loader : Tuple[dict, str, DataLoader]
            data loader to load the data.
        init_data :
@@ -86,23 +83,42 @@ class DataHandler(Serializable):
        # Setup data loader
        assert data_loader is not None  # to make start_time end_time could have None default value

+        # what data source to load data
        self.data_loader = init_instance_by_config(
            data_loader,
            None if (isinstance(data_loader, dict) and "module_path" in data_loader) else data_loader_module,
            accept_types=DataLoader,
        )

+        # what data to be loaded from data source
+        # For IDE auto-completion.
        self.instruments = instruments
        self.start_time = start_time
        self.end_time = end_time
-        self.freq = freq
+
        self.fetch_orig = fetch_orig
        if init_data:
            with TimeInspector.logt("Init data"):
                self.init()
        super().__init__()

-    def init(self, enable_cache: bool = True):
+    def conf_data(self, **kwargs):
+        """
+        configuration of data.
+        # what data to be loaded from data source
+
+        This method will be used when loading pickled handler from dataset.
+        The data will be initialized with different time range.
+
+        """
+        attr_list = {"instruments", "start_time", "end_time"}
+        for k, v in kwargs.items():
+            if k in attr_list:
+                setattr(self, k, v)
+            else:
+                raise KeyError("Such config is not supported.")
+
+    def init(self, enable_cache: bool = False):
        """
        initialize the data.
        In case of running initialization multiple times, it will do nothing from the second time on.
@@ -123,7 +139,7 @@ class DataHandler(Serializable):
        # Setup data.
        # _data may be with multiple column index level. The outer level indicates the feature set name
        with TimeInspector.logt("Loading data"):
-            self._data = self.data_loader.load(self.instruments, self.start_time, self.end_time, self.freq)
+            self._data = self.data_loader.load(self.instruments, self.start_time, self.end_time)
        # TODO: cache

    CS_ALL = "__all"  # return all columns with single-level index column
@@ -262,7 +278,6 @@ class DataHandlerLP(DataHandler):
        instruments=None,
        start_time=None,
        end_time=None,
-        freq="day",
        data_loader: Tuple[dict, str, DataLoader] = None,
        infer_processors=[],
        learn_processors=[],
@@ -328,7 +343,7 @@ class DataHandlerLP(DataHandler):

        self.process_type = process_type
        self.drop_raw = drop_raw
-        super().__init__(instruments, start_time, end_time, freq, data_loader, **kwargs)
+        super().__init__(instruments, start_time, end_time, data_loader, **kwargs)

    def get_all_processors(self):
        return self.infer_processors + self.learn_processors
--- a/qlib/data/dataset/loader.py
+++ b/qlib/data/dataset/loader.py
@@ -10,7 +10,9 @@ import pandas as pd
 from typing import Tuple, Union

 from qlib.data import D
-from qlib.utils import load_dataset
+from qlib.data import filter as filter_module
+from qlib.data.filter import BaseDFilter
+from qlib.utils import load_dataset, init_instance_by_config


 class DataLoader(abc.ABC):
@@ -19,7 +21,7 @@ class DataLoader(abc.ABC):
    """

    @abc.abstractmethod
-    def load(self, instruments, start_time=None, end_time=None, freq="day") -> pd.DataFrame:
+    def load(self, instruments, start_time=None, end_time=None) -> pd.DataFrame:
        """
        load the data as pd.DataFrame.

@@ -76,6 +78,7 @@ class DLWParser(DataLoader):
                <config> := <fields_info>

                <fields_info> := ["expr", ...] | (["expr", ...], ["col_name", ...])
+                # NOTE: list and tuple are treated the same way when parsing
        """
        self.is_group = isinstance(config, dict)

@@ -85,18 +88,22 @@ class DLWParser(DataLoader):
            self.fields = self._parse_fields_info(config)

    def _parse_fields_info(self, fields_info: Tuple[list, tuple]) -> Tuple[list, list]:
-        if isinstance(fields_info, list):
+        if len(fields_info) == 0:
+            raise ValueError("The size of fields must be greater than 0")
+
+        if not isinstance(fields_info, (list, tuple)):
+            raise TypeError("Unsupported type")
+
+        if isinstance(fields_info[0], str):
            exprs = names = fields_info
-        elif isinstance(fields_info, tuple):
+        elif isinstance(fields_info[0], (list, tuple)):
            exprs, names = fields_info
        else:
            raise NotImplementedError(f"This type of input is not supported")
        return exprs, names

    @abc.abstractmethod
-    def load_group_df(
-        self, instruments, exprs: list, names: list, start_time=None, end_time=None, freq="day"
-    ) -> pd.DataFrame:
+    def load_group_df(self, instruments, exprs: list, names: list, start_time=None, end_time=None) -> pd.DataFrame:
        """
        load the dataframe for specific group

@@ -116,25 +123,25 @@ class DLWParser(DataLoader):
        """
        pass

-    def load(self, instruments=None, start_time=None, end_time=None, freq="day") -> pd.DataFrame:
+    def load(self, instruments=None, start_time=None, end_time=None) -> pd.DataFrame:
        if self.is_group:
            df = pd.concat(
                {
-                    grp: self.load_group_df(instruments, exprs, names, start_time, end_time, freq)
+                    grp: self.load_group_df(instruments, exprs, names, start_time, end_time)
                    for grp, (exprs, names) in self.fields.items()
                },
                axis=1,
            )
        else:
            exprs, names = self.fields
-            df = self.load_group_df(instruments, exprs, names, start_time, end_time, freq)
+            df = self.load_group_df(instruments, exprs, names, start_time, end_time)
        return df


 class QlibDataLoader(DLWParser):
    """Same as QlibDataLoader. The fields can be define by config"""

-    def __init__(self, config: Tuple[list, tuple, dict], filter_pipe=None, swap_level=True):
+    def __init__(self, config: Tuple[list, tuple, dict], filter_pipe=None, swap_level=True, freq="day"):
        """
        Parameters
        ----------
@@ -145,13 +152,19 @@ class QlibDataLoader(DLWParser):
        swap_level :
            Whether to swap level of MultiIndex
        """
+        if filter_pipe is not None:
+            assert isinstance(filter_pipe, list), "The type of `filter_pipe` must be list."
+            filter_pipe = [
+                init_instance_by_config(fp, None if "module_path" in fp else filter_module, accept_types=BaseDFilter)
+                for fp in filter_pipe
+            ]
+
        self.filter_pipe = filter_pipe
        self.swap_level = swap_level
+        self.freq = freq
        super().__init__(config)

-    def load_group_df(
-        self, instruments, exprs: list, names: list, start_time=None, end_time=None, freq="day"
-    ) -> pd.DataFrame:
+    def load_group_df(self, instruments, exprs: list, names: list, start_time=None, end_time=None) -> pd.DataFrame:
        if instruments is None:
            warnings.warn("`instruments` is not set, will load all stocks")
            instruments = "all"
@@ -160,7 +173,7 @@ class QlibDataLoader(DLWParser):
        elif self.filter_pipe is not None:
            warnings.warn("`filter_pipe` is not None, but it will not be used with `instruments` as list")

-        df = D.features(instruments, exprs, start_time, end_time, freq)
+        df = D.features(instruments, exprs, start_time, end_time, self.freq)
        df.columns = names
        if self.swap_level:
            df = df.swaplevel().sort_index()  # NOTE: if swaplevel, return <datetime, instrument>
@@ -185,7 +198,7 @@ class StaticDataLoader(DataLoader):
        self.join = join
        self._data = None

-    def load(self, instruments=None, start_time=None, end_time=None, freq="day") -> pd.DataFrame:
+    def load(self, instruments=None, start_time=None, end_time=None) -> pd.DataFrame:
        self._maybe_load_raw_data()
        if instruments is None:
            df = self._data
--- a/qlib/workflow/init.py
+++ b/qlib/workflow/init.py
@@ -3,6 +3,7 @@

 from contextlib import contextmanager
 from .expm import MLflowExpManager
+from .exp import Experiment
 from .recorder import Recorder
 from ..utils import Wrapper

@@ -165,7 +166,7 @@ class QlibRecorder:
        """
        return self.get_exp(experiment_id, experiment_name).list_recorders()

-    def get_exp(self, experiment_id=None, experiment_name=None, create: bool = True):
+    def get_exp(self, experiment_id=None, experiment_name=None, create: bool = True) -> Experiment:
        """
        Method for retrieving an experiment with given id or name. Once the `create` argument is set to
        True, if no valid experiment is found, this method will create one for you. Otherwise, it will
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@ NAME = "pyqlib"
 DESCRIPTION = "A Quantitative-research Platform"
 REQUIRES_PYTHON = ">=3.5.0"

-VERSION = "0.6.1.99"
+VERSION = "0.6.2"

 # Detect Cython
 try:
Author	SHA1	Message	Date
Young	97d354fa73	update version for releasing	2021-02-02 09:24:33 +00:00
Young	a87fb5a68c	fix contrib data freq	2021-02-02 16:52:50 +08:00
Young	835b47a7e7	simplify parameters	2021-02-02 16:52:50 +08:00
Young	802dac81c9	move `freq` params to dataloader	2021-02-02 16:52:50 +08:00
Wendi Li	bdc70c192a	Update pytorch_nn.py	2021-02-02 14:48:12 +08:00
Wendi Li	213f809148	Update pytorch_alstm_ts.py	2021-02-02 14:47:41 +08:00
Wendi Li	f3fd5e0773	Update pytorch_gats.py	2021-02-02 14:47:31 +08:00
Wendi Li	decf74cbdf	Update pytorch_gru.py	2021-02-02 14:47:20 +08:00
Wendi Li	b4a92d55f8	Update pytorch_gru_ts.py	2021-02-02 14:47:00 +08:00
Wendi Li	ebc31b9bdb	Update pytorch_lstm.py	2021-02-02 14:46:49 +08:00
Wendi Li	56ebe9bf36	Update pytorch_lstm_ts.py	2021-02-02 14:46:21 +08:00
Wendi Li	ddd68fc761	Update pytorch_alstm.py	2021-02-02 14:34:57 +08:00
bxdd	f50463aca9	Fix bug in alpha360	2021-02-01 18:33:51 +08:00
Jactus	c0e7cbc983	Add filter_pipe API	2021-01-29 12:47:04 +08:00