From 2c33332dd6d25fb430fa4366abc1eaa8dc80f1ee Mon Sep 17 00:00:00 2001 From: Linlang <30293408+SunsetWolf@users.noreply.github.com> Date: Wed, 10 Jul 2024 14:48:44 +0800 Subject: [PATCH] More dataloader example (#1823) * More dataloader example * optimize code * optimeze code * optimeze code * optimeze code * optimeze code * optimeze code * fix pylint error * fix CI error * fix CI error * Comments * fix error type --------- Co-authored-by: Young --- .github/workflows/test_qlib_from_pip.yml | 2 +- .github/workflows/test_qlib_from_source.yml | 2 +- .../workflows/test_qlib_from_source_slow.yml | 2 +- qlib/data/dataset/loader.py | 14 +++++++- tests/data_mid_layer_tests/test_dataloader.py | 33 ++++++++++++++++++- 5 files changed, 48 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_qlib_from_pip.yml b/.github/workflows/test_qlib_from_pip.yml index fd1e8c4cf..029e292d3 100644 --- a/.github/workflows/test_qlib_from_pip.yml +++ b/.github/workflows/test_qlib_from_pip.yml @@ -16,7 +16,7 @@ jobs: # Since macos-latest changed from 12.7.4 to 14.4.1, # the minimum python version that matches a 14.4.1 version of macos is 3.10, # so we limit the macos version to macos-12. - os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12] + os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12] # not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129 python-version: [3.7, 3.8] diff --git a/.github/workflows/test_qlib_from_source.yml b/.github/workflows/test_qlib_from_source.yml index 885d8fa43..8238db9bb 100644 --- a/.github/workflows/test_qlib_from_source.yml +++ b/.github/workflows/test_qlib_from_source.yml @@ -17,7 +17,7 @@ jobs: # Since macos-latest changed from 12.7.4 to 14.4.1, # the minimum python version that matches a 14.4.1 version of macos is 3.10, # so we limit the macos version to macos-12. 
- os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12] + os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12] # not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129 python-version: [3.7, 3.8] diff --git a/.github/workflows/test_qlib_from_source_slow.yml b/.github/workflows/test_qlib_from_source_slow.yml index 8725d4fe0..3401ea3fd 100644 --- a/.github/workflows/test_qlib_from_source_slow.yml +++ b/.github/workflows/test_qlib_from_source_slow.yml @@ -17,7 +17,7 @@ jobs: # Since macos-latest changed from 12.7.4 to 14.4.1, # the minimum python version that matches a 14.4.1 version of macos is 3.10, # so we limit the macos version to macos-12. - os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12] + os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12] # not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129 python-version: [3.7, 3.8] diff --git a/qlib/data/dataset/loader.py b/qlib/data/dataset/loader.py index f2921124b..06e199bca 100644 --- a/qlib/data/dataset/loader.py +++ b/qlib/data/dataset/loader.py @@ -41,6 +41,7 @@ class DataLoader(abc.ABC): ---------- instruments : str or dict it can either be the market name or the config file of instruments generated by InstrumentProvider. + If the value of instruments is None, it means that no filtering is done. start_time : str start of the time range. 
end_time : str @@ -50,6 +51,11 @@ class DataLoader(abc.ABC): ------- pd.DataFrame: data load from the under layer source + + Raises + ------ + KeyError: + if the instruments filter is not supported, raise KeyError """ @@ -320,7 +326,13 @@ class NestedDataLoader(DataLoader): def load(self, instruments=None, start_time=None, end_time=None) -> pd.DataFrame: df_full = None for dl in self.data_loader_l: - df_current = dl.load(instruments, start_time, end_time) + try: + df_current = dl.load(instruments, start_time, end_time) + except KeyError: + warnings.warn( + "If the value of `instruments` cannot be processed, it will set instruments to None to get all the data." + ) + df_current = dl.load(instruments=None, start_time=start_time, end_time=end_time) if df_full is None: df_full = df_current else: diff --git a/tests/data_mid_layer_tests/test_dataloader.py b/tests/data_mid_layer_tests/test_dataloader.py index e3cb741bb..4d057be4f 100644 --- a/tests/data_mid_layer_tests/test_dataloader.py +++ b/tests/data_mid_layer_tests/test_dataloader.py @@ -7,8 +7,10 @@ import qlib from pathlib import Path sys.path.append(str(Path(__file__).resolve().parent)) -from qlib.data.dataset.loader import NestedDataLoader +from qlib.data.dataset.loader import NestedDataLoader, QlibDataLoader +from qlib.data.dataset.handler import DataHandlerLP from qlib.contrib.data.loader import Alpha158DL, Alpha360DL +from qlib.data import D class TestDataLoader(unittest.TestCase): @@ -44,6 +46,35 @@ class TestDataLoader(unittest.TestCase): assert "LABEL0" in columns_list # Then you can use it wth DataHandler; + # NOTE: please note that the data processors are missing!!! 
You should add them based on your requirements + + """ + dataset.to_pickle("test_df.pkl") + nested_data_loader = NestedDataLoader( + dataloader_l=[ + { + "class": "qlib.contrib.data.loader.Alpha158DL", + "kwargs": {"config": {"label": (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])}}, + }, + { + "class": "qlib.contrib.data.loader.Alpha360DL", + }, + { + "class": "qlib.data.dataset.loader.StaticDataLoader", + "kwargs": {"config": "test_df.pkl"}, + }, + ] + ) + data_handler_config = { + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "instruments": "csi300", + "data_loader": nested_data_loader, + } + data_handler = DataHandlerLP(**data_handler_config) + data = data_handler.fetch() + print(data) + """ if __name__ == "__main__":