From 81987bb1436d2d3990662c89cb9c14e88270c0f9 Mon Sep 17 00:00:00 2001 From: bxdd <45119470+bxdd@users.noreply.github.com> Date: Tue, 9 Mar 2021 15:38:04 +0800 Subject: [PATCH 1/4] Update ops.py --- qlib/data/ops.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/qlib/data/ops.py b/qlib/data/ops.py index 8bc7e1fa7..cbc101f47 100644 --- a/qlib/data/ops.py +++ b/qlib/data/ops.py @@ -74,7 +74,6 @@ class NpElemOperator(ElemOperator): """ def __init__(self, feature, func): - self.feature = feature self.func = func super(NpElemOperator, self).__init__(feature) @@ -289,8 +288,6 @@ class NpPairOperator(PairOperator): """ def __init__(self, feature_left, feature_right, func): - self.feature_left = feature_left - self.feature_right = feature_right self.func = func super(NpPairOperator, self).__init__(feature_left, feature_right) From 78bc2c874891b567e10740b9573f4a9e4c06f003 Mon Sep 17 00:00:00 2001 From: Jactus Date: Tue, 9 Mar 2021 17:31:27 +0800 Subject: [PATCH 2/4] Update Filter doc --- docs/component/data.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/component/data.rst b/docs/component/data.rst index 4b0962d49..9e5d7de2f 100644 --- a/docs/component/data.rst +++ b/docs/component/data.rst @@ -218,6 +218,25 @@ Filter - `cross-sectional features filter` \: rule_expression = '$rank($close)<10' - `time-sequence features filter`: rule_expression = '$Ref($close, 3)>100' +Here is a simple example showing how to use filter in a basic ``Qlib`` workflow configuration file: + +.. code-block:: yaml + + filter: &filter + filter_type: ExpressionDFilter + rule_expression: "Ref($close, -2) / Ref($close, -1) > 1" + filter_start_time: 2010-01-01 + filter_end_time: 2010-01-07 + keep: False + + data_handler_config: &data_handler_config + start_time: 2010-01-01 + end_time: 2021-01-22 + fit_start_time: 2010-01-01 + fit_end_time: 2015-12-31 + instruments: *market + filter_pipe: [*filter] + To know more about ``Filter``, please refer to `Filter API <../reference/api.html#module-qlib.data.filter>`_. Reference From 105fe1d3edde2ac35d88b2cc2154d6effd64339a Mon Sep 17 00:00:00 2001 From: Jactus Date: Wed, 10 Mar 2021 10:38:43 +0800 Subject: [PATCH 3/4] Remove deprecated warning for numpy>=1.20.0 --- qlib/data/dataset/__init__.py | 2 +- qlib/utils/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/qlib/data/dataset/__init__.py b/qlib/data/dataset/__init__.py index ecbeebc95..690436ba9 100644 --- a/qlib/data/dataset/__init__.py +++ b/qlib/data/dataset/__init__.py @@ -413,7 +413,7 @@ class TSDataSampler: # 1) for better performance, use the last nan line for padding the lost date # 2) In case of precision problems. We use np.float64. # TODO: I'm not sure if whether np.float64 will result in # precision problems. It will not cause any problems in my tests at least - indices = np.nan_to_num(indices.astype(np.float64), nan=self.nan_idx).astype(np.int) + indices = np.nan_to_num(indices.astype(np.float64), nan=self.nan_idx).astype(int) data = self.data_arr[indices] if isinstance(idx, mtit): diff --git a/qlib/utils/__init__.py b/qlib/utils/__init__.py index 6640dae2c..f550a0419 100644 --- a/qlib/utils/__init__.py +++ b/qlib/utils/__init__.py @@ -64,7 +64,7 @@ def np_ffill(arr: np.array): arr : np.array Input numpy 1D array """ - mask = np.isnan(arr.astype(np.float)) # np.isnan only works on np.float + mask = np.isnan(arr.astype(float)) # np.isnan only works on np.float # get fill index idx = np.where(~mask, np.arange(mask.shape[0]), 0) np.maximum.accumulate(idx, out=idx) From 119fe90570374c99332d6afd816c34c3c7884e29 Mon Sep 17 00:00:00 2001 From: Jactus Date: Wed, 10 Mar 2021 16:43:32 +0800 Subject: [PATCH 4/4] Fix pytorch ts model loader bug --- qlib/contrib/model/pytorch_alstm_ts.py | 8 ++++++-- qlib/contrib/model/pytorch_gats_ts.py | 4 ++-- qlib/contrib/model/pytorch_gru_ts.py | 8 ++++++-- qlib/contrib/model/pytorch_lstm_ts.py | 8 ++++++-- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 725568de8..ad3945368 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -210,8 +210,12 @@ class ALSTM(Model): dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader - train_loader = DataLoader(dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs) - valid_loader = DataLoader(dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs) + train_loader = DataLoader( + dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs, drop_last=True + ) + valid_loader = DataLoader( + dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True + ) if save_path == None: save_path = create_save_path(save_path) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index 1e94f56e4..241debe61 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -258,8 +258,8 @@ class GATs(Model): sampler_train = DailyBatchSampler(dl_train) sampler_valid = DailyBatchSampler(dl_valid) - train_loader = DataLoader(dl_train, sampler=sampler_train, num_workers=self.n_jobs) - valid_loader = DataLoader(dl_valid, sampler=sampler_valid, num_workers=self.n_jobs) + train_loader = DataLoader(dl_train, sampler=sampler_train, num_workers=self.n_jobs, drop_last=True) + valid_loader = DataLoader(dl_valid, sampler=sampler_valid, num_workers=self.n_jobs, drop_last=True) if save_path == None: save_path = create_save_path(save_path) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index bb6618b85..a0b240ef4 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -210,8 +210,12 @@ class GRU(Model): dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader - train_loader = DataLoader(dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs) - valid_loader = DataLoader(dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs) + train_loader = DataLoader( + dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs, drop_last=True + ) + valid_loader = DataLoader( + dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True + ) if save_path == None: save_path = create_save_path(save_path) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index cf4f8fb9f..d0decc5fb 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -210,8 +210,12 @@ class LSTM(Model): dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader - train_loader = DataLoader(dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs) - valid_loader = DataLoader(dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs) + train_loader = DataLoader( + dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs, drop_last=True + ) + valid_loader = DataLoader( + dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True + ) if save_path == None: save_path = create_save_path(save_path)