From c97a96363d378051c2a25bb2c60792fb90667fdc Mon Sep 17 00:00:00 2001 From: zhupr Date: Thu, 15 Jul 2021 22:12:53 +0800 Subject: [PATCH] Add a check if change is mutated to YahooNormalize1d --- scripts/data_collector/yahoo/collector.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index e0e6e0368..feb28a94f 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -325,9 +325,22 @@ class YahooNormalize(BaseNormalize): # NOTE: The data obtained by Yahoo finance sometimes has exceptions # WARNING: If it is normal for a `symbol(exchange)` to differ by a factor of *89* to *111* for consecutive trading days, # WARNING: the logic in the following line needs to be modified - _mask = (change_series >= 89) & (change_series <= 111) - _tmp_cols = ["high", "close", "low", "open", "adjclose"] - df.loc[_mask, _tmp_cols] = df.loc[_mask, _tmp_cols] / 100 + _count = 0 + while True: + # NOTE: may appear unusual for many days in a row + change_series = YahooNormalize.calc_change(df, last_close) + _mask = (change_series >= 89) & (change_series <= 111) + if not _mask.any(): + break + _tmp_cols = ["high", "close", "low", "open", "adjclose"] + df.loc[_mask, _tmp_cols] = df.loc[_mask, _tmp_cols] / 100 + _count += 1 + if _count >= 10: + _symbol = df.loc[df[symbol_field_name].first_valid_index()]["symbol"] + logger.warning( + f"{_symbol} `change` is abnormal for {_count} consecutive days, please check the specific data file carefully" + ) + df["change"] = YahooNormalize.calc_change(df, last_close) columns += ["change"]