fix: ValueError caused by incorrect date format in daily data (#2015)

Co-authored-by: Linlang Lv (iSoftStone Information) <v-llv@microsoft.com>
2026-07-22 03:37:34 +08:00 · 2026-04-15 17:07:00 +08:00
parent 3097dcc995
commit b87a2c294d
1 changed files with 20 additions and 7 deletions
--- a/scripts/data_collector/base.py
+++ b/scripts/data_collector/base.py
@@ -280,11 +280,20 @@ class Normalize:
        self._symbol_field_name = symbol_field_name
        self._end_date = kwargs.get("end_date", None)
        self._max_workers = max_workers
+        self.interval = kwargs.get("interval", "1d")

        self._normalize_obj = normalize_class(
            date_field_name=date_field_name, symbol_field_name=symbol_field_name, **kwargs
        )

+    def format_data(self, df: pd.DataFrame):
+        if self.interval == "1d":
+            try:
+                pd.to_datetime(df.iloc[-1]["date"], format="%Y-%m-%d", errors="raise")
+            except Exception:
+                df = df.iloc[:-1]
+        return df
+
    def _executor(self, file_path: Path):
        file_path = Path(file_path)

@@ -300,14 +309,18 @@ class Normalize:
            keep_default_na=False,
            na_values={col: symbol_na if col == self._symbol_field_name else default_na for col in columns},
        )
+        df = self.format_data(df=df)

-        # NOTE: It has been reported that there may be some problems here, and the specific issues will be dealt with when they are identified.
-        df = self._normalize_obj.normalize(df)
-        if df is not None and not df.empty:
-            if self._end_date is not None:
-                _mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
-                df = df[_mask]
-            df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
+        if not df.empty:
+            # NOTE: It has been reported that there may be some problems here, and the specific issues will be dealt with when they are identified.
+            df = self._normalize_obj.normalize(df)
+            if df is not None and not df.empty:
+                if self._end_date is not None:
+                    _mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
+                    df = df[_mask]
+                df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
+        else:
+            logger.warning(f"{file_path.stem} source data is empty and will not undergo normalization processing.")

    def normalize(self):
        logger.info("normalize data......")