mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
fix: value error caused by incorrect date format in daily data (#2015)
Co-authored-by: Linlang Lv (iSoftStone Information) <v-llv@microsoft.com>
This commit is contained in:
@@ -280,11 +280,20 @@ class Normalize:
|
||||
self._symbol_field_name = symbol_field_name
|
||||
self._end_date = kwargs.get("end_date", None)
|
||||
self._max_workers = max_workers
|
||||
self.interval = kwargs.get("interval", "1d")
|
||||
|
||||
self._normalize_obj = normalize_class(
|
||||
date_field_name=date_field_name, symbol_field_name=symbol_field_name, **kwargs
|
||||
)
|
||||
|
||||
def format_data(self, df: pd.DataFrame):
|
||||
if self.interval == "1d":
|
||||
try:
|
||||
pd.to_datetime(df.iloc[-1]["date"], format="%Y-%m-%d", errors="raise")
|
||||
except Exception:
|
||||
df = df.iloc[:-1]
|
||||
return df
|
||||
|
||||
def _executor(self, file_path: Path):
|
||||
file_path = Path(file_path)
|
||||
|
||||
@@ -300,14 +309,18 @@ class Normalize:
|
||||
keep_default_na=False,
|
||||
na_values={col: symbol_na if col == self._symbol_field_name else default_na for col in columns},
|
||||
)
|
||||
df = self.format_data(df=df)
|
||||
|
||||
# NOTE: It has been reported that there may be some problems here, and the specific issues will be dealt with when they are identified.
|
||||
df = self._normalize_obj.normalize(df)
|
||||
if df is not None and not df.empty:
|
||||
if self._end_date is not None:
|
||||
_mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
|
||||
df = df[_mask]
|
||||
df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
|
||||
if not df.empty:
|
||||
# NOTE: It has been reported that there may be some problems here, and the specific issues will be dealt with when they are identified.
|
||||
df = self._normalize_obj.normalize(df)
|
||||
if df is not None and not df.empty:
|
||||
if self._end_date is not None:
|
||||
_mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
|
||||
df = df[_mask]
|
||||
df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
|
||||
else:
|
||||
logger.warning(f"{file_path.stem} source data is empty and will not undergo normalization processing.")
|
||||
|
||||
def normalize(self):
|
||||
logger.info("normalize data......")
|
||||
|
||||
Reference in New Issue
Block a user