diff --git a/scripts/dump_bin.py b/scripts/dump_bin.py index b3a18cc90..83daa28bc 100644 --- a/scripts/dump_bin.py +++ b/scripts/dump_bin.py @@ -401,6 +401,8 @@ class DumpDataUpdate(DumpDataBase): ) self._mode = self.UPDATE_MODE self._old_calendar_list = self._read_calendars(self._calendars_dir.joinpath(f"{self.freq}.txt")) + # NOTE: all.txt only exists once for each stock + # NOTE: if a stock corresponds to multiple different time ranges, user need to modify self._update_instruments self._update_instruments = ( self._read_instruments(self._instruments_dir.joinpath(self.INSTRUMENTS_FILE_NAME)) .set_index([self.symbol_field_name]) @@ -409,10 +411,9 @@ class DumpDataUpdate(DumpDataBase): # load all csv files self._all_data = self._load_all_source_data() # type: pd.DataFrame - self._update_calendars = sorted( + self._new_calendar_list = self._old_calendar_list + sorted( filter(lambda x: x > self._old_calendar_list[-1], self._all_data[self.date_field_name].unique()) ) - self._new_calendar_list = self._old_calendar_list + self._update_calendars def _load_all_source_data(self): # NOTE: Need more memory @@ -452,8 +453,16 @@ class DumpDataUpdate(DumpDataBase): if not (isinstance(_start, pd.Timestamp) and isinstance(_end, pd.Timestamp)): continue if _code in self._update_instruments: + # exists stock, will append data + _update_calendars = ( + _df[_df[self.date_field_name] > self._update_instruments[_code][self.INSTRUMENTS_START_FIELD]][ + self.date_field_name + ] + .sort_values() + .to_list() + ) self._update_instruments[_code][self.INSTRUMENTS_END_FIELD] = self._format_datetime(_end) - futures[executor.submit(self._dump_bin, _df, self._update_calendars)] = _code + futures[executor.submit(self._dump_bin, _df, _update_calendars)] = _code else: # new stock _dt_range = self._update_instruments.setdefault(_code, dict())