mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
modify dump_update starts with the last end date of each symbol
This commit is contained in:
@@ -401,6 +401,8 @@ class DumpDataUpdate(DumpDataBase):
|
||||
)
|
||||
self._mode = self.UPDATE_MODE
|
||||
self._old_calendar_list = self._read_calendars(self._calendars_dir.joinpath(f"{self.freq}.txt"))
|
||||
# NOTE: all.txt only exists once for each stock
|
||||
# NOTE: if a stock corresponds to multiple different time ranges, user need to modify self._update_instruments
|
||||
self._update_instruments = (
|
||||
self._read_instruments(self._instruments_dir.joinpath(self.INSTRUMENTS_FILE_NAME))
|
||||
.set_index([self.symbol_field_name])
|
||||
@@ -409,10 +411,9 @@ class DumpDataUpdate(DumpDataBase):
|
||||
|
||||
# load all csv files
|
||||
self._all_data = self._load_all_source_data() # type: pd.DataFrame
|
||||
self._update_calendars = sorted(
|
||||
self._new_calendar_list = self._old_calendar_list + sorted(
|
||||
filter(lambda x: x > self._old_calendar_list[-1], self._all_data[self.date_field_name].unique())
|
||||
)
|
||||
self._new_calendar_list = self._old_calendar_list + self._update_calendars
|
||||
|
||||
def _load_all_source_data(self):
|
||||
# NOTE: Need more memory
|
||||
@@ -452,8 +453,16 @@ class DumpDataUpdate(DumpDataBase):
|
||||
if not (isinstance(_start, pd.Timestamp) and isinstance(_end, pd.Timestamp)):
|
||||
continue
|
||||
if _code in self._update_instruments:
|
||||
# exists stock, will append data
|
||||
_update_calendars = (
|
||||
_df[_df[self.date_field_name] > self._update_instruments[_code][self.INSTRUMENTS_START_FIELD]][
|
||||
self.date_field_name
|
||||
]
|
||||
.sort_values()
|
||||
.to_list()
|
||||
)
|
||||
self._update_instruments[_code][self.INSTRUMENTS_END_FIELD] = self._format_datetime(_end)
|
||||
futures[executor.submit(self._dump_bin, _df, self._update_calendars)] = _code
|
||||
futures[executor.submit(self._dump_bin, _df, _update_calendars)] = _code
|
||||
else:
|
||||
# new stock
|
||||
_dt_range = self._update_instruments.setdefault(_code, dict())
|
||||
|
||||
Reference in New Issue
Block a user