1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

modify dump_update starts with the last end date of each symbol

This commit is contained in:
zhupr
2021-06-17 22:33:31 +08:00
parent b4efbd53b2
commit a4f6e04199

View File

@@ -401,6 +401,8 @@ class DumpDataUpdate(DumpDataBase):
)
self._mode = self.UPDATE_MODE
self._old_calendar_list = self._read_calendars(self._calendars_dir.joinpath(f"{self.freq}.txt"))
# NOTE: all.txt only exists once for each stock
# NOTE: if a stock corresponds to multiple different time ranges, user need to modify self._update_instruments
self._update_instruments = (
self._read_instruments(self._instruments_dir.joinpath(self.INSTRUMENTS_FILE_NAME))
.set_index([self.symbol_field_name])
@@ -409,10 +411,9 @@ class DumpDataUpdate(DumpDataBase):
# load all csv files
self._all_data = self._load_all_source_data() # type: pd.DataFrame
self._update_calendars = sorted(
self._new_calendar_list = self._old_calendar_list + sorted(
filter(lambda x: x > self._old_calendar_list[-1], self._all_data[self.date_field_name].unique())
)
self._new_calendar_list = self._old_calendar_list + self._update_calendars
def _load_all_source_data(self):
# NOTE: Need more memory
@@ -452,8 +453,16 @@ class DumpDataUpdate(DumpDataBase):
if not (isinstance(_start, pd.Timestamp) and isinstance(_end, pd.Timestamp)):
continue
if _code in self._update_instruments:
# exists stock, will append data
_update_calendars = (
_df[_df[self.date_field_name] > self._update_instruments[_code][self.INSTRUMENTS_START_FIELD]][
self.date_field_name
]
.sort_values()
.to_list()
)
self._update_instruments[_code][self.INSTRUMENTS_END_FIELD] = self._format_datetime(_end)
futures[executor.submit(self._dump_bin, _df, self._update_calendars)] = _code
futures[executor.submit(self._dump_bin, _df, _update_calendars)] = _code
else:
# new stock
_dt_range = self._update_instruments.setdefault(_code, dict())