From 4b663049781ff9bc022a5e095772888965d27c91 Mon Sep 17 00:00:00 2001 From: zhupr Date: Mon, 29 Mar 2021 11:18:33 +0800 Subject: [PATCH] Fix us_index collector --- scripts/data_collector/index.py | 7 ++++++- scripts/data_collector/us_index/collector.py | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/data_collector/index.py b/scripts/data_collector/index.py index 300e6b625..82a230e37 100644 --- a/scripts/data_collector/index.py +++ b/scripts/data_collector/index.py @@ -114,6 +114,8 @@ class IndexBase: $ python collector.py save_new_companies --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data """ df = self.get_new_companies() + if df is None or df.empty: + raise ValueError(f"get new companies error: {self.index_name}") df = df.drop_duplicates([self.SYMBOL_FIELD_NAME]) df.loc[:, self.INSTRUMENTS_COLUMNS].to_csv( self.instruments_dir.joinpath(f"{self.index_name.lower()}_only_new.txt"), sep="\t", index=False, header=None @@ -184,7 +186,10 @@ class IndexBase: logger.info(f"start parse {self.index_name.lower()} companies.....") instruments_columns = [self.SYMBOL_FIELD_NAME, self.START_DATE_FIELD, self.END_DATE_FIELD] changers_df = self.get_changes() - new_df = self.get_new_companies().copy() + new_df = self.get_new_companies() + if new_df is None or new_df.empty: + raise ValueError(f"get new companies error: {self.index_name}") + new_df = new_df.copy() logger.info("parse history companies by changes......") for _row in tqdm(changers_df.sort_values(self.DATE_FIELD_NAME, ascending=False).itertuples(index=False)): if _row.type == self.ADD: diff --git a/scripts/data_collector/us_index/collector.py b/scripts/data_collector/us_index/collector.py index 0641437e0..371668330 100644 --- a/scripts/data_collector/us_index/collector.py +++ b/scripts/data_collector/us_index/collector.py @@ -35,7 +35,7 @@ WIKI_INDEX_NAME_MAP = { class WIKIIndex(IndexBase): # NOTE: The US stock code contains "PRN", and the directory cannot be created on Windows system, use the "_" prefix # https://superuser.com/questions/613313/why-cant-we-make-con-prn-null-folder-in-windows - INST_PREFIX = "_" + INST_PREFIX = "" def __init__(self, index_name: str, qlib_dir: [str, Path] = None, request_retry: int = 5, retry_sleep: int = 3): super(WIKIIndex, self).__init__( @@ -123,7 +123,7 @@ class NASDAQ100Index(WIKIIndex): MAX_WORKERS = 16 def filter_df(self, df: pd.DataFrame) -> pd.DataFrame: - if not (set(df.columns) - {"Company", "Ticker"}): + if len(df) >= 100 and "Ticker" in df.columns: return df.loc[:, ["Ticker"]].copy() @property