1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Fix us_index collector

This commit is contained in:
zhupr
2021-03-29 11:18:33 +08:00
parent 253378a44e
commit 4b66304978
2 changed files with 8 additions and 3 deletions

View File

@@ -114,6 +114,8 @@ class IndexBase:
$ python collector.py save_new_companies --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data
"""
df = self.get_new_companies()
if df is None or df.empty:
raise ValueError(f"get new companies error: {self.index_name}")
df = df.drop_duplicates([self.SYMBOL_FIELD_NAME])
df.loc[:, self.INSTRUMENTS_COLUMNS].to_csv(
self.instruments_dir.joinpath(f"{self.index_name.lower()}_only_new.txt"), sep="\t", index=False, header=None
@@ -184,7 +186,10 @@ class IndexBase:
logger.info(f"start parse {self.index_name.lower()} companies.....")
instruments_columns = [self.SYMBOL_FIELD_NAME, self.START_DATE_FIELD, self.END_DATE_FIELD]
changers_df = self.get_changes()
new_df = self.get_new_companies().copy()
new_df = self.get_new_companies()
if new_df is None or new_df.empty:
raise ValueError(f"get new companies error: {self.index_name}")
new_df = new_df.copy()
logger.info("parse history companies by changes......")
for _row in tqdm(changers_df.sort_values(self.DATE_FIELD_NAME, ascending=False).itertuples(index=False)):
if _row.type == self.ADD:

View File

@@ -35,7 +35,7 @@ WIKI_INDEX_NAME_MAP = {
class WIKIIndex(IndexBase):
# NOTE: The US stock code contains "PRN", and the directory cannot be created on Windows system, use the "_" prefix
# https://superuser.com/questions/613313/why-cant-we-make-con-prn-null-folder-in-windows
INST_PREFIX = "_"
INST_PREFIX = ""
def __init__(self, index_name: str, qlib_dir: [str, Path] = None, request_retry: int = 5, retry_sleep: int = 3):
super(WIKIIndex, self).__init__(
@@ -123,7 +123,7 @@ class NASDAQ100Index(WIKIIndex):
MAX_WORKERS = 16
def filter_df(self, df: pd.DataFrame) -> pd.DataFrame:
if not (set(df.columns) - {"Company", "Ticker"}):
if len(df) >= 100 and "Ticker" in df.columns:
return df.loc[:, ["Ticker"]].copy()
@property