From d3160e94399ef395fdd12116b416a84bea3a87b4 Mon Sep 17 00:00:00 2001 From: wangershi Date: Thu, 18 Mar 2021 21:15:45 +0800 Subject: [PATCH] remove some useless code --- scripts/data_collector/fund/collector.py | 12 ++---------- scripts/data_collector/utils.py | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/scripts/data_collector/fund/collector.py b/scripts/data_collector/fund/collector.py index 1e0d2d8bf..10800a7a3 100644 --- a/scripts/data_collector/fund/collector.py +++ b/scripts/data_collector/fund/collector.py @@ -151,10 +151,6 @@ class FundCollector(BaseCollector): raise ValueError(f"cannot support {interval}") return _result - def collector_data(self): - """collector data""" - super(FundCollector, self).collector_data() - class FundollectorCN(FundCollector, ABC): def get_instrument_list(self): @@ -213,12 +209,8 @@ class FundNormalize(BaseNormalize): return df -class FundNormalize1d(FundNormalize, ABC): - DAILY_FORMAT = "%Y-%m-%d" - - def normalize(self, df: pd.DataFrame) -> pd.DataFrame: - df = super(FundNormalize1d, self).normalize(df) - return df +class FundNormalize1d(FundNormalize): + pass class FundNormalizeCN: diff --git a/scripts/data_collector/utils.py b/scripts/data_collector/utils.py index ed14ad6e1..e8c9b9dc4 100644 --- a/scripts/data_collector/utils.py +++ b/scripts/data_collector/utils.py @@ -98,9 +98,8 @@ def get_calendar_list(bench_code="CSI300") -> list: return calendar -def return_date_list(source_dir, date_field_name: str, file_path: Path): - file_path = Path(file_path) - date_list = pd.read_csv(Path(source_dir).joinpath(file_path), sep=",", index_col=0)[date_field_name].to_list() +def return_date_list(date_field_name: str, file_path: Path): + date_list = pd.read_csv(file_path, sep=",", index_col=0)[date_field_name].to_list() return sorted(map(lambda x: pd.Timestamp(x), date_list)) @@ -139,10 +138,13 @@ def get_calendar_list_by_ratio( logger.info(f"count how many funds trade in this day......") _dict_count_trade = dict() # dict{date:count} - _fun = partial(return_date_list, source_dir, date_field_name) + _fun = partial(return_date_list, date_field_name) + all_oldest_list = [] with tqdm(total=_number_all_funds) as p_bar: with ProcessPoolExecutor(max_workers=max_workers) as executor: - for date_list in executor.map(_fun, file_list[:_number_all_funds]): + for date_list in executor.map(_fun, file_list): + if date_list: + all_oldest_list.append(date_list[0]) for date in date_list: if date not in _dict_count_trade.keys(): _dict_count_trade[date] = 0 @@ -154,12 +156,10 @@ def get_calendar_list_by_ratio( logger.info(f"count how many funds have founded in this day......") _dict_count_founding = {date: _number_all_funds for date in _dict_count_trade.keys()} # dict{date:count} with tqdm(total=_number_all_funds) as p_bar: - with ProcessPoolExecutor(max_workers=max_workers) as executor: - for date_list in executor.map(_fun, file_list[:_number_all_funds]): - oldest_date = sorted(date_list)[0] # this fund haven't found before this day - for date in _dict_count_founding.keys(): - if date < oldest_date: - _dict_count_founding[date] -= 1 + for oldest_date in all_oldest_list: + for date in _dict_count_founding.keys(): + if date < oldest_date: + _dict_count_founding[date] -= 1 calendar = [ date