diff --git a/.github/workflows/test_macos.yml b/.github/workflows/test_macos.yml index 5d9f3768e..599230370 100644 --- a/.github/workflows/test_macos.yml +++ b/.github/workflows/test_macos.yml @@ -35,11 +35,10 @@ jobs: # Test Qlib installed with pip - name: Check Qlib with flake8 - run: | - pip install --upgrade pip - pip install flake8 - cd .. - flake8 --ignore=E501,F541,E266,E402,W503,E731,E203 qlib + run: | + pip install --upgrade pip + pip install flake8 + flake8 --ignore=E501,F541,E266,E402,W503,E731,E203 qlib - name: Install Qlib with pip run: | diff --git a/scripts/data_collector/br_index/collector.py b/scripts/data_collector/br_index/collector.py index bbb012b5c..0dc12eff6 100644 --- a/scripts/data_collector/br_index/collector.py +++ b/scripts/data_collector/br_index/collector.py @@ -55,13 +55,13 @@ class IBOVIndex(IndexBase): def get_current_4_month_period(self, current_month: int): """ - This function is used to calculated what is the current - four month period for the current month. For example, + This function is used to calculated what is the current + four month period for the current month. For example, If the current month is August 8, its four month period is 2Q. OBS: In english Q is used to represent *quarter* - which means a three month period. However, in + which means a three month period. However, in portuguese we use Q to represent a four month period. In other words, @@ -90,8 +90,8 @@ class IBOVIndex(IndexBase): def get_four_month_period(self): """ - The ibovespa index is updated every four months. - Therefore, we will represent each time period as 2003_1Q + The ibovespa index is updated every four months. + Therefore, we will represent each time period as 2003_1Q which means 2003 first four mount period (Jan, Feb, Mar, Apr) """ four_months_period = ["1Q", "2Q", "3Q"] @@ -101,14 +101,13 @@ class IBOVIndex(IndexBase): current_month = now.month for year in [item for item in range(init_year, current_year)]: for el in four_months_period: - self.years_4_month_periods.append(str(year)+"_"+el) + self.years_4_month_periods.append(str(year) + "_" + el) # For current year the logic must be a little different current_4_month_period = self.get_current_4_month_period(current_month) for i in range(int(current_4_month_period[0])): - self.years_4_month_periods.append(str(current_year) + "_" + str(i+1) + "Q") + self.years_4_month_periods.append(str(current_year) + "_" + str(i + 1) + "Q") return self.years_4_month_periods - def format_datetime(self, inst_df: pd.DataFrame) -> pd.DataFrame: """formatting the datetime in an instrument @@ -189,11 +188,19 @@ class IBOVIndex(IndexBase): try: df_changes_list = [] for i in tqdm(range(len(self.years_4_month_periods) - 1)): - df = pd.read_csv(self.ibov_index_composition.format(self.years_4_month_periods[i]), on_bad_lines="skip")["symbol"] - df_ = pd.read_csv(self.ibov_index_composition.format(self.years_4_month_periods[i + 1]), on_bad_lines="skip")["symbol"] + df = pd.read_csv( + self.ibov_index_composition.format(self.years_4_month_periods[i]), on_bad_lines="skip" + )["symbol"] + df_ = pd.read_csv( + self.ibov_index_composition.format(self.years_4_month_periods[i + 1]), on_bad_lines="skip" + )["symbol"] ## Remove Dataframe - remove_date = self.years_4_month_periods[i].split("_")[0] + "-" + quarter_dict[self.years_4_month_periods[i].split("_")[1]] + remove_date = ( + self.years_4_month_periods[i].split("_")[0] + + "-" + + quarter_dict[self.years_4_month_periods[i].split("_")[1]] + ) list_remove = list(df[~df.isin(df_)]) df_removed = pd.DataFrame( { @@ -204,7 +211,11 @@ class IBOVIndex(IndexBase): ) ## Add Dataframe - add_date = self.years_4_month_periods[i + 1].split("_")[0] + "-" + quarter_dict[self.years_4_month_periods[i + 1].split("_")[1]] + add_date = ( + self.years_4_month_periods[i + 1].split("_")[0] + + "-" + + quarter_dict[self.years_4_month_periods[i + 1].split("_")[1]] + ) list_add = list(df_[~df_.isin(df)]) df_added = pd.DataFrame( {"date": len(list_add) * [add_date], "type": len(list_add) * ["add"], "symbol": list_add} @@ -272,6 +283,5 @@ class IBOVIndex(IndexBase): return df.loc[:, ["Código"]].copy() - if __name__ == "__main__": - fire.Fire(partial(get_instruments, market_index="br_index" )) + fire.Fire(partial(get_instruments, market_index="br_index")) diff --git a/scripts/data_collector/us_index/collector.py b/scripts/data_collector/us_index/collector.py index 06c48f8f6..97cbce825 100644 --- a/scripts/data_collector/us_index/collector.py +++ b/scripts/data_collector/us_index/collector.py @@ -271,6 +271,5 @@ class SP400Index(WIKIIndex): logger.warning(f"No suitable data source has been found!") - if __name__ == "__main__": fire.Fire(partial(get_instruments, market_index="us_index")) diff --git a/scripts/data_collector/utils.py b/scripts/data_collector/utils.py index 7ef1cdf95..74ecb541e 100644 --- a/scripts/data_collector/utils.py +++ b/scripts/data_collector/utils.py @@ -559,6 +559,7 @@ def generate_minutes_calendar_from_daily( return pd.Index(sorted(set(np.hstack(res)))) + def get_instruments( qlib_dir: str, index_name: str, @@ -566,7 +567,7 @@ def get_instruments( freq: str = "day", request_retry: int = 5, retry_sleep: int = 3, - market_index: str = "cn_index" + market_index: str = "cn_index", ): """ @@ -585,7 +586,7 @@ def get_instruments( retry_sleep: int request sleep, by default 3 market_index: str - Where the files to obtain the index are located, + Where the files to obtain the index are located, for example data_collector.cn_index.collector Examples @@ -605,4 +606,4 @@ def get_instruments( if __name__ == "__main__": - assert len(get_hs_stock_symbols()) >= MINIMUM_SYMBOLS_NUM \ No newline at end of file + assert len(get_hs_stock_symbols()) >= MINIMUM_SYMBOLS_NUM diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 6c5fe44ee..26a0a8893 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -317,24 +317,24 @@ class YahooCollectorIN1min(YahooCollectorIN): class YahooCollectorBR(YahooCollector, ABC): def retry(cls): - """" - The reason to use retry=2 is due to the fact that - Yahoo Finance unfortunately does not keep track of some - Brazilian stocks. - - Therefore, the decorator deco_retry with retry argument - set to 5 will keep trying to get the stock data up to 5 times, - which makes the code to download Brazilians stocks very slow. - - In future, this may change, but for now - I suggest to leave retry argument to 1 or 2 in - order to improve download speed. + """ + The reason to use retry=2 is due to the fact that + Yahoo Finance unfortunately does not keep track of some + Brazilian stocks. - To achieve this goal an abstract attribute (retry) - was added into YahooCollectorBR base class + Therefore, the decorator deco_retry with retry argument + set to 5 will keep trying to get the stock data up to 5 times, + which makes the code to download Brazilians stocks very slow. + + In future, this may change, but for now + I suggest to leave retry argument to 1 or 2 in + order to improve download speed. + + To achieve this goal an abstract attribute (retry) + was added into YahooCollectorBR base class """ raise NotImplementedError - + def get_instrument_list(self): logger.info("get BR stock symbols......") symbols = get_br_stock_symbols() + [