1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

fix_macos_CI (#1081)

Co-authored-by: Linlang Lv (iSoftStone) <v-linlanglv@microsoft.com>
This commit is contained in:
Linlang
2022-04-29 17:04:28 +08:00
committed by GitHub
parent 3c9c76b384
commit 5d579d1a20
5 changed files with 47 additions and 38 deletions

View File

@@ -35,11 +35,10 @@ jobs:
# Test Qlib installed with pip # Test Qlib installed with pip
- name: Check Qlib with flake8 - name: Check Qlib with flake8
run: | run: |
pip install --upgrade pip pip install --upgrade pip
pip install flake8 pip install flake8
cd .. flake8 --ignore=E501,F541,E266,E402,W503,E731,E203 qlib
flake8 --ignore=E501,F541,E266,E402,W503,E731,E203 qlib
- name: Install Qlib with pip - name: Install Qlib with pip
run: | run: |

View File

@@ -55,13 +55,13 @@ class IBOVIndex(IndexBase):
def get_current_4_month_period(self, current_month: int): def get_current_4_month_period(self, current_month: int):
""" """
This function is used to calculate what is the current This function is used to calculate what is the current
four month period for the current month. For example, four month period for the current month. For example,
If the current month is August 8, its four month period If the current month is August 8, its four month period
is 2Q. is 2Q.
OBS: In English Q is used to represent *quarter* OBS: In English Q is used to represent *quarter*
which means a three month period. However, in which means a three month period. However, in
Portuguese we use Q to represent a four month period. Portuguese we use Q to represent a four month period.
In other words, In other words,
@@ -90,8 +90,8 @@ class IBOVIndex(IndexBase):
def get_four_month_period(self): def get_four_month_period(self):
""" """
The ibovespa index is updated every four months. The ibovespa index is updated every four months.
Therefore, we will represent each time period as 2003_1Q Therefore, we will represent each time period as 2003_1Q
which means 2003 first four month period (Jan, Feb, Mar, Apr) which means 2003 first four month period (Jan, Feb, Mar, Apr)
""" """
four_months_period = ["1Q", "2Q", "3Q"] four_months_period = ["1Q", "2Q", "3Q"]
@@ -101,14 +101,13 @@ class IBOVIndex(IndexBase):
current_month = now.month current_month = now.month
for year in [item for item in range(init_year, current_year)]: for year in [item for item in range(init_year, current_year)]:
for el in four_months_period: for el in four_months_period:
self.years_4_month_periods.append(str(year)+"_"+el) self.years_4_month_periods.append(str(year) + "_" + el)
# For current year the logic must be a little different # For current year the logic must be a little different
current_4_month_period = self.get_current_4_month_period(current_month) current_4_month_period = self.get_current_4_month_period(current_month)
for i in range(int(current_4_month_period[0])): for i in range(int(current_4_month_period[0])):
self.years_4_month_periods.append(str(current_year) + "_" + str(i+1) + "Q") self.years_4_month_periods.append(str(current_year) + "_" + str(i + 1) + "Q")
return self.years_4_month_periods return self.years_4_month_periods
def format_datetime(self, inst_df: pd.DataFrame) -> pd.DataFrame: def format_datetime(self, inst_df: pd.DataFrame) -> pd.DataFrame:
"""formatting the datetime in an instrument """formatting the datetime in an instrument
@@ -189,11 +188,19 @@ class IBOVIndex(IndexBase):
try: try:
df_changes_list = [] df_changes_list = []
for i in tqdm(range(len(self.years_4_month_periods) - 1)): for i in tqdm(range(len(self.years_4_month_periods) - 1)):
df = pd.read_csv(self.ibov_index_composition.format(self.years_4_month_periods[i]), on_bad_lines="skip")["symbol"] df = pd.read_csv(
df_ = pd.read_csv(self.ibov_index_composition.format(self.years_4_month_periods[i + 1]), on_bad_lines="skip")["symbol"] self.ibov_index_composition.format(self.years_4_month_periods[i]), on_bad_lines="skip"
)["symbol"]
df_ = pd.read_csv(
self.ibov_index_composition.format(self.years_4_month_periods[i + 1]), on_bad_lines="skip"
)["symbol"]
## Remove Dataframe ## Remove Dataframe
remove_date = self.years_4_month_periods[i].split("_")[0] + "-" + quarter_dict[self.years_4_month_periods[i].split("_")[1]] remove_date = (
self.years_4_month_periods[i].split("_")[0]
+ "-"
+ quarter_dict[self.years_4_month_periods[i].split("_")[1]]
)
list_remove = list(df[~df.isin(df_)]) list_remove = list(df[~df.isin(df_)])
df_removed = pd.DataFrame( df_removed = pd.DataFrame(
{ {
@@ -204,7 +211,11 @@ class IBOVIndex(IndexBase):
) )
## Add Dataframe ## Add Dataframe
add_date = self.years_4_month_periods[i + 1].split("_")[0] + "-" + quarter_dict[self.years_4_month_periods[i + 1].split("_")[1]] add_date = (
self.years_4_month_periods[i + 1].split("_")[0]
+ "-"
+ quarter_dict[self.years_4_month_periods[i + 1].split("_")[1]]
)
list_add = list(df_[~df_.isin(df)]) list_add = list(df_[~df_.isin(df)])
df_added = pd.DataFrame( df_added = pd.DataFrame(
{"date": len(list_add) * [add_date], "type": len(list_add) * ["add"], "symbol": list_add} {"date": len(list_add) * [add_date], "type": len(list_add) * ["add"], "symbol": list_add}
@@ -272,6 +283,5 @@ class IBOVIndex(IndexBase):
return df.loc[:, ["Código"]].copy() return df.loc[:, ["Código"]].copy()
if __name__ == "__main__": if __name__ == "__main__":
fire.Fire(partial(get_instruments, market_index="br_index" )) fire.Fire(partial(get_instruments, market_index="br_index"))

View File

@@ -271,6 +271,5 @@ class SP400Index(WIKIIndex):
logger.warning(f"No suitable data source has been found!") logger.warning(f"No suitable data source has been found!")
if __name__ == "__main__": if __name__ == "__main__":
fire.Fire(partial(get_instruments, market_index="us_index")) fire.Fire(partial(get_instruments, market_index="us_index"))

View File

@@ -559,6 +559,7 @@ def generate_minutes_calendar_from_daily(
return pd.Index(sorted(set(np.hstack(res)))) return pd.Index(sorted(set(np.hstack(res))))
def get_instruments( def get_instruments(
qlib_dir: str, qlib_dir: str,
index_name: str, index_name: str,
@@ -566,7 +567,7 @@ def get_instruments(
freq: str = "day", freq: str = "day",
request_retry: int = 5, request_retry: int = 5,
retry_sleep: int = 3, retry_sleep: int = 3,
market_index: str = "cn_index" market_index: str = "cn_index",
): ):
""" """
@@ -585,7 +586,7 @@ def get_instruments(
retry_sleep: int retry_sleep: int
request sleep, by default 3 request sleep, by default 3
market_index: str market_index: str
Where the files to obtain the index are located, Where the files to obtain the index are located,
for example data_collector.cn_index.collector for example data_collector.cn_index.collector
Examples Examples
@@ -605,4 +606,4 @@ def get_instruments(
if __name__ == "__main__": if __name__ == "__main__":
assert len(get_hs_stock_symbols()) >= MINIMUM_SYMBOLS_NUM assert len(get_hs_stock_symbols()) >= MINIMUM_SYMBOLS_NUM

View File

@@ -317,24 +317,24 @@ class YahooCollectorIN1min(YahooCollectorIN):
class YahooCollectorBR(YahooCollector, ABC): class YahooCollectorBR(YahooCollector, ABC):
def retry(cls): def retry(cls):
"""" """
The reason to use retry=2 is due to the fact that The reason to use retry=2 is due to the fact that
Yahoo Finance unfortunately does not keep track of some Yahoo Finance unfortunately does not keep track of some
Brazilian stocks. Brazilian stocks.
Therefore, the decorator deco_retry with retry argument
set to 5 will keep trying to get the stock data up to 5 times,
which makes the code to download Brazilians stocks very slow.
In future, this may change, but for now
I suggest to leave retry argument to 1 or 2 in
order to improve download speed.
To achieve this goal an abstract attribute (retry) Therefore, the decorator deco_retry with retry argument
was added into YahooCollectorBR base class set to 5 will keep trying to get the stock data up to 5 times,
which makes the code to download Brazilians stocks very slow.
In future, this may change, but for now
I suggest to leave retry argument to 1 or 2 in
order to improve download speed.
To achieve this goal an abstract attribute (retry)
was added into YahooCollectorBR base class
""" """
raise NotImplementedError raise NotImplementedError
def get_instrument_list(self): def get_instrument_list(self):
logger.info("get BR stock symbols......") logger.info("get BR stock symbols......")
symbols = get_br_stock_symbols() + [ symbols = get_br_stock_symbols() + [