diff --git a/qlib/config.py b/qlib/config.py index 344eb8527..52b05568d 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -115,7 +115,12 @@ _default_config = { "format": "[%(process)s:%(threadName)s](%(asctime)s) %(levelname)s - %(name)s - [%(filename)s:%(lineno)d] - %(message)s" } }, - "filters": {"field_not_found": {"()": "qlib.log.LogFilter", "param": [".*?WARN: data not found for.*?"],}}, + "filters": { + "field_not_found": { + "()": "qlib.log.LogFilter", + "param": [".*?WARN: data not found for.*?"], + } + }, "handlers": { "console": { "class": "logging.StreamHandler", @@ -130,7 +135,10 @@ _default_config = { "exp_manager": { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": {"uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), "default_exp_name": "Experiment",}, + "kwargs": { + "uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), + "default_exp_name": "Experiment", + }, }, } @@ -192,8 +200,16 @@ HIGH_FREQ_CONFIG = { } _default_region_config = { - REG_CN: {"trade_unit": 100, "limit_threshold": 0.099, "deal_price": "vwap",}, - REG_US: {"trade_unit": 1, "limit_threshold": None, "deal_price": "close",}, + REG_CN: { + "trade_unit": 100, + "limit_threshold": 0.099, + "deal_price": "vwap", + }, + REG_US: { + "trade_unit": 1, + "limit_threshold": None, + "deal_price": "close", + }, } diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index bd3494abf..aa24ffb0c 100644 --- a/qlib/contrib/backtest/__init__.py +++ b/qlib/contrib/backtest/__init__.py @@ -18,7 +18,13 @@ logger = get_module_logger("backtest caller") def get_strategy( - strategy=None, topk=50, margin=0.5, n_drop=5, risk_degree=0.95, str_type="dropout", adjust_dates=None, + strategy=None, + topk=50, + margin=0.5, + n_drop=5, + risk_degree=0.95, + str_type="dropout", + adjust_dates=None, ): """get_strategy @@ -69,7 +75,11 @@ def get_strategy( str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( - topk=topk, buffer_margin=margin, n_drop=n_drop, risk_degree=risk_degree, adjust_dates=adjust_dates, + topk=topk, + buffer_margin=margin, + n_drop=n_drop, + risk_degree=risk_degree, + adjust_dates=adjust_dates, ) elif isinstance(strategy, (dict, str)): # 2) create strategy with init_instance_by_config @@ -162,7 +172,9 @@ def get_exchange( def get_executor( - executor=None, trade_exchange=None, verbose=True, + executor=None, + trade_exchange=None, + verbose=True, ): """get_executor diff --git a/qlib/contrib/backtest/profit_attribution.py b/qlib/contrib/backtest/profit_attribution.py index 355f06373..20c6f638f 100644 --- a/qlib/contrib/backtest/profit_attribution.py +++ b/qlib/contrib/backtest/profit_attribution.py @@ -12,7 +12,10 @@ from pathlib import Path def get_benchmark_weight( - bench, start_date=None, end_date=None, path=None, + bench, + start_date=None, + end_date=None, + path=None, ): """get_benchmark_weight @@ -213,7 +216,12 @@ def get_stock_group(stock_group_field_df, bench_stock_weight_df, group_method, g def brinson_pa( - positions, bench="SH000905", group_field="industry", group_method="category", group_n=None, deal_price="vwap", + positions, + bench="SH000905", + group_field="industry", + group_method="category", + group_n=None, + deal_price="vwap", ): """brinson profit attribution @@ -247,10 +255,17 @@ def brinson_pa( # suspend stock is NAN. So we have to get more date to forward fill the NAN shift_start_date = start_date - datetime.timedelta(days=250) instruments = D.list_instruments( - D.instruments(market="all"), start_time=shift_start_date, end_time=end_date, as_list=True, + D.instruments(market="all"), + start_time=shift_start_date, + end_time=end_date, + as_list=True, ) stock_df = D.features( - instruments, [group_field, deal_price], start_time=shift_start_date, end_time=end_date, freq="day", + instruments, + [group_field, deal_price], + start_time=shift_start_date, + end_time=end_date, + freq="day", ) stock_df.columns = [group_field, "deal_price"] diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 574287819..970b032d6 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -21,7 +21,10 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time): fit_start_time is not None and fit_end_time is not None ), "Make sure `fit_start_time` and `fit_end_time` are not None." pkwargs.update( - {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} + { + "fit_start_time": fit_start_time, + "fit_end_time": fit_end_time, + } ) new_l.append({"class": klass.__name__, "kwargs": pkwargs}) else: @@ -167,7 +170,10 @@ class Alpha158(DataHandlerLP): def get_feature_config(self): conf = { "kbar": {}, - "price": {"windows": [0], "feature": ["OPEN", "HIGH", "LOW", "VWAP"],}, + "price": { + "windows": [0], + "feature": ["OPEN", "HIGH", "LOW", "VWAP"], + }, "rolling": {}, } return self.parse_config_to_fields(conf) diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index 363a18458..c68571853 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -35,7 +35,11 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False def calc_long_short_return( - pred: pd.Series, label: pd.Series, date_col: str = "datetime", quantile: float = 0.2, dropna: bool = False, + pred: pd.Series, + label: pd.Series, + date_col: str = "datetime", + quantile: float = 0.2, + dropna: bool = False, ) -> Tuple[pd.Series, pd.Series]: """ calculate long-short return diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 5cb1ce4eb..4aa5b5515 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -244,7 +244,12 @@ def long_short_backtest( short_returns[date] = np.mean(short_profit) + np.mean(all_profit) ls_returns[date] = np.mean(short_profit) + np.mean(long_profit) - return dict(zip(["long", "short", "long_short"], map(pd.Series, [long_returns, short_returns, ls_returns]),)) + return dict( + zip( + ["long", "short", "long_short"], + map(pd.Series, [long_returns, short_returns, ls_returns]), + ) + ) def t_run(): diff --git a/qlib/contrib/evaluate_portfolio.py b/qlib/contrib/evaluate_portfolio.py index 2d94105e4..04ddd8db0 100644 --- a/qlib/contrib/evaluate_portfolio.py +++ b/qlib/contrib/evaluate_portfolio.py @@ -64,7 +64,12 @@ def get_position_value(evaluate_date, position): instruments = list(set(instruments) - set(["cash"])) # filter 'cash' fields = ["$close"] close_data_df = D.features( - instruments, fields, start_time=evaluate_date, end_time=evaluate_date, freq="day", disk_cache=0, + instruments, + fields, + start_time=evaluate_date, + end_time=evaluate_date, + freq="day", + disk_cache=0, ) value = _get_position_value_from_df(evaluate_date, position, close_data_df) return value @@ -82,7 +87,14 @@ def get_position_list_value(positions): start_date, end_date = day_list[0], day_list[-1] # load data fields = ["$close"] - close_data_df = D.features(instruments, fields, start_time=start_date, end_time=end_date, freq="day", disk_cache=0,) + close_data_df = D.features( + instruments, + fields, + start_time=start_date, + end_time=end_date, + freq="day", + disk_cache=0, + ) # generate value # return dict for time:position_value value_dict = OrderedDict() diff --git a/qlib/contrib/model/catboost_model.py b/qlib/contrib/model/catboost_model.py index 2840c2cef..d57c32b70 100644 --- a/qlib/contrib/model/catboost_model.py +++ b/qlib/contrib/model/catboost_model.py @@ -32,7 +32,9 @@ class CatBoostModel(Model): **kwargs ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index 306e68aad..bbbb61851 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -118,7 +118,10 @@ class ALSTM(Model): torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ class ALSTM(Model): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -319,12 +328,14 @@ class ALSTMModel(nn.Module): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", + nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", + nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 612bacbec..725568de8 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -123,7 +123,10 @@ class ALSTM(Model): torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ class ALSTM(Model): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -302,12 +309,14 @@ class ALSTMModel(nn.Module): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", + nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", + nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index c59dc9197..07048e1bc 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -229,11 +229,17 @@ class GATs(Model): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -334,11 +340,19 @@ class GATModel(nn.Module): if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index dfc5f4ab5..1e94f56e4 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -242,7 +242,11 @@ class GATs(Model): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -357,11 +361,19 @@ class GATModel(nn.Module): if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index d2a774b65..84f863b9f 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -118,7 +118,10 @@ class GRU(Model): torch.manual_seed(self.seed) self.gru_model = GRUModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ class GRU(Model): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -296,7 +305,11 @@ class GRUModel(nn.Module): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index 49f438cc3..bb6618b85 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -123,7 +123,10 @@ class GRU(Model): torch.manual_seed(self.seed) self.GRU_model = GRUModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ class GRU(Model): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -279,7 +286,11 @@ class GRUModel(nn.Module): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 02ca16e36..163d500ec 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ -118,7 +118,10 @@ class LSTM(Model): torch.manual_seed(self.seed) self.lstm_model = LSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.lstm_model.parameters(), lr=self.lr) @@ -208,11 +211,17 @@ class LSTM(Model): return np.mean(losses), np.mean(scores) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -296,7 +305,11 @@ class LSTMModel(nn.Module): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index 2ec36f96e..cf4f8fb9f 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -123,7 +123,10 @@ class LSTM(Model): torch.manual_seed(self.seed) self.LSTM_model = LSTMModel( - d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, + d_feat=self.d_feat, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.LSTM_model.parameters(), lr=self.lr) @@ -195,7 +198,11 @@ class LSTM(Model): return np.mean(losses), np.mean(scores) def fit( - self, dataset, evals_result=dict(), verbose=True, save_path=None, + self, + dataset, + evals_result=dict(), + verbose=True, + save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -279,7 +286,11 @@ class LSTMModel(nn.Module): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 8c1a77ec3..16fcea9ff 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -154,7 +154,11 @@ class DNNModelPytorch(Model): self.dnn_model.to(self.device) def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid = dataset.prepare( ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index 1f7433e05..d5169e6c7 100644 --- a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -30,7 +30,14 @@ from ...data.dataset.handler import DataHandlerLP class SFM_Model(nn.Module): def __init__( - self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu", + self, + d_feat=6, + output_dim=1, + freq_dim=10, + hidden_size=64, + dropout_W=0.0, + dropout_U=0.0, + device="cpu", ): super().__init__() @@ -355,11 +362,17 @@ class SFM(Model): self.train_optimizer.step() def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py index 18e9d8eb4..62e32d701 100644 --- a/qlib/contrib/model/pytorch_tabnet.py +++ b/qlib/contrib/model/pytorch_tabnet.py @@ -120,7 +120,9 @@ class TabnetModel(Model): os.makedirs("pretrain") [df_train, df_valid] = dataset.prepare( - ["pretrain", "pretrain_validation"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["pretrain", "pretrain_validation"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) @@ -154,7 +156,11 @@ class TabnetModel(Model): break def fit( - self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, ): if self.pretrain: # there is a pretrained model, load the model @@ -166,7 +172,9 @@ class TabnetModel(Model): # adding one more linear layer to fit the final output dimension self.tabnet_model = FinetuneModel(self.out_dim, self.final_out_dim, self.tabnet_model).to(self.device) df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) x_train, y_train = df_train["feature"], df_train["label"] diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py index e37725c2e..ba2e5789b 100755 --- a/qlib/contrib/model/xgboost.py +++ b/qlib/contrib/model/xgboost.py @@ -29,7 +29,9 @@ class XGBModel(Model): ): df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, + ["train", "valid"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/online/executor.py b/qlib/contrib/online/executor.py index 52b868881..2bd0937a0 100644 --- a/qlib/contrib/online/executor.py +++ b/qlib/contrib/online/executor.py @@ -150,13 +150,21 @@ class SimulatorExecutor(BaseExecutor): if order.direction == Order.SELL: # sell print( "[I {:%Y-%m-%d}]: sell {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, + trade_date, + order.stock_id, + trade_price, + order.deal_amount, + trade_val, ) ) else: print( "[I {:%Y-%m-%d}]: buy {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, + trade_date, + order.stock_id, + trade_price, + order.deal_amount, + trade_val, ) ) @@ -263,13 +271,21 @@ def load_order_list(user_path, trade_date): for stock_id in order_dict["sell"]: amount, factor = order_dict["sell"][stock_id] order = Order( - stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.SELL, factor=factor, + stock_id=stock_id, + amount=amount, + trade_date=pd.Timestamp(trade_date), + direction=Order.SELL, + factor=factor, ) order_list.append(order) for stock_id in order_dict["buy"]: amount, factor = order_dict["buy"][stock_id] order = Order( - stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.BUY, factor=factor, + stock_id=stock_id, + amount=amount, + trade_date=pd.Timestamp(trade_date), + direction=Order.BUY, + factor=factor, ) order_list.append(order) return order_list diff --git a/qlib/contrib/online/manager.py b/qlib/contrib/online/manager.py index a4476709d..cf850b9da 100644 --- a/qlib/contrib/online/manager.py +++ b/qlib/contrib/online/manager.py @@ -84,10 +84,12 @@ class UserManager: raise ValueError("Cannot find user {}".format(user_id)) self.users[user_id].account.save_account(self.data_path / user_id) save_instance( - self.users[user_id].strategy, self.data_path / user_id / "strategy_{}.pickle".format(user_id), + self.users[user_id].strategy, + self.data_path / user_id / "strategy_{}.pickle".format(user_id), ) save_instance( - self.users[user_id].model, self.data_path / user_id / "model_{}.pickle".format(user_id), + self.users[user_id].model, + self.data_path / user_id / "model_{}.pickle".format(user_id), ) def add_user(self, user_id, config_file, add_date): diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index c82deb394..c8b44f578 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -125,7 +125,9 @@ class Operator: trade_date=trade_date, ) save_order_list( - order_list=order_list, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, + order_list=order_list, + user_path=(pathlib.Path(path) / user_id), + trade_date=trade_date, ) self.logger.info("Generate order list at {} for {}".format(trade_date, user_id)) um.save_user_data(user_id) @@ -158,7 +160,9 @@ class Operator: order_list = load_order_list(user_path=(pathlib.Path(path) / user_id), trade_date=trade_date) trade_info = executor.execute(order_list=order_list, trade_account=user.account, trade_date=trade_date) executor.save_executed_file_from_trade_info( - trade_info=trade_info, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, + trade_info=trade_info, + user_path=(pathlib.Path(path) / user_id), + trade_date=trade_date, ) self.logger.info("execute order list at {} for {}".format(trade_date.date(), user_id)) diff --git a/qlib/contrib/online/utils.py b/qlib/contrib/online/utils.py index fb96c87bd..611af63e4 100644 --- a/qlib/contrib/online/utils.py +++ b/qlib/contrib/online/utils.py @@ -79,7 +79,11 @@ def prepare(um, today, user_id, exchange_config=None): log.warning("user_id:{}, last trading date {} after today {}".format(user_id, latest_trading_date, today)) return [pd.Timestamp(latest_trading_date)], None - dates = D.calendar(start_time=pd.Timestamp(latest_trading_date), end_time=pd.Timestamp(today), future=True,) + dates = D.calendar( + start_time=pd.Timestamp(latest_trading_date), + end_time=pd.Timestamp(today), + future=True, + ) dates = list(dates) dates.append(get_next_trading_date(dates[-1], future=True)) if exchange_config: diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index ef1447a12..1cb14d261 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -53,7 +53,8 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df.index = t_df.index.strftime("%Y-%m-%d") # Cumulative Return By Group group_scatter_figure = ScatterGraph( - t_df.cumsum(), layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), + t_df.cumsum(), + layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), ).figure t_df = t_df.loc[:, ["long-short", "long-average"]] @@ -61,7 +62,12 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int group_hist_figure = SubplotsGraph( t_df, kind_map=dict(kind="DistplotGraph", kwargs=dict(bin_size=_bin_size)), - subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["long-short", "long-average"],), + subplots_kwargs=dict( + rows=1, + cols=2, + print_grid=False, + subplot_titles=["long-short", "long-average"], + ), ).figure return group_scatter_figure, group_hist_figure @@ -96,12 +102,15 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6) _monthly_ic = ic.groupby(_index).mean() _monthly_ic.index = pd.MultiIndex.from_arrays( - [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], names=["year", "month"], + [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], + names=["year", "month"], ) # fill month _month_list = pd.date_range( - start=pd.Timestamp(f"{_index.min()[:4]}0101"), end=pd.Timestamp(f"{_index.max()[:4]}1231"), freq="1M", + start=pd.Timestamp(f"{_index.min()[:4]}0101"), + end=pd.Timestamp(f"{_index.max()[:4]}1231"), + freq="1M", ) _years = [] _month = [] @@ -133,15 +142,32 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _bin_size = ((_ic_df.max() - _ic_df.min()) / 20).min() _sub_graph_data = [ - ("ic", dict(row=1, col=1, name="", kind="DistplotGraph", graph_kwargs=dict(bin_size=_bin_size),),), + ( + "ic", + dict( + row=1, + col=1, + name="", + kind="DistplotGraph", + graph_kwargs=dict(bin_size=_bin_size), + ), + ), (_qqplot_fig, dict(row=1, col=2)), ] ic_hist_figure = SubplotsGraph( _ic_df.dropna(), kind_map=dict(kind="HistogramGraph", kwargs=dict()), - subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name],), + subplots_kwargs=dict( + rows=1, + cols=2, + print_grid=False, + subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name], + ), sub_graph_data=_sub_graph_data, - layout=dict(yaxis2=dict(title="Observed Quantile"), xaxis2=dict(title=f"{dist_name} Distribution Quantile"),), + layout=dict( + yaxis2=dict(title="Observed Quantile"), + xaxis2=dict(title=f"{dist_name} Distribution Quantile"), + ), ).figure return ic_bar_figure, ic_heatmap_figure, ic_hist_figure @@ -155,7 +181,8 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: _df = ac.to_frame("value") _df.index = _df.index.strftime("%Y-%m-%d") ac_figure = ScatterGraph( - _df, layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), + _df, + layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), ).figure return (ac_figure,) @@ -175,11 +202,17 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple: .sum() / (len(x) // N) ) - r_df = pd.DataFrame({"Top": top, "Bottom": bottom,}) + r_df = pd.DataFrame( + { + "Top": top, + "Bottom": bottom, + } + ) # FIXME: support HIGH-FREQ r_df.index = r_df.index.strftime("%Y-%m-%d") turnover_figure = ScatterGraph( - r_df, layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), + r_df, + layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), ).figure return (turnover_figure,) @@ -197,7 +230,11 @@ def ic_figure(ic_df: pd.DataFrame, show_nature_day=True, **kwargs) -> go.Figure: # FIXME: support HIGH-FREQ ic_df.index = ic_df.index.strftime("%Y-%m-%d") ic_bar_figure = BarGraph( - ic_df, layout=dict(title="Information Coefficient (IC)", xaxis=dict(type="category", tickangle=45),), + ic_df, + layout=dict( + title="Information Coefficient (IC)", + xaxis=dict(type="category", tickangle=45), + ), ).figure return ic_bar_figure @@ -240,7 +277,12 @@ def model_performance_graph( figure_list = [] for graph_name in graph_names: fun_res = eval(f"_{graph_name}")( - pred_label=pred_label, lag=lag, N=N, reverse=reverse, rank=rank, show_nature_day=show_nature_day, + pred_label=pred_label, + lag=lag, + N=N, + reverse=reverse, + rank=rank, + show_nature_day=show_nature_day, ) figure_list += fun_res diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py index 604189c94..abb68ea60 100644 --- a/qlib/contrib/report/analysis_position/cumulative_return.py +++ b/qlib/contrib/report/analysis_position/cumulative_return.py @@ -13,7 +13,11 @@ from ..analysis_position.parse_position import get_position_data def _get_cum_return_data_with_position( - position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, + position: dict, + report_normal: pd.DataFrame, + label_data: pd.DataFrame, + start_date=None, + end_date=None, ): """ @@ -25,7 +29,11 @@ def _get_cum_return_data_with_position( :return: """ _cumulative_return_df = get_position_data( - position=position, report_normal=report_normal, label_data=label_data, start_date=start_date, end_date=end_date, + position=position, + report_normal=report_normal, + label_data=label_data, + start_date=start_date, + end_date=end_date, ).copy() _cumulative_return_df["label"] = _cumulative_return_df["label"] - _cumulative_return_df["bench"] @@ -79,7 +87,11 @@ def _get_cum_return_data_with_position( def _get_figure_with_position( - position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, + position: dict, + report_normal: pd.DataFrame, + label_data: pd.DataFrame, + start_date=None, + end_date=None, ) -> Iterable[go.Figure]: """Get average analysis figures @@ -99,12 +111,18 @@ def _get_figure_with_position( # Create figures for _t_name in ["buy", "sell", "buy_minus_sell", "hold"]: sub_graph_data = [ - ("cum_{}".format(_t_name), dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}),), + ( + "cum_{}".format(_t_name), + dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}), + ), ( "{}_weight".format(_t_name.replace("minus", "plus") if "minus" in _t_name else _t_name), dict(row=2, col=1), ), - ("{}_value".format(_t_name), dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}),), + ( + "{}_value".format(_t_name), + dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}), + ), ] _default_xaxis = dict(showline=False, zeroline=True, tickangle=45) @@ -143,7 +161,13 @@ def _get_figure_with_position( [{"rowspan": 1}, None], ] subplots_kwargs = dict( - vertical_spacing=0.01, rows=2, cols=2, row_width=[1, 2], column_width=[3, 1], print_grid=False, specs=specs, + vertical_spacing=0.01, + rows=2, + cols=2, + row_width=[1, 2], + column_width=[3, 1], + print_grid=False, + specs=specs, ) yield SubplotsGraph( cum_return_df, diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index 23f9c592c..fe1d61137 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -72,7 +72,10 @@ def parse_position(position: dict = None) -> pd.DataFrame: result_df = result_df.append(_trading_day_df, sort=True) - previous_data = dict(date=_trading_date, code_list=_trading_day_df[_trading_day_df["status"] != -1].index,) + previous_data = dict( + date=_trading_date, + code_list=_trading_day_df[_trading_day_df["status"] != -1].index, + ) result_df.reset_index(inplace=True) result_df.rename(columns={"date": "datetime", "index": "instrument"}, inplace=True) diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py index 9a4d834ed..72a358adc 100644 --- a/qlib/contrib/report/analysis_position/rank_label.py +++ b/qlib/contrib/report/analysis_position/rank_label.py @@ -23,7 +23,11 @@ def _get_figure_with_position( :return: """ _position_df = get_position_data( - position, label_data, calculate_label_rank=True, start_date=start_date, end_date=end_date, + position, + label_data, + calculate_label_rank=True, + start_date=start_date, + end_date=end_date, ) res_dict = dict() @@ -47,14 +51,20 @@ def _get_figure_with_position( yield ScatterGraph( _res_df.loc[:, [_col]], layout=dict( - title=_col, xaxis=dict(type="category", tickangle=45), yaxis=dict(title="lable-rank-ratio: %"), + title=_col, + xaxis=dict(type="category", tickangle=45), + yaxis=dict(title="lable-rank-ratio: %"), ), graph_kwargs=dict(mode="lines+markers"), ).figure def rank_label_graph( - position: dict, label_data: pd.DataFrame, start_date=None, end_date=None, show_notebook=True, + position: dict, + label_data: pd.DataFrame, + start_date=None, + end_date=None, + show_notebook=True, ) -> Iterable[go.Figure]: """Ranking percentage of stocks buy, sell, and holding on the trading day. Average rank-ratio(similar to **sell_df['label'].rank(ascending=False) / len(sell_df)**) of daily trading diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py index 8e2c05c0a..f82e654c4 100644 --- a/qlib/contrib/report/analysis_position/report.py +++ b/qlib/contrib/report/analysis_position/report.py @@ -123,7 +123,9 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 1, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": {"width": 0,}, + "line": { + "width": 0, + }, }, { "type": "rect", @@ -135,13 +137,20 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 0.55, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": {"width": 0,}, + "line": { + "width": 0, + }, }, ], ) _subplot_kwargs = dict( - shared_xaxes=True, vertical_spacing=0.01, rows=7, cols=1, row_width=[1, 1, 1, 3, 1, 1, 3], print_grid=False, + shared_xaxes=True, + vertical_spacing=0.01, + rows=7, + cols=1, + row_width=[1, 1, 1, 3, 1, 1, 3], + print_grid=False, ) figure = SubplotsGraph( df=report_df, diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py index dbbc41110..70e382fb1 100644 --- a/qlib/contrib/report/graph.py +++ b/qlib/contrib/report/graph.py @@ -311,7 +311,11 @@ class SubplotsGraph: _temp_row_data = ( column_name, dict( - row=row, col=col, name=res_name, kind=self._kind_map["kind"], graph_kwargs=self._kind_map["kwargs"], + row=row, + col=col, + name=res_name, + kind=self._kind_map["kind"], + graph_kwargs=self._kind_map["kwargs"], ), ) self._sub_graph_data.append(_temp_row_data) diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py index ee3ee03ec..dd90437b0 100644 --- a/qlib/contrib/strategy/cost_control.py +++ b/qlib/contrib/strategy/cost_control.py @@ -57,7 +57,10 @@ class SoftTopkStrategy(WeightStrategyBase): final_stock_weight[stock_id] -= sw if self.buy_method == "first_fill": for stock_id in buy_signal_stocks: - add_weight = min(max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), sold_stock_weight,) + add_weight = min( + max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), + sold_stock_weight, + ) final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + add_weight sold_stock_weight -= add_weight elif self.buy_method == "average_fill": diff --git a/qlib/contrib/strategy/order_generator.py b/qlib/contrib/strategy/order_generator.py index 6f168b4dd..494981ecc 100644 --- a/qlib/contrib/strategy/order_generator.py +++ b/qlib/contrib/strategy/order_generator.py @@ -102,10 +102,14 @@ class OrderGenWInteract(OrderGenerator): # strategy 1 : generate amount_position by weight_position # Use API in Exchange() target_amount_dict = trade_exchange.generate_amount_position_from_weight_position( - weight_position=target_weight_position, cash=current_tradable_value, trade_date=trade_date, + weight_position=target_weight_position, + cash=current_tradable_value, + trade_date=trade_date, ) order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=target_amount_dict, current_position=current_amount_dict, trade_date=trade_date, + target_position=target_amount_dict, + current_position=current_amount_dict, + trade_date=trade_date, ) return order_list @@ -160,6 +164,8 @@ class OrderGenWOInteract(OrderGenerator): else: continue order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=amount_dict, current_position=current.get_stock_amount_dict(), trade_date=trade_date, + target_position=amount_dict, + current_position=current.get_stock_amount_dict(), + trade_date=trade_date, ) return order_list diff --git a/qlib/contrib/tuner/launcher.py b/qlib/contrib/tuner/launcher.py index 409410a2a..711658c9a 100644 --- a/qlib/contrib/tuner/launcher.py +++ b/qlib/contrib/tuner/launcher.py @@ -13,7 +13,11 @@ from .config import TunerConfigManager args_parser = argparse.ArgumentParser(prog="tuner") args_parser.add_argument( - "-c", "--config_path", required=True, type=str, help="config path indicates where to load yaml config.", + "-c", + "--config_path", + required=True, + type=str, + help="config path indicates where to load yaml config.", ) args = args_parser.parse_args() diff --git a/qlib/contrib/tuner/space.py b/qlib/contrib/tuner/space.py index 57f57a6c3..76f101671 100644 --- a/qlib/contrib/tuner/space.py +++ b/qlib/contrib/tuner/space.py @@ -10,5 +10,8 @@ TopkAmountStrategySpace = { } QLibDataLabelSpace = { - "labels": hp.choice("labels", [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]],) + "labels": hp.choice( + "labels", + [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], + ) } diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index e81d41a9a..2ce957859 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -28,7 +28,10 @@ class Tuner: self.optim_config = optim_config self.max_evals = self.tuner_config.get("max_evals", 10) - self.ex_dir = os.path.join(self.tuner_config["experiment"]["dir"], self.tuner_config["experiment"]["name"],) + self.ex_dir = os.path.join( + self.tuner_config["experiment"]["dir"], + self.tuner_config["experiment"]["name"], + ) self.best_params = None self.best_res = None @@ -39,7 +42,10 @@ class Tuner: TimeInspector.set_time_mark() fmin( - fn=self.objective, space=self.space, algo=tpe.suggest, max_evals=self.max_evals, + fn=self.objective, + space=self.space, + algo=tpe.suggest, + max_evals=self.max_evals, ) self.logger.info("Local best params: {} ".format(self.best_params)) TimeInspector.log_cost_time( @@ -153,7 +159,8 @@ class QLibTuner(Tuner): estimator_config["data"]["args"].update(params["data_label_space"]) estimator_path = os.path.join( - self.tuner_config["experiment"].get("dir", "../"), QLibTuner.ESTIMATOR_CONFIG_NAME, + self.tuner_config["experiment"].get("dir", "../"), + QLibTuner.ESTIMATOR_CONFIG_NAME, ) with open(estimator_path, "w") as fp: @@ -166,20 +173,27 @@ class QLibTuner(Tuner): model_space_name = self.tuner_config["model"].get("space", None) if model_space_name is None: raise ValueError("Please give the search space of model.") - model_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), model_space_name,) + model_space = getattr( + importlib.import_module(".space", package="qlib.contrib.tuner"), + model_space_name, + ) # 2. Setup strategy space strategy_space_name = self.tuner_config["strategy"].get("space", None) if strategy_space_name is None: raise ValueError("Please give the search space of strategy.") - strategy_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), strategy_space_name,) + strategy_space = getattr( + importlib.import_module(".space", package="qlib.contrib.tuner"), + strategy_space_name, + ) # 3. Setup data label space if given if self.tuner_config.get("data_label", None) is not None: data_label_space_name = self.tuner_config["data_label"].get("space", None) if data_label_space_name is not None: data_label_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), data_label_space_name, + importlib.import_module(".space", package="qlib.contrib.tuner"), + data_label_space_name, ) else: data_label_space_name = None diff --git a/qlib/data/client.py b/qlib/data/client.py index d1a68cb38..5244a7e45 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -26,7 +26,8 @@ class Client: self.logger = get_module_logger(self.__class__.__name__) # bind connect/disconnect callbacks self.sio.on( - "connect", lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), + "connect", + lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), ) self.sio.on("disconnect", lambda: self.logger.debug("Disconnect from server!")) diff --git a/qlib/data/data.py b/qlib/data/data.py index 47cded79c..762467da3 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -328,7 +328,14 @@ class DatasetProvider(abc.ABC): raise NotImplementedError("Subclass of DatasetProvider must implement `Dataset` method") def _uri( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=1, **kwargs, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=1, + **kwargs, ): """Get task uri, used when generating rabbitmq task in qlib_server @@ -407,13 +414,29 @@ class DatasetProvider(abc.ABC): for inst, spans in instruments_d.items(): data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=(inst, start_time, end_time, freq, normalize_column_names, spans, C,), + args=( + inst, + start_time, + end_time, + freq, + normalize_column_names, + spans, + C, + ), ) else: for inst in instruments_d: data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=(inst, start_time, end_time, freq, normalize_column_names, None, C,), + args=( + inst, + start_time, + end_time, + freq, + normalize_column_names, + None, + C, + ), ) p.close() @@ -575,7 +598,12 @@ class LocalInstrumentProvider(InstrumentProvider): start_time = pd.Timestamp(start_time or cal[0]) end_time = pd.Timestamp(end_time or cal[-1]) _instruments_filtered = { - inst: list(filter(lambda x: x[0] <= x[1], [(max(start_time, x[0]), min(end_time, x[1])) for x in spans],)) + inst: list( + filter( + lambda x: x[0] <= x[1], + [(max(start_time, x[0]), min(end_time, x[1])) for x in spans], + ) + ) for inst, spans in _instruments.items() } _instruments_filtered = {key: value for key, value in _instruments_filtered.items() if value} @@ -695,7 +723,14 @@ class LocalDatasetProvider(DatasetProvider): for inst in instruments_d: p.apply_async( - LocalDatasetProvider.cache_walker, args=(inst, start_time, end_time, freq, column_names,), + LocalDatasetProvider.cache_walker, + args=( + inst, + start_time, + end_time, + freq, + column_names, + ), ) p.close() @@ -728,7 +763,12 @@ class ClientCalendarProvider(CalendarProvider): def calendar(self, start_time=None, end_time=None, freq="day", future=False): self.conn.send_request( request_type="calendar", - request_content={"start_time": str(start_time), "end_time": str(end_time), "freq": freq, "future": future,}, + request_content={ + "start_time": str(start_time), + "end_time": str(end_time), + "freq": freq, + "future": future, + }, msg_queue=self.queue, msg_proc_func=lambda response_content: [pd.Timestamp(c) for c in response_content], ) @@ -792,7 +832,14 @@ class ClientDatasetProvider(DatasetProvider): self.queue = queue.Queue() def dataset( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, return_uri=False, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=0, + return_uri=False, ): if Inst.get_inst_type(instruments) == Inst.DICT: get_module_logger("data").warning( @@ -895,7 +942,13 @@ class BaseProvider: return Inst.list_instruments(instruments, start_time, end_time, freq, as_list) def features( - self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=None, + self, + instruments, + fields, + start_time=None, + end_time=None, + freq="day", + disk_cache=None, ): """ Parameters: diff --git a/qlib/data/dataset/utils.py b/qlib/data/dataset/utils.py index 58e2bd968..feda19044 100644 --- a/qlib/data/dataset/utils.py +++ b/qlib/data/dataset/utils.py @@ -32,7 +32,10 @@ def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int: def fetch_df_by_index( - df: pd.DataFrame, selector: Union[pd.Timestamp, slice, str, list], level: Union[str, int], fetch_orig=True, + df: pd.DataFrame, + selector: Union[pd.Timestamp, slice, str, list], + level: Union[str, int], + fetch_orig=True, ) -> pd.DataFrame: """ fetch data from `data` with `selector` and `level` diff --git a/qlib/data/filter.py b/qlib/data/filter.py index 811fd387f..70f9d3278 100644 --- a/qlib/data/filter.py +++ b/qlib/data/filter.py @@ -341,7 +341,12 @@ class ExpressionDFilter(SeriesDFilter): # do not use dataset cache try: _features = DatasetD.dataset( - instruments, [self.rule_expression], fstart, fend, freq=self.filter_freq, disk_cache=0, + instruments, + [self.rule_expression], + fstart, + fend, + freq=self.filter_freq, + disk_cache=0, ) except TypeError: # use LocalDatasetProvider diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index eb6f9c5ed..f92e72787 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -18,6 +18,10 @@ class TestAutoData(unittest.TestCase): print(f"Qlib data is not found in {provider_uri}") GetData().qlib_data( - name="qlib_data_simple", region="cn", interval="1d", target_dir=provider_uri, delete_old=False, + name="qlib_data_simple", + region="cn", + interval="1d", + target_dir=provider_uri, + delete_old=False, ) init(provider_uri=provider_uri, region=REG_CN, **cls._setup_kwargs) diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index 0c704b896..be458a24d 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -193,7 +193,10 @@ class SigAnaRecord(SignalRecord): } ) objects.update( - {"long_short_r.pkl": long_short_r, "long_avg_r.pkl": long_avg_r,} + { + "long_short_r.pkl": long_short_r, + "long_avg_r.pkl": long_avg_r, + } ) self.recorder.log_metrics(**metrics) self.recorder.save_objects(**objects, artifact_path=self.get_path())