diff --git a/docs/conf.py b/docs/conf.py index 6e52b0e34..61fe784e7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -191,15 +191,7 @@ man_pages = [(master_doc, "qlib", u"QLib Documentation", [author], 1)] # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ( - master_doc, - "QLib", - u"QLib Documentation", - author, - "QLib", - "One line description of project.", - "Miscellaneous", - ), + (master_doc, "QLib", u"QLib Documentation", author, "QLib", "One line description of project.", "Miscellaneous",), ] diff --git a/examples/benchmarks/TFT/libs/tft_model.py b/examples/benchmarks/TFT/libs/tft_model.py index b39f17825..f40a1aece 100644 --- a/examples/benchmarks/TFT/libs/tft_model.py +++ b/examples/benchmarks/TFT/libs/tft_model.py @@ -721,12 +721,7 @@ class TemporalFusionTransformer: encoder_steps = self.num_encoder_steps # Inputs. - all_inputs = tf.keras.layers.Input( - shape=( - time_steps, - combined_input_size, - ) - ) + all_inputs = tf.keras.layers.Input(shape=(time_steps, combined_input_size,)) unknown_inputs, known_combined_layer, obs_inputs, static_inputs = self.get_tft_embeddings(all_inputs) @@ -866,10 +861,7 @@ class TemporalFusionTransformer: """Returns LSTM cell initialized with default parameters.""" if self.use_cudnn: lstm = tf.keras.layers.CuDNNLSTM( - self.hidden_layer_size, - return_sequences=True, - return_state=return_state, - stateful=False, + self.hidden_layer_size, return_sequences=True, return_state=return_state, stateful=False, ) else: lstm = tf.keras.layers.LSTM( diff --git a/examples/highfreq/highfreq_handler.py b/examples/highfreq/highfreq_handler.py index d35650514..2fc411ab6 100644 --- a/examples/highfreq/highfreq_handler.py +++ b/examples/highfreq/highfreq_handler.py @@ -20,10 +20,7 @@ class HighFreqHandler(DataHandlerLP): new_l = [] for p in proc_l: p["kwargs"].update( - { - "fit_start_time": fit_start_time, - "fit_end_time": fit_end_time, - } + {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} ) new_l.append(p) return new_l @@ -33,11 +30,7 @@ class HighFreqHandler(DataHandlerLP): data_loader = { "class": "QlibDataLoader", - "kwargs": { - "config": self.get_feature_config(), - "swap_level": False, - "freq": "1min", - }, + "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",}, } super().__init__( instruments=instruments, @@ -68,8 +61,7 @@ class HighFreqHandler(DataHandlerLP): feature_ops = template_norm.format( template_if.format( - template_fillnan.format(template_paused.format("$close")), - template_paused.format(price_field), + template_fillnan.format(template_paused.format("$close")), template_paused.format(price_field), ), template_fillnan.format(template_paused.format("$close")), ) @@ -119,24 +111,14 @@ class HighFreqHandler(DataHandlerLP): class HighFreqBacktestHandler(DataHandler): def __init__( - self, - instruments="csi300", - start_time=None, - end_time=None, + self, instruments="csi300", start_time=None, end_time=None, ): data_loader = { "class": "QlibDataLoader", - "kwargs": { - "config": self.get_feature_config(), - "swap_level": False, - "freq": "1min", - }, + "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",}, } super().__init__( - instruments=instruments, - start_time=start_time, - end_time=end_time, - data_loader=data_loader, + instruments=instruments, start_time=start_time, end_time=end_time, data_loader=data_loader, ) def get_feature_config(self): @@ -155,8 +137,7 @@ class HighFreqBacktestHandler(DataHandler): fields += [ "Cut({0}, 240, None)".format( template_if.format( - template_fillnan.format(template_paused.format("$close")), - template_paused.format(simpson_vwap), + template_fillnan.format(template_paused.format("$close")), template_paused.format(simpson_vwap), ) ) ] diff --git a/examples/highfreq/highfreq_processor.py b/examples/highfreq/highfreq_processor.py index f0ab0dec2..73510ef06 100644 --- a/examples/highfreq/highfreq_processor.py +++ b/examples/highfreq/highfreq_processor.py @@ -65,8 +65,6 @@ class HighFreqNorm(Processor): feat = df_values[:, [0, 1, 2, 3, 4, 10]].reshape(-1, 6 * 240) feat_1 = df_values[:, [5, 6, 7, 8, 9, 11]].reshape(-1, 6 * 240) df_new_features = pd.DataFrame( - data=np.concatenate((feat, feat_1), axis=1), - index=idx, - columns=["FEATURE_%d" % i for i in range(12 * 240)], + data=np.concatenate((feat, feat_1), axis=1), index=idx, columns=["FEATURE_%d" % i for i in range(12 * 240)], ).sort_index() return df_new_features diff --git a/examples/highfreq/workflow.py b/examples/highfreq/workflow.py index 01de59c0e..0bfd0c2a0 100644 --- a/examples/highfreq/workflow.py +++ b/examples/highfreq/workflow.py @@ -63,13 +63,7 @@ class HighfreqWorkflow(object): "module_path": "highfreq_handler", "kwargs": DATA_HANDLER_CONFIG0, }, - "segments": { - "train": (start_time, train_end_time), - "test": ( - test_start_time, - end_time, - ), - }, + "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),}, }, }, "dataset_backtest": { @@ -81,13 +75,7 @@ class HighfreqWorkflow(object): "module_path": "highfreq_handler", "kwargs": DATA_HANDLER_CONFIG1, }, - "segments": { - "train": (start_time, train_end_time), - "test": ( - test_start_time, - end_time, - ), - }, + "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),}, }, }, } @@ -152,24 +140,11 @@ class HighfreqWorkflow(object): "start_time": "2021-01-19 00:00:00", "end_time": "2021-01-25 16:00:00", }, - segment_kwargs={ - "test": ( - "2021-01-19 00:00:00", - "2021-01-25 16:00:00", - ), - }, + segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),}, ) dataset_backtest.init( - handler_kwargs={ - "start_time": "2021-01-19 00:00:00", - "end_time": "2021-01-25 16:00:00", - }, - segment_kwargs={ - "test": ( - "2021-01-19 00:00:00", - "2021-01-25 16:00:00", - ), - }, + handler_kwargs={"start_time": "2021-01-19 00:00:00", "end_time": "2021-01-25 16:00:00",}, + segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),}, ) ##=============get data============= diff --git a/examples/run_all_model.py b/examples/run_all_model.py index d587eff15..d356b4128 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -34,10 +34,7 @@ exp_path = str(Path(os.getcwd()).resolve() / exp_folder_name) exp_manager = { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": { - "uri": "file:" + exp_path, - "default_exp_name": "Experiment", - }, + "kwargs": {"uri": "file:" + exp_path, "default_exp_name": "Experiment",}, } if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") diff --git a/examples/workflow_by_code.py b/examples/workflow_by_code.py index d5dab8917..6f5c11dc0 100644 --- a/examples/workflow_by_code.py +++ b/examples/workflow_by_code.py @@ -81,10 +81,7 @@ if __name__ == "__main__": "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, + "kwargs": {"topk": 50, "n_drop": 5,}, }, "backtest": { "verbose": False, diff --git a/qlib/config.py b/qlib/config.py index 52b05568d..344eb8527 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -115,12 +115,7 @@ _default_config = { "format": "[%(process)s:%(threadName)s](%(asctime)s) %(levelname)s - %(name)s - [%(filename)s:%(lineno)d] - %(message)s" } }, - "filters": { - "field_not_found": { - "()": "qlib.log.LogFilter", - "param": [".*?WARN: data not found for.*?"], - } - }, + "filters": {"field_not_found": {"()": "qlib.log.LogFilter", "param": [".*?WARN: data not found for.*?"],}}, "handlers": { "console": { "class": "logging.StreamHandler", @@ -135,10 +130,7 @@ _default_config = { "exp_manager": { "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", - "kwargs": { - "uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), - "default_exp_name": "Experiment", - }, + "kwargs": {"uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"), "default_exp_name": "Experiment",}, }, } @@ -200,16 +192,8 @@ HIGH_FREQ_CONFIG = { } _default_region_config = { - REG_CN: { - "trade_unit": 100, - "limit_threshold": 0.099, - "deal_price": "vwap", - }, - REG_US: { - "trade_unit": 1, - "limit_threshold": None, - "deal_price": "close", - }, + REG_CN: {"trade_unit": 100, "limit_threshold": 0.099, "deal_price": "vwap",}, + REG_US: {"trade_unit": 1, "limit_threshold": None, "deal_price": "close",}, } diff --git a/qlib/contrib/backtest/__init__.py b/qlib/contrib/backtest/__init__.py index aa24ffb0c..bd3494abf 100644 --- a/qlib/contrib/backtest/__init__.py +++ b/qlib/contrib/backtest/__init__.py @@ -18,13 +18,7 @@ logger = get_module_logger("backtest caller") def get_strategy( - strategy=None, - topk=50, - margin=0.5, - n_drop=5, - risk_degree=0.95, - str_type="dropout", - adjust_dates=None, + strategy=None, topk=50, margin=0.5, n_drop=5, risk_degree=0.95, str_type="dropout", adjust_dates=None, ): """get_strategy @@ -75,11 +69,7 @@ def get_strategy( str_cls = getattr(strategy_pool, str_cls_dict.get(str_type)) strategy = str_cls( - topk=topk, - buffer_margin=margin, - n_drop=n_drop, - risk_degree=risk_degree, - adjust_dates=adjust_dates, + topk=topk, buffer_margin=margin, n_drop=n_drop, risk_degree=risk_degree, adjust_dates=adjust_dates, ) elif isinstance(strategy, (dict, str)): # 2) create strategy with init_instance_by_config @@ -172,9 +162,7 @@ def get_exchange( def get_executor( - executor=None, - trade_exchange=None, - verbose=True, + executor=None, trade_exchange=None, verbose=True, ): """get_executor diff --git a/qlib/contrib/backtest/profit_attribution.py b/qlib/contrib/backtest/profit_attribution.py index 20c6f638f..355f06373 100644 --- a/qlib/contrib/backtest/profit_attribution.py +++ b/qlib/contrib/backtest/profit_attribution.py @@ -12,10 +12,7 @@ from pathlib import Path def get_benchmark_weight( - bench, - start_date=None, - end_date=None, - path=None, + bench, start_date=None, end_date=None, path=None, ): """get_benchmark_weight @@ -216,12 +213,7 @@ def get_stock_group(stock_group_field_df, bench_stock_weight_df, group_method, g def brinson_pa( - positions, - bench="SH000905", - group_field="industry", - group_method="category", - group_n=None, - deal_price="vwap", + positions, bench="SH000905", group_field="industry", group_method="category", group_n=None, deal_price="vwap", ): """brinson profit attribution @@ -255,17 +247,10 @@ def brinson_pa( # suspend stock is NAN. So we have to get more date to forward fill the NAN shift_start_date = start_date - datetime.timedelta(days=250) instruments = D.list_instruments( - D.instruments(market="all"), - start_time=shift_start_date, - end_time=end_date, - as_list=True, + D.instruments(market="all"), start_time=shift_start_date, end_time=end_date, as_list=True, ) stock_df = D.features( - instruments, - [group_field, deal_price], - start_time=shift_start_date, - end_time=end_date, - freq="day", + instruments, [group_field, deal_price], start_time=shift_start_date, end_time=end_date, freq="day", ) stock_df.columns = [group_field, "deal_price"] diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 970b032d6..574287819 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -21,10 +21,7 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time): fit_start_time is not None and fit_end_time is not None ), "Make sure `fit_start_time` and `fit_end_time` are not None." pkwargs.update( - { - "fit_start_time": fit_start_time, - "fit_end_time": fit_end_time, - } + {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,} ) new_l.append({"class": klass.__name__, "kwargs": pkwargs}) else: @@ -170,10 +167,7 @@ class Alpha158(DataHandlerLP): def get_feature_config(self): conf = { "kbar": {}, - "price": { - "windows": [0], - "feature": ["OPEN", "HIGH", "LOW", "VWAP"], - }, + "price": {"windows": [0], "feature": ["OPEN", "HIGH", "LOW", "VWAP"],}, "rolling": {}, } return self.parse_config_to_fields(conf) diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index c68571853..363a18458 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -35,11 +35,7 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False def calc_long_short_return( - pred: pd.Series, - label: pd.Series, - date_col: str = "datetime", - quantile: float = 0.2, - dropna: bool = False, + pred: pd.Series, label: pd.Series, date_col: str = "datetime", quantile: float = 0.2, dropna: bool = False, ) -> Tuple[pd.Series, pd.Series]: """ calculate long-short return diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 4aa5b5515..5cb1ce4eb 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -244,12 +244,7 @@ def long_short_backtest( short_returns[date] = np.mean(short_profit) + np.mean(all_profit) ls_returns[date] = np.mean(short_profit) + np.mean(long_profit) - return dict( - zip( - ["long", "short", "long_short"], - map(pd.Series, [long_returns, short_returns, ls_returns]), - ) - ) + return dict(zip(["long", "short", "long_short"], map(pd.Series, [long_returns, short_returns, ls_returns]),)) def t_run(): diff --git a/qlib/contrib/evaluate_portfolio.py b/qlib/contrib/evaluate_portfolio.py index 04ddd8db0..2d94105e4 100644 --- a/qlib/contrib/evaluate_portfolio.py +++ b/qlib/contrib/evaluate_portfolio.py @@ -64,12 +64,7 @@ def get_position_value(evaluate_date, position): instruments = list(set(instruments) - set(["cash"])) # filter 'cash' fields = ["$close"] close_data_df = D.features( - instruments, - fields, - start_time=evaluate_date, - end_time=evaluate_date, - freq="day", - disk_cache=0, + instruments, fields, start_time=evaluate_date, end_time=evaluate_date, freq="day", disk_cache=0, ) value = _get_position_value_from_df(evaluate_date, position, close_data_df) return value @@ -87,14 +82,7 @@ def get_position_list_value(positions): start_date, end_date = day_list[0], day_list[-1] # load data fields = ["$close"] - close_data_df = D.features( - instruments, - fields, - start_time=start_date, - end_time=end_date, - freq="day", - disk_cache=0, - ) + close_data_df = D.features(instruments, fields, start_time=start_date, end_time=end_date, freq="day", disk_cache=0,) # generate value # return dict for time:position_value value_dict = OrderedDict() diff --git a/qlib/contrib/model/catboost_model.py b/qlib/contrib/model/catboost_model.py index d57c32b70..2840c2cef 100644 --- a/qlib/contrib/model/catboost_model.py +++ b/qlib/contrib/model/catboost_model.py @@ -32,9 +32,7 @@ class CatBoostModel(Model): **kwargs ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index bbbb61851..306e68aad 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -118,10 +118,7 @@ class ALSTM(Model): torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ class ALSTM(Model): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -328,14 +319,12 @@ class ALSTMModel(nn.Module): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", - nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", - nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 725568de8..612bacbec 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -123,10 +123,7 @@ class ALSTM(Model): torch.manual_seed(self.seed) self.ALSTM_model = ALSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ class ALSTM(Model): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -309,14 +302,12 @@ class ALSTMModel(nn.Module): self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) self.att_net = nn.Sequential() self.att_net.add_module( - "att_fc_in", - nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), + "att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)), ) self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) self.att_net.add_module("att_act", nn.Tanh()) self.att_net.add_module( - "att_fc_out", - nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), + "att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False), ) self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index 07048e1bc..c59dc9197 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -229,17 +229,11 @@ class GATs(Model): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -340,19 +334,11 @@ class GATModel(nn.Module): if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index 1e94f56e4..dfc5f4ab5 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -242,11 +242,7 @@ class GATs(Model): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -361,19 +357,11 @@ class GATModel(nn.Module): if base_model == "GRU": self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) elif base_model == "LSTM": self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) else: raise ValueError("unknown base model name `%s`" % base_model) diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index 84f863b9f..d2a774b65 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -118,10 +118,7 @@ class GRU(Model): torch.manual_seed(self.seed) self.gru_model = GRUModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ class GRU(Model): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -305,11 +296,7 @@ class GRUModel(nn.Module): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index bb6618b85..49f438cc3 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -123,10 +123,7 @@ class GRU(Model): torch.manual_seed(self.seed) self.GRU_model = GRUModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ class GRU(Model): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -286,11 +279,7 @@ class GRUModel(nn.Module): super().__init__() self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 163d500ec..02ca16e36 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ -118,10 +118,7 @@ class LSTM(Model): torch.manual_seed(self.seed) self.lstm_model = LSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.lstm_model.parameters(), lr=self.lr) @@ -211,17 +208,11 @@ class LSTM(Model): return np.mean(losses), np.mean(scores) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid, df_test = dataset.prepare( - ["train", "valid", "test"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] @@ -305,11 +296,7 @@ class LSTMModel(nn.Module): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index cf4f8fb9f..2ec36f96e 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -123,10 +123,7 @@ class LSTM(Model): torch.manual_seed(self.seed) self.LSTM_model = LSTMModel( - d_feat=self.d_feat, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - dropout=self.dropout, + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, ).to(self.device) if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.LSTM_model.parameters(), lr=self.lr) @@ -198,11 +195,7 @@ class LSTM(Model): return np.mean(losses), np.mean(scores) def fit( - self, - dataset, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset, evals_result=dict(), verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) @@ -286,11 +279,7 @@ class LSTMModel(nn.Module): super().__init__() self.rnn = nn.LSTM( - input_size=d_feat, - hidden_size=hidden_size, - num_layers=num_layers, - batch_first=True, - dropout=dropout, + input_size=d_feat, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, ) self.fc_out = nn.Linear(hidden_size, 1) diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 16fcea9ff..8c1a77ec3 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -154,11 +154,7 @@ class DNNModelPytorch(Model): self.dnn_model.to(self.device) def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid = dataset.prepare( ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index d5169e6c7..1f7433e05 100644 --- a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -30,14 +30,7 @@ from ...data.dataset.handler import DataHandlerLP class SFM_Model(nn.Module): def __init__( - self, - d_feat=6, - output_dim=1, - freq_dim=10, - hidden_size=64, - dropout_W=0.0, - dropout_U=0.0, - device="cpu", + self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu", ): super().__init__() @@ -362,17 +355,11 @@ class SFM(Model): self.train_optimizer.step() def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py index 62e32d701..18e9d8eb4 100644 --- a/qlib/contrib/model/pytorch_tabnet.py +++ b/qlib/contrib/model/pytorch_tabnet.py @@ -120,9 +120,7 @@ class TabnetModel(Model): os.makedirs("pretrain") [df_train, df_valid] = dataset.prepare( - ["pretrain", "pretrain_validation"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["pretrain", "pretrain_validation"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) @@ -156,11 +154,7 @@ class TabnetModel(Model): break def fit( - self, - dataset: DatasetH, - evals_result=dict(), - verbose=True, - save_path=None, + self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, ): if self.pretrain: # there is a pretrained model, load the model @@ -172,9 +166,7 @@ class TabnetModel(Model): # adding one more linear layer to fit the final output dimension self.tabnet_model = FinetuneModel(self.out_dim, self.final_out_dim, self.tabnet_model).to(self.device) df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) df_train.fillna(df_train.mean(), inplace=True) x_train, y_train = df_train["feature"], df_train["label"] diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py index ba2e5789b..e37725c2e 100755 --- a/qlib/contrib/model/xgboost.py +++ b/qlib/contrib/model/xgboost.py @@ -29,9 +29,7 @@ class XGBModel(Model): ): df_train, df_valid = dataset.prepare( - ["train", "valid"], - col_set=["feature", "label"], - data_key=DataHandlerLP.DK_L, + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L, ) x_train, y_train = df_train["feature"], df_train["label"] x_valid, y_valid = df_valid["feature"], df_valid["label"] diff --git a/qlib/contrib/online/executor.py b/qlib/contrib/online/executor.py index 2bd0937a0..52b868881 100644 --- a/qlib/contrib/online/executor.py +++ b/qlib/contrib/online/executor.py @@ -150,21 +150,13 @@ class SimulatorExecutor(BaseExecutor): if order.direction == Order.SELL: # sell print( "[I {:%Y-%m-%d}]: sell {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, - order.stock_id, - trade_price, - order.deal_amount, - trade_val, + trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, ) ) else: print( "[I {:%Y-%m-%d}]: buy {}, price {:.2f}, amount {}, value {:.2f}.".format( - trade_date, - order.stock_id, - trade_price, - order.deal_amount, - trade_val, + trade_date, order.stock_id, trade_price, order.deal_amount, trade_val, ) ) @@ -271,21 +263,13 @@ def load_order_list(user_path, trade_date): for stock_id in order_dict["sell"]: amount, factor = order_dict["sell"][stock_id] order = Order( - stock_id=stock_id, - amount=amount, - trade_date=pd.Timestamp(trade_date), - direction=Order.SELL, - factor=factor, + stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.SELL, factor=factor, ) order_list.append(order) for stock_id in order_dict["buy"]: amount, factor = order_dict["buy"][stock_id] order = Order( - stock_id=stock_id, - amount=amount, - trade_date=pd.Timestamp(trade_date), - direction=Order.BUY, - factor=factor, + stock_id=stock_id, amount=amount, trade_date=pd.Timestamp(trade_date), direction=Order.BUY, factor=factor, ) order_list.append(order) return order_list diff --git a/qlib/contrib/online/manager.py b/qlib/contrib/online/manager.py index cf850b9da..a4476709d 100644 --- a/qlib/contrib/online/manager.py +++ b/qlib/contrib/online/manager.py @@ -84,12 +84,10 @@ class UserManager: raise ValueError("Cannot find user {}".format(user_id)) self.users[user_id].account.save_account(self.data_path / user_id) save_instance( - self.users[user_id].strategy, - self.data_path / user_id / "strategy_{}.pickle".format(user_id), + self.users[user_id].strategy, self.data_path / user_id / "strategy_{}.pickle".format(user_id), ) save_instance( - self.users[user_id].model, - self.data_path / user_id / "model_{}.pickle".format(user_id), + self.users[user_id].model, self.data_path / user_id / "model_{}.pickle".format(user_id), ) def add_user(self, user_id, config_file, add_date): diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index c8b44f578..c82deb394 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -125,9 +125,7 @@ class Operator: trade_date=trade_date, ) save_order_list( - order_list=order_list, - user_path=(pathlib.Path(path) / user_id), - trade_date=trade_date, + order_list=order_list, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, ) self.logger.info("Generate order list at {} for {}".format(trade_date, user_id)) um.save_user_data(user_id) @@ -160,9 +158,7 @@ class Operator: order_list = load_order_list(user_path=(pathlib.Path(path) / user_id), trade_date=trade_date) trade_info = executor.execute(order_list=order_list, trade_account=user.account, trade_date=trade_date) executor.save_executed_file_from_trade_info( - trade_info=trade_info, - user_path=(pathlib.Path(path) / user_id), - trade_date=trade_date, + trade_info=trade_info, user_path=(pathlib.Path(path) / user_id), trade_date=trade_date, ) self.logger.info("execute order list at {} for {}".format(trade_date.date(), user_id)) diff --git a/qlib/contrib/online/utils.py b/qlib/contrib/online/utils.py index 611af63e4..fb96c87bd 100644 --- a/qlib/contrib/online/utils.py +++ b/qlib/contrib/online/utils.py @@ -79,11 +79,7 @@ def prepare(um, today, user_id, exchange_config=None): log.warning("user_id:{}, last trading date {} after today {}".format(user_id, latest_trading_date, today)) return [pd.Timestamp(latest_trading_date)], None - dates = D.calendar( - start_time=pd.Timestamp(latest_trading_date), - end_time=pd.Timestamp(today), - future=True, - ) + dates = D.calendar(start_time=pd.Timestamp(latest_trading_date), end_time=pd.Timestamp(today), future=True,) dates = list(dates) dates.append(get_next_trading_date(dates[-1], future=True)) if exchange_config: diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index 1cb14d261..ef1447a12 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -53,8 +53,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df.index = t_df.index.strftime("%Y-%m-%d") # Cumulative Return By Group group_scatter_figure = ScatterGraph( - t_df.cumsum(), - layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), + t_df.cumsum(), layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)), ).figure t_df = t_df.loc[:, ["long-short", "long-average"]] @@ -62,12 +61,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int group_hist_figure = SubplotsGraph( t_df, kind_map=dict(kind="DistplotGraph", kwargs=dict(bin_size=_bin_size)), - subplots_kwargs=dict( - rows=1, - cols=2, - print_grid=False, - subplot_titles=["long-short", "long-average"], - ), + subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["long-short", "long-average"],), ).figure return group_scatter_figure, group_hist_figure @@ -102,15 +96,12 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6) _monthly_ic = ic.groupby(_index).mean() _monthly_ic.index = pd.MultiIndex.from_arrays( - [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], - names=["year", "month"], + [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], names=["year", "month"], ) # fill month _month_list = pd.date_range( - start=pd.Timestamp(f"{_index.min()[:4]}0101"), - end=pd.Timestamp(f"{_index.max()[:4]}1231"), - freq="1M", + start=pd.Timestamp(f"{_index.min()[:4]}0101"), end=pd.Timestamp(f"{_index.max()[:4]}1231"), freq="1M", ) _years = [] _month = [] @@ -142,32 +133,15 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t _bin_size = ((_ic_df.max() - _ic_df.min()) / 20).min() _sub_graph_data = [ - ( - "ic", - dict( - row=1, - col=1, - name="", - kind="DistplotGraph", - graph_kwargs=dict(bin_size=_bin_size), - ), - ), + ("ic", dict(row=1, col=1, name="", kind="DistplotGraph", graph_kwargs=dict(bin_size=_bin_size),),), (_qqplot_fig, dict(row=1, col=2)), ] ic_hist_figure = SubplotsGraph( _ic_df.dropna(), kind_map=dict(kind="HistogramGraph", kwargs=dict()), - subplots_kwargs=dict( - rows=1, - cols=2, - print_grid=False, - subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name], - ), + subplots_kwargs=dict(rows=1, cols=2, print_grid=False, subplot_titles=["IC", "IC %s Dist. Q-Q" % dist_name],), sub_graph_data=_sub_graph_data, - layout=dict( - yaxis2=dict(title="Observed Quantile"), - xaxis2=dict(title=f"{dist_name} Distribution Quantile"), - ), + layout=dict(yaxis2=dict(title="Observed Quantile"), xaxis2=dict(title=f"{dist_name} Distribution Quantile"),), ).figure return ic_bar_figure, ic_heatmap_figure, ic_hist_figure @@ -181,8 +155,7 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: _df = ac.to_frame("value") _df.index = _df.index.strftime("%Y-%m-%d") ac_figure = ScatterGraph( - _df, - layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), + _df, layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)), ).figure return (ac_figure,) @@ -202,17 +175,11 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple: .sum() / (len(x) // N) ) - r_df = pd.DataFrame( - { - "Top": top, - "Bottom": bottom, - } - ) + r_df = pd.DataFrame({"Top": top, "Bottom": bottom,}) # FIXME: support HIGH-FREQ r_df.index = r_df.index.strftime("%Y-%m-%d") turnover_figure = ScatterGraph( - r_df, - layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), + r_df, layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)), ).figure return (turnover_figure,) @@ -230,11 +197,7 @@ def ic_figure(ic_df: pd.DataFrame, show_nature_day=True, **kwargs) -> go.Figure: # FIXME: support HIGH-FREQ ic_df.index = ic_df.index.strftime("%Y-%m-%d") ic_bar_figure = BarGraph( - ic_df, - layout=dict( - title="Information Coefficient (IC)", - xaxis=dict(type="category", tickangle=45), - ), + ic_df, layout=dict(title="Information Coefficient (IC)", xaxis=dict(type="category", tickangle=45),), ).figure return ic_bar_figure @@ -277,12 +240,7 @@ def model_performance_graph( figure_list = [] for graph_name in graph_names: fun_res = eval(f"_{graph_name}")( - pred_label=pred_label, - lag=lag, - N=N, - reverse=reverse, - rank=rank, - show_nature_day=show_nature_day, + pred_label=pred_label, lag=lag, N=N, reverse=reverse, rank=rank, show_nature_day=show_nature_day, ) figure_list += fun_res diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py index abb68ea60..604189c94 100644 --- a/qlib/contrib/report/analysis_position/cumulative_return.py +++ b/qlib/contrib/report/analysis_position/cumulative_return.py @@ -13,11 +13,7 @@ from ..analysis_position.parse_position import get_position_data def _get_cum_return_data_with_position( - position: dict, - report_normal: pd.DataFrame, - label_data: pd.DataFrame, - start_date=None, - end_date=None, + position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, ): """ @@ -29,11 +25,7 @@ def _get_cum_return_data_with_position( :return: """ _cumulative_return_df = get_position_data( - position=position, - report_normal=report_normal, - label_data=label_data, - start_date=start_date, - end_date=end_date, + position=position, report_normal=report_normal, label_data=label_data, start_date=start_date, end_date=end_date, ).copy() _cumulative_return_df["label"] = _cumulative_return_df["label"] - _cumulative_return_df["bench"] @@ -87,11 +79,7 @@ def _get_cum_return_data_with_position( def _get_figure_with_position( - position: dict, - report_normal: pd.DataFrame, - label_data: pd.DataFrame, - start_date=None, - end_date=None, + position: dict, report_normal: pd.DataFrame, label_data: pd.DataFrame, start_date=None, end_date=None, ) -> Iterable[go.Figure]: """Get average analysis figures @@ -111,18 +99,12 @@ def _get_figure_with_position( # Create figures for _t_name in ["buy", "sell", "buy_minus_sell", "hold"]: sub_graph_data = [ - ( - "cum_{}".format(_t_name), - dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}), - ), + ("cum_{}".format(_t_name), dict(row=1, col=1, graph_kwargs={"mode": "lines+markers", "xaxis": "x3"}),), ( "{}_weight".format(_t_name.replace("minus", "plus") if "minus" in _t_name else _t_name), dict(row=2, col=1), ), - ( - "{}_value".format(_t_name), - dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}), - ), + ("{}_value".format(_t_name), dict(row=1, col=2, kind="HistogramGraph", graph_kwargs={}),), ] _default_xaxis = dict(showline=False, zeroline=True, tickangle=45) @@ -161,13 +143,7 @@ def _get_figure_with_position( [{"rowspan": 1}, None], ] subplots_kwargs = dict( - vertical_spacing=0.01, - rows=2, - cols=2, - row_width=[1, 2], - column_width=[3, 1], - print_grid=False, - specs=specs, + vertical_spacing=0.01, rows=2, cols=2, row_width=[1, 2], column_width=[3, 1], print_grid=False, specs=specs, ) yield SubplotsGraph( cum_return_df, diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index fe1d61137..23f9c592c 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -72,10 +72,7 @@ def parse_position(position: dict = None) -> pd.DataFrame: result_df = result_df.append(_trading_day_df, sort=True) - previous_data = dict( - date=_trading_date, - code_list=_trading_day_df[_trading_day_df["status"] != -1].index, - ) + previous_data = dict(date=_trading_date, code_list=_trading_day_df[_trading_day_df["status"] != -1].index,) result_df.reset_index(inplace=True) result_df.rename(columns={"date": "datetime", "index": "instrument"}, inplace=True) diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py index 72a358adc..9a4d834ed 100644 --- a/qlib/contrib/report/analysis_position/rank_label.py +++ b/qlib/contrib/report/analysis_position/rank_label.py @@ -23,11 +23,7 @@ def _get_figure_with_position( :return: """ _position_df = get_position_data( - position, - label_data, - calculate_label_rank=True, - start_date=start_date, - end_date=end_date, + position, label_data, calculate_label_rank=True, start_date=start_date, end_date=end_date, ) res_dict = dict() @@ -51,20 +47,14 @@ def _get_figure_with_position( yield ScatterGraph( _res_df.loc[:, [_col]], layout=dict( - title=_col, - xaxis=dict(type="category", tickangle=45), - yaxis=dict(title="lable-rank-ratio: %"), + title=_col, xaxis=dict(type="category", tickangle=45), yaxis=dict(title="lable-rank-ratio: %"), ), graph_kwargs=dict(mode="lines+markers"), ).figure def rank_label_graph( - position: dict, - label_data: pd.DataFrame, - start_date=None, - end_date=None, - show_notebook=True, + position: dict, label_data: pd.DataFrame, start_date=None, end_date=None, show_notebook=True, ) -> Iterable[go.Figure]: """Ranking percentage of stocks buy, sell, and holding on the trading day. Average rank-ratio(similar to **sell_df['label'].rank(ascending=False) / len(sell_df)**) of daily trading diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py index f82e654c4..8e2c05c0a 100644 --- a/qlib/contrib/report/analysis_position/report.py +++ b/qlib/contrib/report/analysis_position/report.py @@ -123,9 +123,7 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 1, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": { - "width": 0, - }, + "line": {"width": 0,}, }, { "type": "rect", @@ -137,20 +135,13 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]: "y1": 0.55, "fillcolor": "#d3d3d3", "opacity": 0.3, - "line": { - "width": 0, - }, + "line": {"width": 0,}, }, ], ) _subplot_kwargs = dict( - shared_xaxes=True, - vertical_spacing=0.01, - rows=7, - cols=1, - row_width=[1, 1, 1, 3, 1, 1, 3], - print_grid=False, + shared_xaxes=True, vertical_spacing=0.01, rows=7, cols=1, row_width=[1, 1, 1, 3, 1, 1, 3], print_grid=False, ) figure = SubplotsGraph( df=report_df, diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py index 70e382fb1..dbbc41110 100644 --- a/qlib/contrib/report/graph.py +++ b/qlib/contrib/report/graph.py @@ -311,11 +311,7 @@ class SubplotsGraph: _temp_row_data = ( column_name, dict( - row=row, - col=col, - name=res_name, - kind=self._kind_map["kind"], - graph_kwargs=self._kind_map["kwargs"], + row=row, col=col, name=res_name, kind=self._kind_map["kind"], graph_kwargs=self._kind_map["kwargs"], ), ) self._sub_graph_data.append(_temp_row_data) diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py index dd90437b0..ee3ee03ec 100644 --- a/qlib/contrib/strategy/cost_control.py +++ b/qlib/contrib/strategy/cost_control.py @@ -57,10 +57,7 @@ class SoftTopkStrategy(WeightStrategyBase): final_stock_weight[stock_id] -= sw if self.buy_method == "first_fill": for stock_id in buy_signal_stocks: - add_weight = min( - max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), - sold_stock_weight, - ) + add_weight = min(max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0), sold_stock_weight,) final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + add_weight sold_stock_weight -= add_weight elif self.buy_method == "average_fill": diff --git a/qlib/contrib/strategy/order_generator.py b/qlib/contrib/strategy/order_generator.py index 494981ecc..6f168b4dd 100644 --- a/qlib/contrib/strategy/order_generator.py +++ b/qlib/contrib/strategy/order_generator.py @@ -102,14 +102,10 @@ class OrderGenWInteract(OrderGenerator): # strategy 1 : generate amount_position by weight_position # Use API in Exchange() target_amount_dict = trade_exchange.generate_amount_position_from_weight_position( - weight_position=target_weight_position, - cash=current_tradable_value, - trade_date=trade_date, + weight_position=target_weight_position, cash=current_tradable_value, trade_date=trade_date, ) order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=target_amount_dict, - current_position=current_amount_dict, - trade_date=trade_date, + target_position=target_amount_dict, current_position=current_amount_dict, trade_date=trade_date, ) return order_list @@ -164,8 +160,6 @@ class OrderGenWOInteract(OrderGenerator): else: continue order_list = trade_exchange.generate_order_for_target_amount_position( - target_position=amount_dict, - current_position=current.get_stock_amount_dict(), - trade_date=trade_date, + target_position=amount_dict, current_position=current.get_stock_amount_dict(), trade_date=trade_date, ) return order_list diff --git a/qlib/contrib/tuner/launcher.py b/qlib/contrib/tuner/launcher.py index 711658c9a..409410a2a 100644 --- a/qlib/contrib/tuner/launcher.py +++ b/qlib/contrib/tuner/launcher.py @@ -13,11 +13,7 @@ from .config import TunerConfigManager args_parser = argparse.ArgumentParser(prog="tuner") args_parser.add_argument( - "-c", - "--config_path", - required=True, - type=str, - help="config path indicates where to load yaml config.", + "-c", "--config_path", required=True, type=str, help="config path indicates where to load yaml config.", ) args = args_parser.parse_args() diff --git a/qlib/contrib/tuner/space.py b/qlib/contrib/tuner/space.py index 76f101671..57f57a6c3 100644 --- a/qlib/contrib/tuner/space.py +++ b/qlib/contrib/tuner/space.py @@ -10,8 +10,5 @@ TopkAmountStrategySpace = { } QLibDataLabelSpace = { - "labels": hp.choice( - "labels", - [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]], - ) + "labels": hp.choice("labels", [["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["Ref($close, -5)/$close - 1"]],) } diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index 2ce957859..e81d41a9a 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -28,10 +28,7 @@ class Tuner: self.optim_config = optim_config self.max_evals = self.tuner_config.get("max_evals", 10) - self.ex_dir = os.path.join( - self.tuner_config["experiment"]["dir"], - self.tuner_config["experiment"]["name"], - ) + self.ex_dir = os.path.join(self.tuner_config["experiment"]["dir"], self.tuner_config["experiment"]["name"],) self.best_params = None self.best_res = None @@ -42,10 +39,7 @@ class Tuner: TimeInspector.set_time_mark() fmin( - fn=self.objective, - space=self.space, - algo=tpe.suggest, - max_evals=self.max_evals, + fn=self.objective, space=self.space, algo=tpe.suggest, max_evals=self.max_evals, ) self.logger.info("Local best params: {} ".format(self.best_params)) TimeInspector.log_cost_time( @@ -159,8 +153,7 @@ class QLibTuner(Tuner): estimator_config["data"]["args"].update(params["data_label_space"]) estimator_path = os.path.join( - self.tuner_config["experiment"].get("dir", "../"), - QLibTuner.ESTIMATOR_CONFIG_NAME, + self.tuner_config["experiment"].get("dir", "../"), QLibTuner.ESTIMATOR_CONFIG_NAME, ) with open(estimator_path, "w") as fp: @@ -173,27 +166,20 @@ class QLibTuner(Tuner): model_space_name = self.tuner_config["model"].get("space", None) if model_space_name is None: raise ValueError("Please give the search space of model.") - model_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - model_space_name, - ) + model_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), model_space_name,) # 2. Setup strategy space strategy_space_name = self.tuner_config["strategy"].get("space", None) if strategy_space_name is None: raise ValueError("Please give the search space of strategy.") - strategy_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - strategy_space_name, - ) + strategy_space = getattr(importlib.import_module(".space", package="qlib.contrib.tuner"), strategy_space_name,) # 3. Setup data label space if given if self.tuner_config.get("data_label", None) is not None: data_label_space_name = self.tuner_config["data_label"].get("space", None) if data_label_space_name is not None: data_label_space = getattr( - importlib.import_module(".space", package="qlib.contrib.tuner"), - data_label_space_name, + importlib.import_module(".space", package="qlib.contrib.tuner"), data_label_space_name, ) else: data_label_space_name = None diff --git a/qlib/data/client.py b/qlib/data/client.py index 5244a7e45..d1a68cb38 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -26,8 +26,7 @@ class Client: self.logger = get_module_logger(self.__class__.__name__) # bind connect/disconnect callbacks self.sio.on( - "connect", - lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), + "connect", lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)), ) self.sio.on("disconnect", lambda: self.logger.debug("Disconnect from server!")) diff --git a/qlib/data/data.py b/qlib/data/data.py index 762467da3..47cded79c 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -328,14 +328,7 @@ class DatasetProvider(abc.ABC): raise NotImplementedError("Subclass of DatasetProvider must implement `Dataset` method") def _uri( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=1, - **kwargs, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=1, **kwargs, ): """Get task uri, used when generating rabbitmq task in qlib_server @@ -414,29 +407,13 @@ class DatasetProvider(abc.ABC): for inst, spans in instruments_d.items(): data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=( - inst, - start_time, - end_time, - freq, - normalize_column_names, - spans, - C, - ), + args=(inst, start_time, end_time, freq, normalize_column_names, spans, C,), ) else: for inst in instruments_d: data[inst] = p.apply_async( DatasetProvider.expression_calculator, - args=( - inst, - start_time, - end_time, - freq, - normalize_column_names, - None, - C, - ), + args=(inst, start_time, end_time, freq, normalize_column_names, None, C,), ) p.close() @@ -598,12 +575,7 @@ class LocalInstrumentProvider(InstrumentProvider): start_time = pd.Timestamp(start_time or cal[0]) end_time = pd.Timestamp(end_time or cal[-1]) _instruments_filtered = { - inst: list( - filter( - lambda x: x[0] <= x[1], - [(max(start_time, x[0]), min(end_time, x[1])) for x in spans], - ) - ) + inst: list(filter(lambda x: x[0] <= x[1], [(max(start_time, x[0]), min(end_time, x[1])) for x in spans],)) for inst, spans in _instruments.items() } _instruments_filtered = {key: value for key, value in _instruments_filtered.items() if value} @@ -723,14 +695,7 @@ class LocalDatasetProvider(DatasetProvider): for inst in instruments_d: p.apply_async( - LocalDatasetProvider.cache_walker, - args=( - inst, - start_time, - end_time, - freq, - column_names, - ), + LocalDatasetProvider.cache_walker, args=(inst, start_time, end_time, freq, column_names,), ) p.close() @@ -763,12 +728,7 @@ class ClientCalendarProvider(CalendarProvider): def calendar(self, start_time=None, end_time=None, freq="day", future=False): self.conn.send_request( request_type="calendar", - request_content={ - "start_time": str(start_time), - "end_time": str(end_time), - "freq": freq, - "future": future, - }, + request_content={"start_time": str(start_time), "end_time": str(end_time), "freq": freq, "future": future,}, msg_queue=self.queue, msg_proc_func=lambda response_content: [pd.Timestamp(c) for c in response_content], ) @@ -832,14 +792,7 @@ class ClientDatasetProvider(DatasetProvider): self.queue = queue.Queue() def dataset( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=0, - return_uri=False, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, return_uri=False, ): if Inst.get_inst_type(instruments) == Inst.DICT: get_module_logger("data").warning( @@ -942,13 +895,7 @@ class BaseProvider: return Inst.list_instruments(instruments, start_time, end_time, freq, as_list) def features( - self, - instruments, - fields, - start_time=None, - end_time=None, - freq="day", - disk_cache=None, + self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=None, ): """ Parameters: diff --git a/qlib/data/dataset/utils.py b/qlib/data/dataset/utils.py index feda19044..58e2bd968 100644 --- a/qlib/data/dataset/utils.py +++ b/qlib/data/dataset/utils.py @@ -32,10 +32,7 @@ def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int: def fetch_df_by_index( - df: pd.DataFrame, - selector: Union[pd.Timestamp, slice, str, list], - level: Union[str, int], - fetch_orig=True, + df: pd.DataFrame, selector: Union[pd.Timestamp, slice, str, list], level: Union[str, int], fetch_orig=True, ) -> pd.DataFrame: """ fetch data from `data` with `selector` and `level` diff --git a/qlib/data/filter.py b/qlib/data/filter.py index 70f9d3278..811fd387f 100644 --- a/qlib/data/filter.py +++ b/qlib/data/filter.py @@ -341,12 +341,7 @@ class ExpressionDFilter(SeriesDFilter): # do not use dataset cache try: _features = DatasetD.dataset( - instruments, - [self.rule_expression], - fstart, - fend, - freq=self.filter_freq, - disk_cache=0, + instruments, [self.rule_expression], fstart, fend, freq=self.filter_freq, disk_cache=0, ) except TypeError: # use LocalDatasetProvider diff --git a/qlib/model/riskmodel.py b/qlib/model/riskmodel.py index 8eec73e00..f19c60fc9 100644 --- a/qlib/model/riskmodel.py +++ b/qlib/model/riskmodel.py @@ -38,7 +38,7 @@ class RiskModel(BaseModel): self.scale_return = scale_return def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True + self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True ) -> Union[pd.DataFrame, np.ndarray]: """ Args: @@ -373,8 +373,7 @@ class ShrinkCovEstimator(RiskModel): roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt v3 = z.T.dot(z) / t - var_mkt * S roff3 = ( - np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum( - np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 + np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 ) roff = 2 * roff1 - roff3 rho = rdiag + roff @@ -434,7 +433,7 @@ class POETCovEstimator(RiskModel): if self.num_factors > 0: Dd, V = np.linalg.eig(Y.T.dot(Y)) V = V[:, np.argsort(Dd)] - F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n) + F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n) LamPCA = Y.dot(F) / n uhat = np.asarray(Y - LamPCA.dot(F.T)) Lowrank = np.asarray(LamPCA.dot(LamPCA.T)) @@ -490,8 +489,14 @@ class StructuredCovEstimator(RiskModel): FACTOR_MODEL_PCA = "pca" FACTOR_MODEL_FA = "fa" - def __init__(self, factor_model: str = 'pca', num_factors: int = 10, nan_option: str = "ignore", - assume_centered: bool = False, scale_return: bool = True): + def __init__( + self, + factor_model: str = "pca", + num_factors: int = 10, + nan_option: str = "ignore", + assume_centered: bool = False, + scale_return: bool = True, + ): """ Args: factor_model (str): the latent factor models used to estimate the structured covariance (`pca`/`fa`). @@ -505,14 +510,17 @@ class StructuredCovEstimator(RiskModel): assert factor_model in [ self.FACTOR_MODEL_PCA, self.FACTOR_MODEL_FA, - ], 'factor_model={} is not supported'.format(factor_model) + ], "factor_model={} is not supported".format(factor_model) self.solver = PCA if factor_model == self.FACTOR_MODEL_PCA else FactorAnalysis self.num_factors = num_factors def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True, - return_decomposed_components=False + self, + X: Union[pd.Series, pd.DataFrame, np.ndarray], + return_corr: bool = False, + is_price: bool = True, + return_decomposed_components=False, ) -> Union[pd.DataFrame, np.ndarray, tuple]: """ Args: @@ -525,8 +533,9 @@ class StructuredCovEstimator(RiskModel): Returns: tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. """ - assert not return_corr or not return_decomposed_components, \ - 'Can only return either correlation matrix or decomposed components.' + assert ( + not return_corr or not return_decomposed_components + ), "Can only return either correlation matrix or decomposed components." # transform input into 2D array if not isinstance(X, (pd.Series, pd.DataFrame)): diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index 728a04ea9..391242127 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -38,13 +38,13 @@ class PortfolioOptimizer(BaseOptimizer): OPT_INV = "inv" def __init__( - self, - method: str = "inv", - lamb: float = 0, - delta: float = 0, - alpha: float = 0.0, - scale_alpha: bool = True, - tol: float = 1e-8, + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, ): """ Args: @@ -71,10 +71,10 @@ class PortfolioOptimizer(BaseOptimizer): self.scale_alpha = scale_alpha def __call__( - self, - S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None, + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, ) -> Union[np.ndarray, pd.Series]: """ Args: @@ -163,7 +163,7 @@ class PortfolioOptimizer(BaseOptimizer): return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) def _optimize_mvo( - self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None ) -> np.ndarray: """optimize mean-variance portfolio @@ -259,6 +259,7 @@ class PortfolioOptimizer(BaseOptimizer): # add l2 regularization wrapped_obj = obj if self.alpha > 0: + def opt_obj(x): return obj(x) + self.alpha * np.sum(np.square(x)) @@ -281,12 +282,21 @@ class EnhancedIndexingOptimizer(BaseOptimizer): This optimizer always assumes full investment and no-shorting. """ - START_FROM_W0 = 'w0' - START_FROM_BENCH = 'benchmark' - DO_NOT_START_FROM = 'no_warm_start' + START_FROM_W0 = "w0" + START_FROM_BENCH = "benchmark" + DO_NOT_START_FROM = "no_warm_start" - def __init__(self, lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, inds_dev: float = 0.01, - scale_alpha=True, verbose: bool = False, warm_start: str = DO_NOT_START_FROM, max_iters: int = 10000): + def __init__( + self, + lamb: float = 10, + delta: float = 0.4, + bench_dev: float = 0.01, + inds_dev: float = 0.01, + scale_alpha=True, + verbose: bool = False, + warm_start: str = DO_NOT_START_FROM, + max_iters: int = 10000, + ): """ Args: lamb (float): risk aversion parameter (larger `lamb` means less focus on return) @@ -310,18 +320,28 @@ class EnhancedIndexingOptimizer(BaseOptimizer): assert inds_dev >= 0, "industry deviation limit `inds_dev` should be positive" self.inds_dev = inds_dev - assert warm_start in [self.DO_NOT_START_FROM, self.START_FROM_W0, - self.START_FROM_BENCH], "illegal warm start option" - self.start_from_w0 = (warm_start == self.START_FROM_W0) - self.start_from_bench = (warm_start == self.START_FROM_BENCH) + assert warm_start in [ + self.DO_NOT_START_FROM, + self.START_FROM_W0, + self.START_FROM_BENCH, + ], "illegal warm start option" + self.start_from_w0 = warm_start == self.START_FROM_W0 + self.start_from_bench = warm_start == self.START_FROM_BENCH self.scale_alpha = scale_alpha self.verbose = verbose self.max_iters = max_iters - def __call__(self, u: np.ndarray, F: np.ndarray, covB: np.ndarray, varU: np.ndarray, w0: np.ndarray, - w_bench: np.ndarray, inds_onehot: np.ndarray - ) -> Union[np.ndarray, pd.Series]: + def __call__( + self, + u: np.ndarray, + F: np.ndarray, + covB: np.ndarray, + varU: np.ndarray, + w0: np.ndarray, + w_bench: np.ndarray, + inds_onehot: np.ndarray, + ) -> Union[np.ndarray, pd.Series]: """ Args: u (np.ndarray): expected returns (a.k.a., alpha) @@ -352,7 +372,7 @@ class EnhancedIndexingOptimizer(BaseOptimizer): d_bench >= -self.bench_dev, d_bench <= self.bench_dev, d_inds >= -self.inds_dev, - d_inds <= self.inds_dev + d_inds <= self.inds_dev, ] if w0 is not None: turnover = cp.sum(cp.abs(w - w0)) @@ -361,7 +381,7 @@ class EnhancedIndexingOptimizer(BaseOptimizer): warm_start = False if self.start_from_w0: if w0 is None: - print('Warning: try warm start with w0, but w0 is `None`.') + print("Warning: try warm start with w0, but w0 is `None`.") else: w.value = w0 warm_start = True @@ -372,7 +392,7 @@ class EnhancedIndexingOptimizer(BaseOptimizer): prob = cp.Problem(obj, cons) prob.solve(solver=cp.SCS, verbose=self.verbose, warm_start=warm_start, max_iters=self.max_iters) - if prob.status != 'optimal': - print('Warning: solve failed.', prob.status) + if prob.status != "optimal": + print("Warning: solve failed.", prob.status) return np.asarray(w.value) diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index f92e72787..eb6f9c5ed 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -18,10 +18,6 @@ class TestAutoData(unittest.TestCase): print(f"Qlib data is not found in {provider_uri}") GetData().qlib_data( - name="qlib_data_simple", - region="cn", - interval="1d", - target_dir=provider_uri, - delete_old=False, + name="qlib_data_simple", region="cn", interval="1d", target_dir=provider_uri, delete_old=False, ) init(provider_uri=provider_uri, region=REG_CN, **cls._setup_kwargs) diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index be458a24d..0c704b896 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -193,10 +193,7 @@ class SigAnaRecord(SignalRecord): } ) objects.update( - { - "long_short_r.pkl": long_short_r, - "long_avg_r.pkl": long_avg_r, - } + {"long_short_r.pkl": long_short_r, "long_avg_r.pkl": long_avg_r,} ) self.recorder.log_metrics(**metrics) self.recorder.save_objects(**objects, artifact_path=self.get_path()) diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 743f89462..24526e332 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -39,13 +39,7 @@ class YahooData: INTERVAL_1d = "1d" def __init__( - self, - timezone: str = None, - start=None, - end=None, - interval="1d", - delay=0, - show_1min_logging: bool = False, + self, timezone: str = None, start=None, end=None, interval="1d", delay=0, show_1min_logging: bool = False, ): """ @@ -125,11 +119,7 @@ class YahooData: self._sleep() _remote_interval = "1m" if self._interval == self.INTERVAL_1min else self._interval return self.get_data_from_remote( - symbol, - interval=_remote_interval, - start=start_, - end=end_, - show_1min_logging=self._show_1min_logging, + symbol, interval=_remote_interval, start=start_, end=end_, show_1min_logging=self._show_1min_logging, ) _result = None @@ -438,9 +428,7 @@ class YahooNormalize: DAILY_FORMAT = "%Y-%m-%d" def __init__( - self, - date_field_name: str = "date", - symbol_field_name: str = "symbol", + self, date_field_name: str = "date", symbol_field_name: str = "symbol", ): """ @@ -458,10 +446,7 @@ class YahooNormalize: @staticmethod def normalize_yahoo( - df: pd.DataFrame, - calendar_list: list = None, - date_field_name: str = "date", - symbol_field_name: str = "symbol", + df: pd.DataFrame, calendar_list: list = None, date_field_name: str = "date", symbol_field_name: str = "symbol", ): if df.empty: return df @@ -566,9 +551,7 @@ class YahooNormalize1min(YahooNormalize, ABC): CONSISTENT_1d = False def __init__( - self, - date_field_name: str = "date", - symbol_field_name: str = "symbol", + self, date_field_name: str = "date", symbol_field_name: str = "symbol", ): """ diff --git a/scripts/dump_bin.py b/scripts/dump_bin.py index 4811fd486..ab24fa9ca 100644 --- a/scripts/dump_bin.py +++ b/scripts/dump_bin.py @@ -153,22 +153,13 @@ class DumpDataBase: @staticmethod def _read_calendars(calendar_path: Path) -> List[pd.Timestamp]: - return sorted( - map( - pd.Timestamp, - pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(), - ) - ) + return sorted(map(pd.Timestamp, pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(),)) def _read_instruments(self, instrument_path: Path) -> pd.DataFrame: df = pd.read_csv( instrument_path, sep=self.INSTRUMENTS_SEP, - names=[ - self.symbol_field_name, - self.INSTRUMENTS_START_FIELD, - self.INSTRUMENTS_END_FIELD, - ], + names=[self.symbol_field_name, self.INSTRUMENTS_START_FIELD, self.INSTRUMENTS_END_FIELD,], ) return df diff --git a/setup.py b/setup.py index 6582054b9..d8a9d9efa 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ REQUIRED = [ "tornado", "joblib>=0.17.0", "ruamel.yaml>=0.16.12", - "scikit-learn>=0.22" + "scikit-learn>=0.22", ] # Numpy include @@ -70,16 +70,10 @@ with open(os.path.join(here, "README.md"), encoding="utf-8") as f: # Cython Extensions extensions = [ Extension( - "qlib.data._libs.rolling", - ["qlib/data/_libs/rolling.pyx"], - language="c++", - include_dirs=[NUMPY_INCLUDE], + "qlib.data._libs.rolling", ["qlib/data/_libs/rolling.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE], ), Extension( - "qlib.data._libs.expanding", - ["qlib/data/_libs/expanding.pyx"], - language="c++", - include_dirs=[NUMPY_INCLUDE], + "qlib.data._libs.expanding", ["qlib/data/_libs/expanding.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE], ), ] @@ -98,9 +92,7 @@ setup( # py_modules=['qlib'], entry_points={ # 'console_scripts': ['mycli=mymodule:cli'], - "console_scripts": [ - "qrun=qlib.workflow.cli:run", - ], + "console_scripts": ["qrun=qlib.workflow.cli:run",], }, ext_modules=extensions, install_requires=REQUIRED, diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index f6e77cba4..8b3819c83 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -78,10 +78,7 @@ port_analysis_config = { "strategy": { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, + "kwargs": {"topk": 50, "n_drop": 5,}, }, "backtest": { "verbose": False, @@ -176,9 +173,7 @@ class TestAllFlow(TestAutoData): def test_1_backtest(self): analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) self.assertGreaterEqual( - analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], - 0.10, - "backtest failed", + analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], 0.10, "backtest failed", ) diff --git a/tests/test_dump_data.py b/tests/test_dump_data.py index dfa7f8556..de649c37e 100644 --- a/tests/test_dump_data.py +++ b/tests/test_dump_data.py @@ -40,9 +40,7 @@ class TestDumpData(unittest.TestCase): TestDumpData.STOCK_NAMES = list(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv"))) provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, - expression_cache=None, - dataset_cache=None, + provider_uri=provider_uri, expression_cache=None, dataset_cache=None, ) @classmethod @@ -54,10 +52,7 @@ class TestDumpData(unittest.TestCase): def test_1_dump_calendars(self): ori_calendars = set( - map( - pd.Timestamp, - pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values, - ) + map(pd.Timestamp, pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values,) ) res_calendars = set(D.calendar()) assert len(ori_calendars - res_calendars) == len(res_calendars - ori_calendars) == 0, "dump calendars failed" diff --git a/tests/test_get_data.py b/tests/test_get_data.py index c511d1b91..d5637b025 100644 --- a/tests/test_get_data.py +++ b/tests/test_get_data.py @@ -26,9 +26,7 @@ class TestGetData(unittest.TestCase): def setUpClass(cls) -> None: provider_uri = str(QLIB_DIR.resolve()) qlib.init( - provider_uri=provider_uri, - expression_cache=None, - dataset_cache=None, + provider_uri=provider_uri, expression_cache=None, dataset_cache=None, ) @classmethod