From 215f7e0d22d1ce57e5769ce20a144cdb0c181ff9 Mon Sep 17 00:00:00 2001 From: Young Date: Sun, 11 Jul 2021 14:34:44 +0000 Subject: [PATCH 01/61] update version for release 0.7.0 --- qlib/__init__.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/qlib/__init__.py b/qlib/__init__.py index 5f45f4557..996e442fa 100644 --- a/qlib/__init__.py +++ b/qlib/__init__.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. -__version__ = "0.6.3.99" +__version__ = "0.7.0" __version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version diff --git a/setup.py b/setup.py index 92c9ccc0c..a633dc63e 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ NAME = "pyqlib" DESCRIPTION = "A Quantitative-research Platform" REQUIRES_PYTHON = ">=3.5.0" -VERSION = "0.6.3.99" +VERSION = "0.7.0" # Detect Cython try: From d5059e609fc8918eff621ce3305a0131c5fcecf3 Mon Sep 17 00:00:00 2001 From: Young Date: Mon, 12 Jul 2021 02:49:25 +0000 Subject: [PATCH 02/61] change to dev version --- README.md | 1 + qlib/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b68cdaf10..de20dd796 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Recent released features | Feature | Status | | -- | ------ | +| Release Qlib v0.7.0 | [Released](https://github.com/microsoft/qlib/releases/tag/v0.7.0) on July 12, 2021 | | TCTS Model | [Released](https://github.com/microsoft/qlib/pull/491) on July 1, 2021 | | Online serving and automatic model rolling | :star: [Released](https://github.com/microsoft/qlib/pull/290) on May 17, 2021 | | DoubleEnsemble Model | [Released](https://github.com/microsoft/qlib/pull/286) on Mar 2, 2021 | diff --git a/qlib/__init__.py b/qlib/__init__.py index 996e442fa..6f76bbcaa 100644 --- a/qlib/__init__.py +++ b/qlib/__init__.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. 
-__version__ = "0.7.0" +__version__ = "0.7.0.99" __version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version diff --git a/setup.py b/setup.py index a633dc63e..6a037a9e5 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ NAME = "pyqlib" DESCRIPTION = "A Quantitative-research Platform" REQUIRES_PYTHON = ">=3.5.0" -VERSION = "0.7.0" +VERSION = "0.7.0.99" # Detect Cython try: From b504cc6ac8d6f111591a25e8a838e24c1713dd3b Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Mon, 12 Jul 2021 21:51:08 +0800 Subject: [PATCH 03/61] update readme and rst --- README.md | 3 ++- docs/developer/code_standard.rst | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 docs/developer/code_standard.rst diff --git a/README.md b/README.md index de20dd796..1e3191598 100644 --- a/README.md +++ b/README.md @@ -396,7 +396,8 @@ the right to use your contribution. For details, visit https://cla.opensource.mi When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions -provided by the bot. You will only need to do this once across all repos using our CLA. +provided by the bot. You will only need to do this once across all repos using our CLA. Here are some +[code standards](docs/developer/code_standard.rst) when you submit a pull request. This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or diff --git a/docs/developer/code_standard.rst b/docs/developer/code_standard.rst new file mode 100644 index 000000000..23ea713ba --- /dev/null +++ b/docs/developer/code_standard.rst @@ -0,0 +1,20 @@ +.. 
_code_standard: + +================================= +Code Standard +================================= + +Docstring +================================= +Please use the Numpy Style. + +Continuous Integration +================================= +Continuous Integration (CI) tools help you stick to the quality standards by running tests every time you push a new commit and reporting the results to a pull request. + +A common error is the mixed use of space and tab. You can fix the bug by inputing the following code in the command line. + +.. code-block:: python + + pip install black + python -m black . -l 120 \ No newline at end of file From 4610e16ac2d85ef8f2c0779654b29d4ae2869a84 Mon Sep 17 00:00:00 2001 From: Gaurav Chauhan <2796gaurav@gmail.com> Date: Tue, 13 Jul 2021 07:16:13 +0530 Subject: [PATCH 04/61] updated readme of yahoo collector where region parameter was incorrect (#504) * updated readme of yahoo collector where region parameter was incorrect * changes update readme of yahoo collector where region parameter was incorrect * update readme of yahoo collector update readme of yahoo collector where region parameter was incorrect * updated changes * updated readme of cn1d data Co-authored-by: Gaurav Chauhan01/HO/Analytics/General --- scripts/data_collector/yahoo/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/data_collector/yahoo/README.md b/scripts/data_collector/yahoo/README.md index 6cc630e87..50f731e38 100644 --- a/scripts/data_collector/yahoo/README.md +++ b/scripts/data_collector/yahoo/README.md @@ -71,7 +71,7 @@ pip install -r requirements.txt - examples: ```bash # cn 1d data - python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1d --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region US + python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1d --start 2020-01-01 --end 2020-12-31 --delay 1 --interval 1d --region CN # cn 1min data python collector.py 
download_data --source_dir ~/.qlib/stock_data/source/cn_1min --delay 1 --interval 1min --region CN # us 1d data From 79026e5390acd8f719c539e4c467d7f67895dd55 Mon Sep 17 00:00:00 2001 From: chaosyu Date: Fri, 25 Jun 2021 09:38:49 +0800 Subject: [PATCH 05/61] fix bug that duplicate rows will cause reindex failed when dumping with csv files --- scripts/dump_bin.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/dump_bin.py b/scripts/dump_bin.py index 83daa28bc..8e9878895 100644 --- a/scripts/dump_bin.py +++ b/scripts/dump_bin.py @@ -244,6 +244,10 @@ class DumpDataBase: if df is None or df.empty: logger.warning(f"{code} data is None or empty") return + + # try to remove dup rows or it will cause exception when reindex. + df = df.drop_duplicates(self.date_field_name) + # features save dir features_dir = self._features_dir.joinpath(code_to_fname(code).lower()) features_dir.mkdir(parents=True, exist_ok=True) From a8974ce5354d1db5982f498cdbef522e14bade27 Mon Sep 17 00:00:00 2001 From: chaosyu Date: Wed, 7 Jul 2021 15:14:05 +0800 Subject: [PATCH 06/61] bug fix: ClientProvider cannot set connection to calendar and instrument providers --- qlib/data/data.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/qlib/data/data.py b/qlib/data/data.py index eb7fbe0ea..27c181164 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -1028,13 +1028,21 @@ class ClientProvider(BaseProvider): """ def __init__(self): + def is_instance_of_provider(instance: object, cls: type): + if isinstance(instance, Wrapper): + p = getattr(instance, "_provider", None) + + return False if p is None else isinstance(p, cls) + + return isinstance(instance, cls) + from .client import Client self.client = Client(C.flask_server, C.flask_port) self.logger = get_module_logger(self.__class__.__name__) - if isinstance(Cal, ClientCalendarProvider): + if is_instance_of_provider(Cal, ClientCalendarProvider): Cal.set_conn(self.client) - if isinstance(Inst, 
ClientInstrumentProvider): + if is_instance_of_provider(Inst, ClientInstrumentProvider): Inst.set_conn(self.client) if hasattr(DatasetD, "provider"): DatasetD.provider.set_conn(self.client) From be8653c5053e57b3a34e1f76481bf73383f42932 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Wed, 14 Jul 2021 09:56:12 +0800 Subject: [PATCH 07/61] Update contributing section --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1e3191598..8668ccc14 100644 --- a/README.md +++ b/README.md @@ -390,14 +390,18 @@ Join IM discussion groups: # Contributing -This project welcomes contributions and suggestions. Most contributions require you to agree to a +This project welcomes contributions and suggestions. +**Here are some +[code standards](docs/developer/code_standard.rst) when you submit a pull request.** + + +Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the right to use your contribution. For details, visit https://cla.opensource.microsoft.com. When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions -provided by the bot. You will only need to do this once across all repos using our CLA. Here are some -[code standards](docs/developer/code_standard.rst) when you submit a pull request. +provided by the bot. You will only need to do this once across all repos using our CLA. This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or From 6d91f28474cb2a2e6326e7c75c6f4a3138ae5774 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Wed, 14 Jul 2021 10:07:02 +0800 Subject: [PATCH 08/61] Update README.md --- examples/benchmarks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index 1920a6a3b..fc97657d0 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -1,6 +1,6 @@ # Benchmarks Performance -Here are the results of each benchmark model running on Qlib's `Alpha360` and `Alpha158` dataset with China's A shared-stock & CSI300 data respectively. The values of each metric are the mean and std calculated based on 20 runs. +Here are the results of each benchmark model running on Qlib's `Alpha360` and `Alpha158` dataset with China's A shared-stock & CSI300 data respectively. The values of each metric are the mean and std calculated based on 20 runs with different random seeds. The numbers shown below demonstrate the performance of the entire `workflow` of each model. We will update the `workflow` as well as models in the near future for better results. 
From 4ec41ea0e7fc74d94e0336948df9a11a53f8365d Mon Sep 17 00:00:00 2001 From: zhupr Date: Wed, 14 Jul 2021 23:23:04 +0800 Subject: [PATCH 09/61] Add a check if change is mutated to YahooNormalize1d --- .../future_trading_date_collector.py | 1 + scripts/data_collector/yahoo/collector.py | 27 ++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/scripts/data_collector/contrib/future_trading_date_collector/future_trading_date_collector.py b/scripts/data_collector/contrib/future_trading_date_collector/future_trading_date_collector.py index 8df0a4972..939ba7f6a 100644 --- a/scripts/data_collector/contrib/future_trading_date_collector/future_trading_date_collector.py +++ b/scripts/data_collector/contrib/future_trading_date_collector/future_trading_date_collector.py @@ -78,6 +78,7 @@ def future_calendar_collector(qlib_dir: [str, Path], freq: str = "day"): data_list.append(_row_data[0]) data_list = sorted(data_list) date_list = generate_qlib_calendar(data_list, freq=freq) + date_list = sorted(set(daily_calendar.loc[:, 0].values.tolist() + date_list)) write_calendar_to_qlib(qlib_dir, date_list, freq=freq) bs.logout() logger.info(f"get trading dates success: {start_year}-01-01 to {end_year}-12-31") diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 6a128a5be..e0e6e0368 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -283,6 +283,16 @@ class YahooNormalize(BaseNormalize): COLUMNS = ["open", "close", "high", "low", "volume"] DAILY_FORMAT = "%Y-%m-%d" + @staticmethod + def calc_change(df: pd.DataFrame, last_close: float) -> pd.Series: + df = df.copy() + _tmp_series = df["close"].fillna(method="ffill") + _tmp_shift_series = _tmp_series.shift(1) + if last_close is not None: + _tmp_shift_series.iloc[0] = float(last_close) + change_series = _tmp_series / _tmp_shift_series - 1 + return change_series + @staticmethod def normalize_yahoo( df: 
pd.DataFrame, @@ -310,11 +320,16 @@ class YahooNormalize(BaseNormalize): ) df.sort_index(inplace=True) df.loc[(df["volume"] <= 0) | np.isnan(df["volume"]), set(df.columns) - {symbol_field_name}] = np.nan - _tmp_series = df["close"].fillna(method="ffill") - _tmp_shift_series = _tmp_series.shift(1) - if last_close is not None: - _tmp_shift_series.iloc[0] = float(last_close) - df["change"] = _tmp_series / _tmp_shift_series - 1 + + change_series = YahooNormalize.calc_change(df, last_close) + # NOTE: The data obtained by Yahoo finance sometimes has exceptions + # WARNING: If it is normal for a `symbol(exchange)` to differ by a factor of *89* to *111* for consecutive trading days, + # WARNING: the logic in the following line needs to be modified + _mask = (change_series >= 89) & (change_series <= 111) + _tmp_cols = ["high", "close", "low", "open", "adjclose"] + df.loc[_mask, _tmp_cols] = df.loc[_mask, _tmp_cols] / 100 + df["change"] = YahooNormalize.calc_change(df, last_close) + columns += ["change"] df.loc[(df["volume"] <= 0) | np.isnan(df["volume"]), columns] = np.nan @@ -852,7 +867,7 @@ class Run(BaseRun): if self.interval.lower() == "1min": if qlib_data_1d_dir is None or not Path(qlib_data_1d_dir).expanduser().exists(): raise ValueError( - "If normalize 1min, the qlib_data_1d_dir parameter must be set: --qlib_data_1d_dir , Reference: https://github.com/zhupr/qlib/tree/support_extend_data/scripts/data_collector/yahoo#automatic-update-of-daily-frequency-datafrom-yahoo-finance" + "If normalize 1min, the qlib_data_1d_dir parameter must be set: --qlib_data_1d_dir , Reference: https://github.com/microsoft/qlib/tree/main/scripts/data_collector/yahoo#automatic-update-of-daily-frequency-datafrom-yahoo-finance" ) super(Run, self).normalize_data( date_field_name, symbol_field_name, end_date=end_date, qlib_data_1d_dir=qlib_data_1d_dir From 3b8087677c6e692f4dfcc289f04df98ea8689ce1 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Fri, 16 Jul 2021 12:24:33 +0800 Subject: [PATCH 10/61] 
Update online.rst --- docs/component/online.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/component/online.rst b/docs/component/online.rst index accc936dd..22a6afaf9 100644 --- a/docs/component/online.rst +++ b/docs/component/online.rst @@ -21,6 +21,8 @@ which including `Online Manager <#Online Manager>`_, `Online Strategy <#Online S If you have many models or `task` needs to be managed, please consider `Task Management <../advanced/task_management.html>`_. The `examples `_ are based on some components in `Task Management <../advanced/task_management.html>`_ such as ``TrainerRM`` or ``Collector``. +**NOTE**: User should keep his data source updated to support online serving. For example, Qlib provides `a batch of scripts `_ to help users update Yahoo daily data. + Online Manager ============= @@ -43,4 +45,4 @@ Updater ============= .. automodule:: qlib.workflow.online.update - :members: \ No newline at end of file + :members: From 65b44349cd9ccb8d6aba04e4e4a434636234895e Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Fri, 16 Jul 2021 08:29:32 +0000 Subject: [PATCH 11/61] add PandasQuote --- qlib/backtest/exchange.py | 272 ++++++++++++++++++++++++-------------- 1 file changed, 171 insertions(+), 101 deletions(-) diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index 58f57ed73..8d4739251 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -102,10 +102,11 @@ class Exchange: # TODO: the quote, trade_dates, codes are not necessray. # It is just for performance consideration. + self.limit_type = BaseQuote._get_limit_type(limit_threshold) if limit_threshold is None: if C.region == REG_CN: self.logger.warning(f"limit_threshold not set. 
The stocks hit the limit may be bought/sold") - elif self._get_limit_type(limit_threshold) == self.LT_FLT and abs(limit_threshold) > 0.1: + elif self.limit_type == BaseQuote.LT_FLT and abs(limit_threshold) > 0.1: if C.region == REG_CN: self.logger.warning(f"limit_threshold may not be set to a reasonable value") @@ -127,10 +128,9 @@ class Exchange: # $change is for calculating the limit of the stock necessary_fields = {self.buy_price, self.sell_price, "$close", "$change", "$factor", "$volume"} - if self._get_limit_type(limit_threshold) == self.LT_TP_EXP: + if self.limit_type == BaseQuote.LT_TP_EXP: for exp in limit_threshold: necessary_fields.add(exp) - subscribe_fields = list(necessary_fields | set(subscribe_fields)) all_fields = list(necessary_fields | set(subscribe_fields)) self.all_fields = all_fields @@ -140,94 +140,22 @@ class Exchange: self.limit_threshold: Union[Tuple[str, str], float, None] = limit_threshold self.volume_threshold = volume_threshold self.extra_quote = extra_quote - self.set_quote(codes, start_time, end_time) - def set_quote(self, codes, start_time, end_time): - if len(codes) == 0: - codes = D.instruments() - - self.quote = D.features(codes, self.all_fields, start_time, end_time, freq=self.freq, disk_cache=True).dropna( - subset=["$close"] + # init quote + self.quote = PandasQuote( + start_time = self.start_time, + end_time = self.end_time, + freq = self.freq, + codes = self.codes, + all_fields = self.all_fields, + limit_threshold = self.limit_threshold, + buy_price = self.buy_price, + sell_price = self.sell_price, + extra_quote = self.extra_quote, ) - self.quote.columns = self.all_fields - - for attr in "buy_price", "sell_price": - pstr = getattr(self, attr) # price string - if self.quote[pstr].isna().any(): - self.logger.warning("{} field data contains nan.".format(pstr)) - - if self.quote["$factor"].isna().any(): - # The 'factor.day.bin' file not exists, and `factor` field contains `nan` - # Use adjusted price - self.trade_w_adj_price = 
True - self.logger.warning("factor.day.bin file not exists or factor contains `nan`. Order using adjusted_price.") - if self.trade_unit is not None: - self.logger.warning(f"trade unit {self.trade_unit} is not supported in adjusted_price mode.") - - else: - # The `factor.day.bin` file exists and all data `close` and `factor` are not `nan` - # Use normal price - self.trade_w_adj_price = False - - # update limit - self._update_limit() - - quote_df = self.quote - if self.extra_quote is not None: - # process extra_quote - if "$close" not in self.extra_quote: - raise ValueError("$close is necessray in extra_quote") - for attr in "buy_price", "sell_price": - pstr = getattr(self, attr) # price string - if pstr not in self.extra_quote.columns: - self.extra_quote[pstr] = self.extra_quote["$close"] - self.logger.warning(f"No {pstr} set for extra_quote. Use $close as {pstr}.") - if "$factor" not in self.extra_quote.columns: - self.extra_quote["$factor"] = 1.0 - self.logger.warning("No $factor set for extra_quote. Use 1.0 as $factor.") - if "limit_sell" not in self.extra_quote.columns: - self.extra_quote["limit_sell"] = False - self.logger.warning("No limit_sell set for extra_quote. All stock will be able to be sold.") - if "limit_buy" not in self.extra_quote.columns: - self.extra_quote["limit_buy"] = False - self.logger.warning("No limit_buy set for extra_quote. 
All stock will be able to be bought.") - - assert set(self.extra_quote.columns) == set(quote_df.columns) - {"$change"} - quote_df = pd.concat([quote_df, self.extra_quote], sort=False, axis=0) - - quote_dict = {} - for stock_id, stock_val in quote_df.groupby(level="instrument"): - quote_dict[stock_id] = stock_val.droplevel(level="instrument") - - self.quote = quote_dict - - LT_TP_EXP = "(exp)" # Tuple[str, str] - LT_FLT = "float" # float - LT_NONE = "none" # none - - def _get_limit_type(self, limit_threshold): - if isinstance(limit_threshold, Tuple): - return self.LT_TP_EXP - elif isinstance(limit_threshold, float): - return self.LT_FLT - elif limit_threshold is None: - return self.LT_NONE - else: - raise NotImplementedError(f"This type of `limit_threshold` is not supported") - - def _update_limit(self): - # check limit_threshold - lt_type = self._get_limit_type(self.limit_threshold) - if lt_type == self.LT_NONE: - self.quote["limit_buy"] = False - self.quote["limit_sell"] = False - elif lt_type == self.LT_TP_EXP: - # set limit - self.quote["limit_buy"] = self.quote[self.limit_threshold[0]] - self.quote["limit_sell"] = self.quote[self.limit_threshold[1]] - elif lt_type == self.LT_FLT: - self.quote["limit_buy"] = self.quote["$change"].ge(self.limit_threshold) - self.quote["limit_sell"] = self.quote["$change"].le(-self.limit_threshold) # pylint: disable=E1130 + self.trade_w_adj_price = self.quote.get_trade_w_adj_price() + if(self.trade_w_adj_price and (self.trade_unit is not None)): + self.logger.warning(f"trade unit {self.trade_unit} is not supported in adjusted_price mode.") def check_stock_limit(self, stock_id, start_time, end_time, direction=None): """ @@ -241,20 +169,20 @@ class Exchange: """ if direction is None: - buy_limit = resam_ts_data(self.quote[stock_id]["limit_buy"], start_time, end_time, method="all") - sell_limit = resam_ts_data(self.quote[stock_id]["limit_sell"], start_time, end_time, method="all") + buy_limit = self.quote.get_data(stock_id, 
start_time, end_time, fields="limit_buy", method="all") + sell_limit = self.quote.get_data(stock_id, start_time, end_time, fields="limit_sell", method="all") return buy_limit or sell_limit elif direction == Order.BUY: - return resam_ts_data(self.quote[stock_id]["limit_buy"], start_time, end_time, method="all") + return self.quote.get_data(stock_id, start_time, end_time, fields="limit_buy", method="all") elif direction == Order.SELL: - return resam_ts_data(self.quote[stock_id]["limit_sell"], start_time, end_time, method="all") + return self.quote.get_data(stock_id, start_time, end_time, fields="limit_sell", method="all") else: raise ValueError(f"direction {direction} is not supported!") def check_stock_suspended(self, stock_id, start_time, end_time): # is suspended - if stock_id in self.quote: - return resam_ts_data(self.quote[stock_id], start_time, end_time, method=None) is None + if stock_id in self.quote.get_all_stock(): + return self.quote.get_data(stock_id, start_time, end_time) is None else: return True @@ -313,13 +241,13 @@ class Exchange: return trade_val, trade_cost, trade_price def get_quote_info(self, stock_id, start_time, end_time, method=ts_data_last): - return resam_ts_data(self.quote[stock_id], start_time, end_time, method=method) + return self.quote.get_data(stock_id, start_time, end_time, method=method) def get_close(self, stock_id, start_time, end_time, method=ts_data_last): - return resam_ts_data(self.quote[stock_id]["$close"], start_time, end_time, method=method) + return self.quote.get_data(stock_id, start_time, end_time, fields="$close", method=method) def get_volume(self, stock_id, start_time, end_time, method="sum"): - return resam_ts_data(self.quote[stock_id]["$volume"], start_time, end_time, method=method) + return self.quote.get_data(stock_id, start_time, end_time, fields="$volume", method=method) def get_deal_price(self, stock_id, start_time, end_time, direction: OrderDir, method=ts_data_last): if direction == OrderDir.SELL: @@ -328,7 
+256,7 @@ class Exchange: pstr = self.buy_price else: raise NotImplementedError(f"This type of input is not supported") - deal_price = resam_ts_data(self.quote[stock_id][pstr], start_time, end_time, method=method) + deal_price = self.quote.get_data(stock_id, start_time, end_time, fields=pstr, method=method) if method is not None and (np.isclose(deal_price, 0.0) or np.isnan(deal_price)): self.logger.warning(f"(stock_id:{stock_id}, trade_time:{(start_time, end_time)}, {pstr}): {deal_price}!!!") self.logger.warning(f"setting deal_price to close price") @@ -343,9 +271,9 @@ class Exchange: `None`: if the stock is suspended `None` may be returned `float`: return factor if the factor exists """ - if stock_id not in self.quote: + if stock_id not in self.quote.get_all_stock(): return None - return resam_ts_data(self.quote[stock_id]["$factor"], start_time, end_time, method=ts_data_last) + return self.quote.get_data(stock_id, start_time, end_time, fields="$factor", method=ts_data_last) def generate_amount_position_from_weight_position( self, weight_position, cash, start_time, end_time, direction=OrderDir.BUY @@ -596,3 +524,145 @@ class Exchange: # cache to avoid recreate the same instance self._order_helper = OrderHelper(self) return self._order_helper + + +class BaseQuote: + + def __init__(self): + self.logger = get_module_logger("online operator", level=logging.INFO) + + def _update_limit(self, limit_threshold): + raise NotImplementedError(f"Please implement the `_update_limit` method") + + def get_trade_w_adj_price(self): + raise NotImplementedError(f"Please implement the `get_trade_w_adj_price` method") + + def get_all_stock(self): + raise NotImplementedError(f"Please implement the `get_all_stock` method") + + def get_data(self, stock_id, start_time, end_time, fields, method): + raise NotImplementedError(f"Please implement the `get_data` method") + + LT_TP_EXP = "(exp)" # Tuple[str, str] + LT_FLT = "float" # float + LT_NONE = "none" # none + + @staticmethod + def 
_get_limit_type(limit_threshold): + if isinstance(limit_threshold, Tuple): + return BaseQuote.LT_TP_EXP + elif isinstance(limit_threshold, float): + return BaseQuote.LT_FLT + elif limit_threshold is None: + return BaseQuote.LT_NONE + else: + raise NotImplementedError(f"This type of `limit_threshold` is not supported") + + +class PandasQuote(BaseQuote): + + def __init__( + self, + start_time, + end_time, + freq, + codes, + all_fields, + limit_threshold, + buy_price, + sell_price, + extra_quote + ): + + super().__init__() + + # get stock data from qlib + if len(codes) == 0: + codes = D.instruments() + self.data = D.features( + codes, + all_fields, + start_time, + end_time, + freq=freq, + disk_cache=True + ).dropna(subset=["$close"]) + self.data.columns = all_fields + + # check buy_price data and sell_price data + self.buy_price = buy_price + self.sell_price = sell_price + for attr in "buy_price", "sell_price": + pstr = getattr(self, attr) # price string + if self.data[pstr].isna().any(): + self.logger.warning("{} field data contains nan.".format(pstr)) + + # update trade_w_adj_price + if self.data["$factor"].isna().any(): + # The 'factor.day.bin' file not exists, and `factor` field contains `nan` + # Use adjusted price + self.logger.warning("factor.day.bin file not exists or factor contains `nan`. Order using adjusted_price.") + self.trade_w_adj_price = True + else: + # The `factor.day.bin` file exists and all data `close` and `factor` are not `nan` + # Use normal price + self.trade_w_adj_price = False + + # update limit + self._update_limit(limit_threshold) + + # concat extra_quote + quote_df = self.data + if extra_quote is not None: + # process extra_quote + if "$close" not in extra_quote: + raise ValueError("$close is necessray in extra_quote") + for attr in "buy_price", "sell_price": + pstr = getattr(self, attr) # price string + if pstr not in extra_quote.columns: + extra_quote[pstr] = extra_quote["$close"] + self.logger.warning(f"No {pstr} set for extra_quote. 
Use $close as {pstr}.") + if "$factor" not in extra_quote.columns: + extra_quote["$factor"] = 1.0 + self.logger.warning("No $factor set for extra_quote. Use 1.0 as $factor.") + if "limit_sell" not in extra_quote.columns: + extra_quote["limit_sell"] = False + self.logger.warning("No limit_sell set for extra_quote. All stock will be able to be sold.") + if "limit_buy" not in extra_quote.columns: + extra_quote["limit_buy"] = False + self.logger.warning("No limit_buy set for extra_quote. All stock will be able to be bought.") + assert set(extra_quote.columns) == set(quote_df.columns) - {"$change"} + quote_df = pd.concat([quote_df, extra_quote], sort=False, axis=0) + + quote_dict = {} + for stock_id, stock_val in quote_df.groupby(level="instrument"): + quote_dict[stock_id] = stock_val.droplevel(level="instrument") + self.data = quote_dict + + def _update_limit(self, limit_threshold): + # check limit_threshold + limit_type = self._get_limit_type(limit_threshold) + if limit_type == self.LT_NONE: + self.data["limit_buy"] = False + self.data["limit_sell"] = False + elif limit_type == self.LT_TP_EXP: + # set limit + self.data["limit_buy"] = self.data[limit_threshold[0]] + self.data["limit_sell"] = self.data[limit_threshold[1]] + elif limit_type == self.LT_FLT: + self.data["limit_buy"] = self.data["$change"].ge(limit_threshold) + self.data["limit_sell"] = self.data["$change"].le(-limit_threshold) # pylint: disable=E1130 + + def get_all_stock(self): + return self.data.keys() + + def get_data(self, stock_id, start_time, end_time, fields = None, method = None): + if(fields is None): + return resam_ts_data(self.data[stock_id], start_time, end_time, method=method) + elif(isinstance(fields, (str, list))): + return resam_ts_data(self.data[stock_id][fields], start_time, end_time, method=method) + else: + raise ValueError(f"fields must be None, str or list") + + def get_trade_w_adj_price(self): + return self.trade_w_adj_price \ No newline at end of file From 
110141ddac97dbeeed1723a7103fb9d777d223c6 Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Fri, 16 Jul 2021 09:17:29 +0000 Subject: [PATCH 12/61] add doc --- qlib/backtest/exchange.py | 65 +++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index 8d4739251..2e865d591 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -532,15 +532,24 @@ class BaseQuote: self.logger = get_module_logger("online operator", level=logging.INFO) def _update_limit(self, limit_threshold): + """add limitation information to data based on limit_threshold + """ raise NotImplementedError(f"Please implement the `_update_limit` method") def get_trade_w_adj_price(self): + """return whether use the trade price with adjusted weight + """ raise NotImplementedError(f"Please implement the `get_trade_w_adj_price` method") def get_all_stock(self): + """return all stock codes + """ raise NotImplementedError(f"Please implement the `get_all_stock` method") - def get_data(self, stock_id, start_time, end_time, fields, method): + def get_data(self, stock_id, start_time, end_time, fields=None, method=None): + """get the specific fields of stock data during start time and end_time, + and apply method to the data, please refer to resam_ts_data + """ raise NotImplementedError(f"Please implement the `get_data` method") LT_TP_EXP = "(exp)" # Tuple[str, str] @@ -549,6 +558,8 @@ class BaseQuote: @staticmethod def _get_limit_type(limit_threshold): + """get limit type + """ if isinstance(limit_threshold, Tuple): return BaseQuote.LT_TP_EXP elif isinstance(limit_threshold, float): @@ -560,6 +571,8 @@ class BaseQuote: class PandasQuote(BaseQuote): + """ + """ def __init__( self, @@ -567,12 +580,52 @@ class PandasQuote(BaseQuote): end_time, freq, codes, - all_fields, - limit_threshold, - buy_price, - sell_price, - extra_quote + all_fields: List[str], + limit_threshold: Union[Tuple[str, str], 
float, None], + buy_price: str, + sell_price: str, + extra_quote: pd.DataFrame, ): + """init stock data based on pandas + + Parameters + ---------- + start_time : pd.Timestamp|str + closed start time for backtest + end_time : pd.Timestamp|str + closed end time for backtest + freq : str + frequency of data + codes : [type] + all stock code + all_fields : List[str] + all subscribe fields in qlib + limit_threshold : Union[Tuple[str, str], float, None] + 1) `None`: no limitation + 2) float, 0.1 for example, default None + 3) Tuple[str, str]: (, + ) + `False` value indicates the stock is tradable + `True` value indicates the stock is limited and not tradable + buy_price : str + the data field for buying stock + sell_price : str + the data field for selling stock + extra_quote : pd.DataFrame + columns: like ['$vwap', '$close', '$volume', '$factor', 'limit_sell', 'limit_buy']. + The limit indicates that the etf is tradable on a specific day. + Necessary fields: + $close is for calculating the total value at end of each day. + Optional fields: + $volume is only necessary when we limit the trade amount or caculate PA(vwap) indicator + $vwap is only necessary when we use the $vwap price as the deal price + $factor is for rounding to the trading unit + limit_sell will be set to False by default(False indicates we can sell this + target on this day). + limit_buy will be set to False by default(False indicates we can buy this + target on this day). 
+ index: MultipleIndex(instrument, pd.Datetime) + """ super().__init__() From 567841e1c663964b41e6d4bcfb0689540c43d2b5 Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Fri, 16 Jul 2021 12:56:49 +0000 Subject: [PATCH 13/61] get qlib data in exchange --- qlib/backtest/exchange.py | 310 +++++++++++++++++--------------------- 1 file changed, 139 insertions(+), 171 deletions(-) diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index 2e865d591..82f57462e 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -102,11 +102,11 @@ class Exchange: # TODO: the quote, trade_dates, codes are not necessray. # It is just for performance consideration. - self.limit_type = BaseQuote._get_limit_type(limit_threshold) + self.limit_type = self._get_limit_type(limit_threshold) if limit_threshold is None: if C.region == REG_CN: self.logger.warning(f"limit_threshold not set. The stocks hit the limit may be bought/sold") - elif self.limit_type == BaseQuote.LT_FLT and abs(limit_threshold) > 0.1: + elif self.limit_type == self.LT_FLT and abs(limit_threshold) > 0.1: if C.region == REG_CN: self.logger.warning(f"limit_threshold may not be set to a reasonable value") @@ -128,7 +128,7 @@ class Exchange: # $change is for calculating the limit of the stock necessary_fields = {self.buy_price, self.sell_price, "$close", "$change", "$factor", "$volume"} - if self.limit_type == BaseQuote.LT_TP_EXP: + if self.limit_type == self.LT_TP_EXP: for exp in limit_threshold: necessary_fields.add(exp) all_fields = list(necessary_fields | set(subscribe_fields)) @@ -140,22 +140,98 @@ class Exchange: self.limit_threshold: Union[Tuple[str, str], float, None] = limit_threshold self.volume_threshold = volume_threshold self.extra_quote = extra_quote + self.get_quote_from_qlib() - # init quote - self.quote = PandasQuote( - start_time = self.start_time, - end_time = self.end_time, - freq = self.freq, - codes = self.codes, - all_fields = self.all_fields, - limit_threshold = 
self.limit_threshold, - buy_price = self.buy_price, - sell_price = self.sell_price, - extra_quote = self.extra_quote, - ) - self.trade_w_adj_price = self.quote.get_trade_w_adj_price() - if(self.trade_w_adj_price and (self.trade_unit is not None)): - self.logger.warning(f"trade unit {self.trade_unit} is not supported in adjusted_price mode.") + # init quote by quote_df + self.quote = PandasQuote(self.quote_df) + + def get_quote_from_qlib(self): + # get stock data from qlib + if len(self.codes) == 0: + self.codes = D.instruments() + self.quote_df = D.features( + self.codes, + self.all_fields, + self.start_time, + self.end_time, + freq=self.freq, + disk_cache=True + ).dropna(subset=["$close"]) + self.quote_df.columns = self.all_fields + + # check buy_price data and sell_price data + for attr in "buy_price", "sell_price": + pstr = getattr(self, attr) # price string + if self.quote_df[pstr].isna().any(): + self.logger.warning("{} field data contains nan.".format(pstr)) + + # update trade_w_adj_price + if self.quote_df["$factor"].isna().any(): + # The 'factor.day.bin' file not exists, and `factor` field contains `nan` + # Use adjusted price + self.trade_w_adj_price = True + self.logger.warning("factor.day.bin file not exists or factor contains `nan`. 
Order using adjusted_price.") + if self.trade_unit is not None: + self.logger.warning(f"trade unit {self.trade_unit} is not supported in adjusted_price mode.") + else: + # The `factor.day.bin` file exists and all data `close` and `factor` are not `nan` + # Use normal price + self.trade_w_adj_price = False + + # update limit + self._update_limit(self.limit_threshold) + + # concat extra_quote + if self.extra_quote is not None: + # process extra_quote + if "$close" not in self.extra_quote: + raise ValueError("$close is necessray in extra_quote") + for attr in "buy_price", "sell_price": + pstr = getattr(self, attr) # price string + if pstr not in self.extra_quote.columns: + self.extra_quote[pstr] = self.extra_quote["$close"] + self.logger.warning(f"No {pstr} set for extra_quote. Use $close as {pstr}.") + if "$factor" not in self.extra_quote.columns: + self.extra_quote["$factor"] = 1.0 + self.logger.warning("No $factor set for extra_quote. Use 1.0 as $factor.") + if "limit_sell" not in self.extra_quote.columns: + self.extra_quote["limit_sell"] = False + self.logger.warning("No limit_sell set for extra_quote. All stock will be able to be sold.") + if "limit_buy" not in self.extra_quote.columns: + self.extra_quote["limit_buy"] = False + self.logger.warning("No limit_buy set for extra_quote. 
All stock will be able to be bought.") + assert set(self.extra_quote.columns) == set(self.quote_df.columns) - {"$change"} + self.quote_df = pd.concat([self.quote_df, extra_quote], sort=False, axis=0) + + LT_TP_EXP = "(exp)" # Tuple[str, str] + LT_FLT = "float" # float + LT_NONE = "none" # none + + def _get_limit_type(self, limit_threshold): + """get limit type + """ + if isinstance(limit_threshold, Tuple): + return self.LT_TP_EXP + elif isinstance(limit_threshold, float): + return self.LT_FLT + elif limit_threshold is None: + return self.LT_NONE + else: + raise NotImplementedError(f"This type of `limit_threshold` is not supported") + + def _update_limit(self, limit_threshold): + # check limit_threshold + limit_type = self._get_limit_type(limit_threshold) + if limit_type == self.LT_NONE: + self.quote_df["limit_buy"] = False + self.quote_df["limit_sell"] = False + elif limit_type == self.LT_TP_EXP: + # set limit + self.quote_df["limit_buy"] = self.quote_df[limit_threshold[0]] + self.quote_df["limit_sell"] = self.quote_df[limit_threshold[1]] + elif limit_type == self.LT_FLT: + self.quote_df["limit_buy"] = self.quote_df["$change"].ge(limit_threshold) + self.quote_df["limit_sell"] = self.quote_df["$change"].le(-limit_threshold) # pylint: disable=E1130 def check_stock_limit(self, stock_id, start_time, end_time, direction=None): """ @@ -528,184 +604,79 @@ class Exchange: class BaseQuote: - def __init__(self): + def __init__(self, quote_df: pd.DataFrame): self.logger = get_module_logger("online operator", level=logging.INFO) - def _update_limit(self, limit_threshold): - """add limitation information to data based on limit_threshold - """ - raise NotImplementedError(f"Please implement the `_update_limit` method") - - def get_trade_w_adj_price(self): - """return whether use the trade price with adjusted weight - """ - raise NotImplementedError(f"Please implement the `get_trade_w_adj_price` method") - def get_all_stock(self): """return all stock codes + + Return + ------ + 
Union[list, Dict.keys(), set, tuple] + all stock codes """ raise NotImplementedError(f"Please implement the `get_all_stock` method") - def get_data(self, stock_id, start_time, end_time, fields=None, method=None): + def get_data(self, stock_id: str, start_time, end_time, fields: Union[str, list]=None, method=None): """get the specific fields of stock data during start time and end_time, - and apply method to the data, please refer to resam_ts_data - """ - raise NotImplementedError(f"Please implement the `get_data` method") + and apply method to the data. + + Example: + .. code-block:: + $close $volume + instrument datetime + SH600000 2010-01-04 86.778313 16162960.0 + 2010-01-05 87.433578 28117442.0 + 2010-01-06 85.713585 23632884.0 + 2010-01-07 83.788803 20813402.0 + 2010-01-08 84.730675 16044853.0 - LT_TP_EXP = "(exp)" # Tuple[str, str] - LT_FLT = "float" # float - LT_NONE = "none" # none + SH600655 2010-01-04 2699.567383 158193.328125 + 2010-01-08 2612.359619 77501.406250 + 2010-01-11 2712.982422 160852.390625 + 2010-01-12 2788.688232 164587.937500 + 2010-01-13 2790.604004 145460.453125 - @staticmethod - def _get_limit_type(limit_threshold): - """get limit type - """ - if isinstance(limit_threshold, Tuple): - return BaseQuote.LT_TP_EXP - elif isinstance(limit_threshold, float): - return BaseQuote.LT_FLT - elif limit_threshold is None: - return BaseQuote.LT_NONE - else: - raise NotImplementedError(f"This type of `limit_threshold` is not supported") + print(get_data(stock_id="SH600000", start_time="2010-01-04", end_time="2010-01-05", fields=["$close", "$volume"], method="last")) + + $close 87.433578 + $volume 28117442.0 + print(get_data(stock_id="SH600000", start_time="2010-01-04", end_time="2010-01-05", fields="$close", method="last")) -class PandasQuote(BaseQuote): - """ - """ - - def __init__( - self, - start_time, - end_time, - freq, - codes, - all_fields: List[str], - limit_threshold: Union[Tuple[str, str], float, None], - buy_price: str, - sell_price: str, - 
extra_quote: pd.DataFrame, - ): - """init stock data based on pandas + 87.433578 Parameters ---------- + stock_id: Union[str, list] start_time : pd.Timestamp|str closed start time for backtest end_time : pd.Timestamp|str closed end time for backtest - freq : str - frequency of data - codes : [type] - all stock code - all_fields : List[str] - all subscribe fields in qlib - limit_threshold : Union[Tuple[str, str], float, None] - 1) `None`: no limitation - 2) float, 0.1 for example, default None - 3) Tuple[str, str]: (, - ) - `False` value indicates the stock is tradable - `True` value indicates the stock is limited and not tradable - buy_price : str - the data field for buying stock - sell_price : str - the data field for selling stock - extra_quote : pd.DataFrame - columns: like ['$vwap', '$close', '$volume', '$factor', 'limit_sell', 'limit_buy']. - The limit indicates that the etf is tradable on a specific day. - Necessary fields: - $close is for calculating the total value at end of each day. - Optional fields: - $volume is only necessary when we limit the trade amount or caculate PA(vwap) indicator - $vwap is only necessary when we use the $vwap price as the deal price - $factor is for rounding to the trading unit - limit_sell will be set to False by default(False indicates we can sell this - target on this day). - limit_buy will be set to False by default(False indicates we can buy this - target on this day). - index: MultipleIndex(instrument, pd.Datetime) + fields : Union[str, List] + the columns of data to fetch + method : Union[str, Callable] + the method apply to data. + e.g ["None", "last", "all", "sum", "mean", qlib/utils/resam.py/ts_data_last] + + Return + ---------- + Union[None, float, pd.Series] + The resampled Series/value, return None when the resampled data is empty. 
""" - super().__init__() + raise NotImplementedError(f"Please implement the `get_data` method") - # get stock data from qlib - if len(codes) == 0: - codes = D.instruments() - self.data = D.features( - codes, - all_fields, - start_time, - end_time, - freq=freq, - disk_cache=True - ).dropna(subset=["$close"]) - self.data.columns = all_fields - # check buy_price data and sell_price data - self.buy_price = buy_price - self.sell_price = sell_price - for attr in "buy_price", "sell_price": - pstr = getattr(self, attr) # price string - if self.data[pstr].isna().any(): - self.logger.warning("{} field data contains nan.".format(pstr)) - - # update trade_w_adj_price - if self.data["$factor"].isna().any(): - # The 'factor.day.bin' file not exists, and `factor` field contains `nan` - # Use adjusted price - self.logger.warning("factor.day.bin file not exists or factor contains `nan`. Order using adjusted_price.") - self.trade_w_adj_price = True - else: - # The `factor.day.bin` file exists and all data `close` and `factor` are not `nan` - # Use normal price - self.trade_w_adj_price = False - - # update limit - self._update_limit(limit_threshold) - - # concat extra_quote - quote_df = self.data - if extra_quote is not None: - # process extra_quote - if "$close" not in extra_quote: - raise ValueError("$close is necessray in extra_quote") - for attr in "buy_price", "sell_price": - pstr = getattr(self, attr) # price string - if pstr not in extra_quote.columns: - extra_quote[pstr] = extra_quote["$close"] - self.logger.warning(f"No {pstr} set for extra_quote. Use $close as {pstr}.") - if "$factor" not in extra_quote.columns: - extra_quote["$factor"] = 1.0 - self.logger.warning("No $factor set for extra_quote. Use 1.0 as $factor.") - if "limit_sell" not in extra_quote.columns: - extra_quote["limit_sell"] = False - self.logger.warning("No limit_sell set for extra_quote. 
All stock will be able to be sold.") - if "limit_buy" not in extra_quote.columns: - extra_quote["limit_buy"] = False - self.logger.warning("No limit_buy set for extra_quote. All stock will be able to be bought.") - assert set(extra_quote.columns) == set(quote_df.columns) - {"$change"} - quote_df = pd.concat([quote_df, extra_quote], sort=False, axis=0) +class PandasQuote(BaseQuote): + def __init__(self, quote_df: pd.DataFrame): + super().__init__(quote_df=quote_df) quote_dict = {} for stock_id, stock_val in quote_df.groupby(level="instrument"): quote_dict[stock_id] = stock_val.droplevel(level="instrument") self.data = quote_dict - def _update_limit(self, limit_threshold): - # check limit_threshold - limit_type = self._get_limit_type(limit_threshold) - if limit_type == self.LT_NONE: - self.data["limit_buy"] = False - self.data["limit_sell"] = False - elif limit_type == self.LT_TP_EXP: - # set limit - self.data["limit_buy"] = self.data[limit_threshold[0]] - self.data["limit_sell"] = self.data[limit_threshold[1]] - elif limit_type == self.LT_FLT: - self.data["limit_buy"] = self.data["$change"].ge(limit_threshold) - self.data["limit_sell"] = self.data["$change"].le(-limit_threshold) # pylint: disable=E1130 - def get_all_stock(self): return self.data.keys() @@ -715,7 +686,4 @@ class PandasQuote(BaseQuote): elif(isinstance(fields, (str, list))): return resam_ts_data(self.data[stock_id][fields], start_time, end_time, method=method) else: - raise ValueError(f"fields must be None, str or list") - - def get_trade_w_adj_price(self): - return self.trade_w_adj_price \ No newline at end of file + raise ValueError(f"fields must be None, str or list") \ No newline at end of file From 6ad52e8cf5f7f8da56bbbfaac757de304343695c Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Fri, 16 Jul 2021 13:55:49 +0000 Subject: [PATCH 14/61] black and doc --- qlib/backtest/exchange.py | 59 ++++++++++++++------------ qlib/contrib/strategy/rule_strategy.py | 4 +- 2 files changed, 35 
insertions(+), 28 deletions(-) diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index 82f57462e..7733891fe 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -150,12 +150,7 @@ class Exchange: if len(self.codes) == 0: self.codes = D.instruments() self.quote_df = D.features( - self.codes, - self.all_fields, - self.start_time, - self.end_time, - freq=self.freq, - disk_cache=True + self.codes, self.all_fields, self.start_time, self.end_time, freq=self.freq, disk_cache=True ).dropna(subset=["$close"]) self.quote_df.columns = self.all_fields @@ -177,10 +172,9 @@ class Exchange: # The `factor.day.bin` file exists and all data `close` and `factor` are not `nan` # Use normal price self.trade_w_adj_price = False - # update limit self._update_limit(self.limit_threshold) - + # concat extra_quote if self.extra_quote is not None: # process extra_quote @@ -199,7 +193,7 @@ class Exchange: self.logger.warning("No limit_sell set for extra_quote. All stock will be able to be sold.") if "limit_buy" not in self.extra_quote.columns: self.extra_quote["limit_buy"] = False - self.logger.warning("No limit_buy set for extra_quote. All stock will be able to be bought.") + self.logger.warning("No limit_buy set for extra_quote. 
All stock will be able to be bought.") assert set(self.extra_quote.columns) == set(self.quote_df.columns) - {"$change"} self.quote_df = pd.concat([self.quote_df, extra_quote], sort=False, axis=0) @@ -208,8 +202,7 @@ class Exchange: LT_NONE = "none" # none def _get_limit_type(self, limit_threshold): - """get limit type - """ + """get limit type""" if isinstance(limit_threshold, Tuple): return self.LT_TP_EXP elif isinstance(limit_threshold, float): @@ -603,7 +596,6 @@ class Exchange: class BaseQuote: - def __init__(self, quote_df: pd.DataFrame): self.logger = get_module_logger("online operator", level=logging.INFO) @@ -617,10 +609,17 @@ class BaseQuote: """ raise NotImplementedError(f"Please implement the `get_all_stock` method") - def get_data(self, stock_id: str, start_time, end_time, fields: Union[str, list]=None, method=None): + def get_data( + self, + stock_id: Union[str, list], + start_time: Union[pd.Timestamp, str], + end_time: Union[pd.Timestamp, str], + fields: Union[str, list] = None, + method: Union[str, Callable] = None, + ): """get the specific fields of stock data during start time and end_time, and apply method to the data. - + Example: .. 
code-block:: $close $volume @@ -637,8 +636,15 @@ class BaseQuote: 2010-01-12 2788.688232 164587.937500 2010-01-13 2790.604004 145460.453125 + print(get_data(stock_id=["SH600000", "SH600655"], start_time="2010-01-04", end_time="2010-01-05", fields=["$close", "$volume"], method="last")) + + $close $volume + instrument + SH600000 87.433578 28117442.0 + SH600655 2699.567383 158193.328125 + print(get_data(stock_id="SH600000", start_time="2010-01-04", end_time="2010-01-05", fields=["$close", "$volume"], method="last")) - + $close 87.433578 $volume 28117442.0 @@ -649,27 +655,26 @@ class BaseQuote: Parameters ---------- stock_id: Union[str, list] - start_time : pd.Timestamp|str + start_time : Union[pd.Timestamp, str] closed start time for backtest - end_time : pd.Timestamp|str + end_time : Union[pd.Timestamp, str] closed end time for backtest fields : Union[str, List] the columns of data to fetch method : Union[str, Callable] - the method apply to data. - e.g ["None", "last", "all", "sum", "mean", qlib/utils/resam.py/ts_data_last] + the method apply to data. + e.g ["None", "last", "all", "sum", "mean", "any", qlib/utils/resam.py/ts_data_last] Return ---------- - Union[None, float, pd.Series] - The resampled Series/value, return None when the resampled data is empty. + Union[None, float, pd.Series, pd.DataFrame] + The resampled DataFrame/Series/value, return None when the resampled data is empty. 
""" - raise NotImplementedError(f"Please implement the `get_data` method") + raise NotImplementedError(f"Please implement the `get_data` method") class PandasQuote(BaseQuote): - def __init__(self, quote_df: pd.DataFrame): super().__init__(quote_df=quote_df) quote_dict = {} @@ -680,10 +685,10 @@ class PandasQuote(BaseQuote): def get_all_stock(self): return self.data.keys() - def get_data(self, stock_id, start_time, end_time, fields = None, method = None): - if(fields is None): + def get_data(self, stock_id, start_time, end_time, fields=None, method=None): + if fields is None: return resam_ts_data(self.data[stock_id], start_time, end_time, method=method) - elif(isinstance(fields, (str, list))): + elif isinstance(fields, (str, list)): return resam_ts_data(self.data[stock_id][fields], start_time, end_time, method=method) else: - raise ValueError(f"fields must be None, str or list") \ No newline at end of file + raise ValueError(f"fields must be None, str or list") diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py index 56884cd48..970734df5 100644 --- a/qlib/contrib/strategy/rule_strategy.py +++ b/qlib/contrib/strategy/rule_strategy.py @@ -687,7 +687,9 @@ class FileOrderStrategy(BaseStrategy): - This class provides an interface for user to read orders from csv files. 
""" - def __init__(self, file: Union[IO, str, Path], trade_range: Union[Tuple[int, int], TradeRange]= None, *args, **kwargs): + def __init__( + self, file: Union[IO, str, Path], trade_range: Union[Tuple[int, int], TradeRange] = None, *args, **kwargs + ): """ Parameters From 2b8d4dc3c2cb744ac27a0e78860304d6d3218073 Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Fri, 16 Jul 2021 14:09:36 +0000 Subject: [PATCH 15/61] callable --- qlib/backtest/exchange.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index 7733891fe..8d02e7893 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -615,7 +615,7 @@ class BaseQuote: start_time: Union[pd.Timestamp, str], end_time: Union[pd.Timestamp, str], fields: Union[str, list] = None, - method: Union[str, Callable] = None, + method: Union[str, "Callable"] = None, ): """get the specific fields of stock data during start time and end_time, and apply method to the data. From 47535ba53096f66cc99fb088b478304d55564f29 Mon Sep 17 00:00:00 2001 From: chaosyu Date: Wed, 14 Jul 2021 18:55:58 +0800 Subject: [PATCH 16/61] add mlflow filter string support to limit too much run number --- qlib/workflow/exp.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/qlib/workflow/exp.py b/qlib/workflow/exp.py index 627b5ff82..63ab3ce40 100644 --- a/qlib/workflow/exp.py +++ b/qlib/workflow/exp.py @@ -325,7 +325,7 @@ class MLflowExperiment(Experiment): UNLIMITED = 50000 # FIXME: Mlflow can only list 50000 records at most!!!!!!! - def list_recorders(self, max_results: int = UNLIMITED, status: Union[str, None] = None): + def list_recorders(self, max_results: int = UNLIMITED, status: Union[str, None] = None, filter_string: str=""): """ Parameters ---------- @@ -334,8 +334,10 @@ class MLflowExperiment(Experiment): status : str the criteria based on status to filter results. `None` indicates no filtering. 
+ filter_string : str + mlflow supported filter string like 'params."my_param"="a" and tags."my_tag"="b"', use this will help to reduce too much run number. """ - runs = self._client.search_runs(self.id, run_view_type=ViewType.ACTIVE_ONLY, max_results=max_results) + runs = self._client.search_runs(self.id, run_view_type=ViewType.ACTIVE_ONLY, max_results=max_results, filter_string=filter_string) recorders = dict() for i in range(len(runs)): recorder = MLflowRecorder(self.id, self._uri, mlflow_run=runs[i]) From b723f14619a865eeb595bc6f858fe0aedddc5403 Mon Sep 17 00:00:00 2001 From: chaosyu Date: Wed, 14 Jul 2021 19:04:10 +0800 Subject: [PATCH 17/61] apply filter string to recorder collector --- qlib/workflow/task/collect.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qlib/workflow/task/collect.py b/qlib/workflow/task/collect.py index 36ccf434d..c7d82d541 100644 --- a/qlib/workflow/task/collect.py +++ b/qlib/workflow/task/collect.py @@ -139,6 +139,7 @@ class RecorderCollector(Collector): rec_filter_func=None, artifacts_path={"pred": "pred.pkl"}, artifacts_key=None, + filter_string: str = "" ): """ Init RecorderCollector. @@ -150,6 +151,7 @@ class RecorderCollector(Collector): rec_filter_func (Callable, optional): filter the recorder by return True or False. Defaults to None. artifacts_path (dict, optional): The artifacts name and its path in Recorder. Defaults to {"pred": "pred.pkl", "IC": "sig_analysis/ic.pkl"}. artifacts_key (str or List, optional): the artifacts key you want to get. If None, get all artifacts. + filter_string (str): filter string that used to apply in recorder quering (only support mlflow for now). 
""" super().__init__(process_list=process_list) if isinstance(experiment, str): @@ -163,6 +165,7 @@ class RecorderCollector(Collector): self.rec_key_func = rec_key_func self.artifacts_key = artifacts_key self.rec_filter_func = rec_filter_func + self.filter_string = filter_string def collect(self, artifacts_key=None, rec_filter_func=None, only_exist=True) -> dict: """ @@ -187,7 +190,7 @@ class RecorderCollector(Collector): collect_dict = {} # filter records - recs = self.experiment.list_recorders() + recs = self.experiment.list_recorders(filter_string=self.filter_string) recs_flt = {} for rid, rec in recs.items(): if rec_filter_func is None or rec_filter_func(rec): From 28cb827a23120fa7dcbfadb5a4c8119f33d49359 Mon Sep 17 00:00:00 2001 From: chaosyu Date: Thu, 15 Jul 2021 11:46:10 +0800 Subject: [PATCH 18/61] fix lint issue --- qlib/workflow/exp.py | 6 ++++-- qlib/workflow/task/collect.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/qlib/workflow/exp.py b/qlib/workflow/exp.py index 63ab3ce40..fcf6cd8d1 100644 --- a/qlib/workflow/exp.py +++ b/qlib/workflow/exp.py @@ -325,7 +325,7 @@ class MLflowExperiment(Experiment): UNLIMITED = 50000 # FIXME: Mlflow can only list 50000 records at most!!!!!!! - def list_recorders(self, max_results: int = UNLIMITED, status: Union[str, None] = None, filter_string: str=""): + def list_recorders(self, max_results: int = UNLIMITED, status: Union[str, None] = None, filter_string: str = ""): """ Parameters ---------- @@ -337,7 +337,9 @@ class MLflowExperiment(Experiment): filter_string : str mlflow supported filter string like 'params."my_param"="a" and tags."my_tag"="b"', use this will help to reduce too much run number. 
""" - runs = self._client.search_runs(self.id, run_view_type=ViewType.ACTIVE_ONLY, max_results=max_results, filter_string=filter_string) + runs = self._client.search_runs( + self.id, run_view_type=ViewType.ACTIVE_ONLY, max_results=max_results, filter_string=filter_string + ) recorders = dict() for i in range(len(runs)): recorder = MLflowRecorder(self.id, self._uri, mlflow_run=runs[i]) diff --git a/qlib/workflow/task/collect.py b/qlib/workflow/task/collect.py index c7d82d541..6f92034aa 100644 --- a/qlib/workflow/task/collect.py +++ b/qlib/workflow/task/collect.py @@ -139,7 +139,7 @@ class RecorderCollector(Collector): rec_filter_func=None, artifacts_path={"pred": "pred.pkl"}, artifacts_key=None, - filter_string: str = "" + filter_string: str = "", ): """ Init RecorderCollector. From 8b715268bd99d33894eab035c6ab8ac7c4ade261 Mon Sep 17 00:00:00 2001 From: chaosyu Date: Fri, 16 Jul 2021 19:44:06 +0800 Subject: [PATCH 19/61] use list_kwargs instead filter_string --- qlib/workflow/task/collect.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qlib/workflow/task/collect.py b/qlib/workflow/task/collect.py index 6f92034aa..d8397b432 100644 --- a/qlib/workflow/task/collect.py +++ b/qlib/workflow/task/collect.py @@ -139,7 +139,7 @@ class RecorderCollector(Collector): rec_filter_func=None, artifacts_path={"pred": "pred.pkl"}, artifacts_key=None, - filter_string: str = "", + list_kwargs={}, ): """ Init RecorderCollector. 
@@ -165,7 +165,7 @@ class RecorderCollector(Collector): self.rec_key_func = rec_key_func self.artifacts_key = artifacts_key self.rec_filter_func = rec_filter_func - self.filter_string = filter_string + self.list_kwargs = list_kwargs def collect(self, artifacts_key=None, rec_filter_func=None, only_exist=True) -> dict: """ @@ -190,7 +190,7 @@ class RecorderCollector(Collector): collect_dict = {} # filter records - recs = self.experiment.list_recorders(filter_string=self.filter_string) + recs = self.experiment.list_recorders(**self.list_kwargs) recs_flt = {} for rid, rec in recs.items(): if rec_filter_func is None or rec_filter_func(rec): From 3183a232df7315108947fb21f12352b81dc74fad Mon Sep 17 00:00:00 2001 From: chaosyu Date: Fri, 16 Jul 2021 19:48:46 +0800 Subject: [PATCH 20/61] update doc str --- qlib/workflow/task/collect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/workflow/task/collect.py b/qlib/workflow/task/collect.py index d8397b432..467281666 100644 --- a/qlib/workflow/task/collect.py +++ b/qlib/workflow/task/collect.py @@ -151,7 +151,7 @@ class RecorderCollector(Collector): rec_filter_func (Callable, optional): filter the recorder by return True or False. Defaults to None. artifacts_path (dict, optional): The artifacts name and its path in Recorder. Defaults to {"pred": "pred.pkl", "IC": "sig_analysis/ic.pkl"}. artifacts_key (str or List, optional): the artifacts key you want to get. If None, get all artifacts. - filter_string (str): filter string that used to apply in recorder quering (only support mlflow for now). + list_kwargs (str): arguments for list_recorders function. 
""" super().__init__(process_list=process_list) if isinstance(experiment, str): From f8a2b0533bee39f87d4478a638da0c7d9ced1aab Mon Sep 17 00:00:00 2001 From: slowy07 Date: Sun, 18 Jul 2021 09:01:45 +0700 Subject: [PATCH 21/61] lgtm issue: fixing unused import of 'time' --- qlib/model/trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qlib/model/trainer.py b/qlib/model/trainer.py index a534a7a3b..3f1ae8a96 100644 --- a/qlib/model/trainer.py +++ b/qlib/model/trainer.py @@ -12,7 +12,6 @@ In ``DelayTrainer``, the first step is only to save some necessary info to model """ import socket -import time from typing import Callable, List from qlib.data.dataset import Dataset From 2023f714c9e7b630067a9f7abacbd16066afa51f Mon Sep 17 00:00:00 2001 From: slowy07 Date: Sun, 18 Jul 2021 09:08:43 +0700 Subject: [PATCH 22/61] [fixed] lgtm issue : unused imported module of 'signal' and change to PEP8 style code imported module --- qlib/workflow/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/qlib/workflow/utils.py b/qlib/workflow/utils.py index 5a93eacca..6e1e76529 100644 --- a/qlib/workflow/utils.py +++ b/qlib/workflow/utils.py @@ -1,10 +1,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import sys, traceback, signal, atexit, logging +import atexit +import logging +import sys +import traceback + +from ..log import get_module_logger from . 
import R from .recorder import Recorder -from ..log import get_module_logger logger = get_module_logger("workflow", logging.INFO) From c97a96363d378051c2a25bb2c60792fb90667fdc Mon Sep 17 00:00:00 2001 From: zhupr Date: Thu, 15 Jul 2021 22:12:53 +0800 Subject: [PATCH 23/61] Add a check if change is mutated to YahooNormalize1d --- scripts/data_collector/yahoo/collector.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index e0e6e0368..feb28a94f 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -325,9 +325,22 @@ class YahooNormalize(BaseNormalize): # NOTE: The data obtained by Yahoo finance sometimes has exceptions # WARNING: If it is normal for a `symbol(exchange)` to differ by a factor of *89* to *111* for consecutive trading days, # WARNING: the logic in the following line needs to be modified - _mask = (change_series >= 89) & (change_series <= 111) - _tmp_cols = ["high", "close", "low", "open", "adjclose"] - df.loc[_mask, _tmp_cols] = df.loc[_mask, _tmp_cols] / 100 + _count = 0 + while True: + # NOTE: may appear unusual for many days in a row + change_series = YahooNormalize.calc_change(df, last_close) + _mask = (change_series >= 89) & (change_series <= 111) + if not _mask.any(): + break + _tmp_cols = ["high", "close", "low", "open", "adjclose"] + df.loc[_mask, _tmp_cols] = df.loc[_mask, _tmp_cols] / 100 + _count += 1 + if _count >= 10: + _symbol = df.loc[df[symbol_field_name].first_valid_index()]["symbol"] + logger.warning( + f"{_symbol} `change` is abnormal for {_count} consecutive days, please check the specific data file carefully" + ) + df["change"] = YahooNormalize.calc_change(df, last_close) columns += ["change"] From 3f5334ab39f1810441b2b27e4f3167941e33e62c Mon Sep 17 00:00:00 2001 From: wuzhe1234 <46434750+wuzhe1234@users.noreply.github.com> Date: Mon, 19 Jul 2021 09:29:37 
+0800 Subject: [PATCH 24/61] Update qrun to automaticly save the config to the artifacts uri --- qlib/workflow/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qlib/workflow/cli.py b/qlib/workflow/cli.py index 879c0aaeb..16e5b6296 100644 --- a/qlib/workflow/cli.py +++ b/qlib/workflow/cli.py @@ -53,7 +53,8 @@ def workflow(config_path, experiment_name="workflow", uri_folder="mlruns"): exp_manager["kwargs"]["uri"] = "file:" + str(Path(os.getcwd()).resolve() / uri_folder) qlib.init(**config.get("qlib_init"), exp_manager=exp_manager) - task_train(config.get("task"), experiment_name=experiment_name) + recorder = task_train(config.get("task"), experiment_name=experiment_name) + recorder.save_objects(config=config) # function to run worklflow by config From 4e862f7d1fec5b765cbec44f1cd41f8ff377e7ca Mon Sep 17 00:00:00 2001 From: Young Date: Tue, 20 Jul 2021 05:12:22 +0000 Subject: [PATCH 25/61] add print cash in verbose mode and code format --- qlib/backtest/account.py | 2 +- qlib/backtest/exchange.py | 20 +++++++------- qlib/backtest/executor.py | 36 ++++++++++---------------- qlib/contrib/strategy/rule_strategy.py | 25 +++++++++--------- 4 files changed, 37 insertions(+), 46 deletions(-) diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py index 806f88a96..13213c344 100644 --- a/qlib/backtest/account.py +++ b/qlib/backtest/account.py @@ -160,7 +160,7 @@ class Account: self.accum_info.add_return_value(profit) # note here do not consider cost def update_order(self, order, trade_val, cost, trade_price): - if not self.is_port_metr_enabled(): + if self.current.skip_update(): # TODO: supporting polymorphism for account # updating order for infinite position is meaningless return diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index a22754885..ea1d012eb 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -512,7 +512,7 @@ class Exchange: def _get_factor_or_raise_erorr(self, factor: float = None, 
stock_id: str = None, start_time=None, end_time=None): """Please refer to the docs of get_amount_of_trade_unit""" if factor is None: - if stock_id is not None and start_time is not None and end_time is not None : + if stock_id is not None and start_time is not None and end_time is not None: factor = self.get_factor(stock_id=stock_id, start_time=start_time, end_time=end_time) else: raise ValueError(f"`factor` and (`stock_id`, `start_time`, `end_time`) can't both be None") @@ -537,15 +537,16 @@ class Exchange: the end time of trading range """ if not self.trade_w_adj_price and self.trade_unit is not None: - factor = self._get_factor_or_raise_erorr(factor=factor, - stock_id=stock_id, - start_time=start_time, - end_time=end_time) + factor = self._get_factor_or_raise_erorr( + factor=factor, stock_id=stock_id, start_time=start_time, end_time=end_time + ) return self.trade_unit / factor else: return None - def round_amount_by_trade_unit(self, deal_amount, factor: float = None, stock_id: str = None, start_time=None, end_time=None): + def round_amount_by_trade_unit( + self, deal_amount, factor: float = None, stock_id: str = None, start_time=None, end_time=None + ): """Parameter Please refer to the docs of get_amount_of_trade_unit @@ -555,10 +556,9 @@ class Exchange: """ if not self.trade_w_adj_price and self.trade_unit is not None: # the minimal amount is 1. Add 0.1 for solving precision problem. 
- factor = self._get_factor_or_raise_erorr(factor=factor, - stock_id=stock_id, - start_time=start_time, - end_time=end_time) + factor = self._get_factor_or_raise_erorr( + factor=factor, stock_id=stock_id, start_time=start_time, end_time=end_time + ) return (deal_amount * factor + 0.1) // self.trade_unit * self.trade_unit / factor return deal_amount diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index 999e6d8a7..b05b73801 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -495,30 +495,22 @@ class SimulatorExecutor(BaseExecutor): execute_result.append((order, trade_val, trade_cost, trade_price)) if self.verbose: if order.direction == Order.SELL: # sell - print( - "[I {:%Y-%m-%d %H:%M:%S}]: sell {}, price {:.2f}, amount {}, deal_amount {}, factor {}, value {:.2f}.".format( - trade_start_time, - order.stock_id, - trade_price, - order.amount, - order.deal_amount, - order.factor, - trade_val, - ) - ) + action = "sell" else: - print( - "[I {:%Y-%m-%d %H:%M:%S}]: buy {}, price {:.2f}, amount {}, deal_amount {}, factor {}, value {:.2f}.".format( - trade_start_time, - order.stock_id, - trade_price, - order.amount, - order.deal_amount, - order.factor, - trade_val, - ) + action = "buy" + print( + "[I {:%Y-%m-%d %H:%M:%S}]: {} {}, price {:.2f}, amount {}, deal_amount {}, factor {}, value {:.2f}, cach {:.2f}.".format( + trade_start_time, + action, + order.stock_id, + trade_price, + order.amount, + order.deal_amount, + order.factor, + trade_val, + self.trade_account.get_cash(), ) - + ) else: if self.verbose: print("[W {:%Y-%m-%d %H:%M:%S}]: {} wrong.".format(trade_start_time, order.stock_id)) diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py index 24386f723..36059f5a0 100644 --- a/qlib/contrib/strategy/rule_strategy.py +++ b/qlib/contrib/strategy/rule_strategy.py @@ -63,9 +63,9 @@ class TWAPStrategy(BaseStrategy): stock_id=order.stock_id, start_time=trade_start_time, end_time=trade_end_time ): 
continue - _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit(stock_id=order.stock_id, - start_time=order.start_time, - end_time=order.end_time) + _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit( + stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time + ) _order_amount = None # considering trade unit if _amount_trade_unit is None: @@ -169,9 +169,9 @@ class SBBStrategyBase(BaseStrategy): self.trade_trend[order.stock_id] = _pred_trend continue # get amount of one trade unit - _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit(stock_id=order.stock_id, - start_time=order.start_time, - end_time=order.end_time) + _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit( + stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time + ) if _pred_trend == self.TREND_MID: _order_amount = None # considering trade unit @@ -471,9 +471,9 @@ class ACStrategy(BaseStrategy): if sig_sam is None or np.isnan(sig_sam): # no signal, TWAP - _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit(stock_id=order.stock_id, - start_time=order.start_time, - end_time=order.end_time) + _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit( + stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time + ) if _amount_trade_unit is None: # divide the order into equal parts, and trade one part _order_amount = self.trade_amount[order.stock_id] / (trade_len - trade_step) @@ -494,10 +494,9 @@ class ACStrategy(BaseStrategy): np.sinh(kappa * (trade_len - trade_step)) - np.sinh(kappa * (trade_len - trade_step - 1)) ) / np.sinh(kappa * trade_len) _order_amount = order.amount * amount_ratio - _order_amount = self.trade_exchange.round_amount_by_trade_unit(_order_amount, - stock_id=order.stock_id, - start_time=order.start_time, - end_time=order.end_time) + _order_amount = self.trade_exchange.round_amount_by_trade_unit( + _order_amount, stock_id=order.stock_id, 
start_time=order.start_time, end_time=order.end_time + ) if order.direction == order.SELL: # sell all amount at last From bf7732e2842277fbb9f52063e5a888e137aae12e Mon Sep 17 00:00:00 2001 From: zhupr Date: Wed, 21 Jul 2021 00:02:33 +0800 Subject: [PATCH 26/61] =?UTF-8?q?fix=20df=5Ffeatures.index=20con=C8=9Bine?= =?UTF-8?q?=20np.nan?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/highfreq/highfreq_handler.py | 2 -- examples/highfreq/highfreq_processor.py | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/highfreq/highfreq_handler.py b/examples/highfreq/highfreq_handler.py index 19bb2550b..c15c3ec41 100644 --- a/examples/highfreq/highfreq_handler.py +++ b/examples/highfreq/highfreq_handler.py @@ -99,8 +99,6 @@ class HighFreqHandler(DataHandlerLP): ] names += ["$volume_1"] - fields += ["Cut({0}, 240, None)".format(template_paused.format("Date($close)"))] - names += ["date"] return fields, names diff --git a/examples/highfreq/highfreq_processor.py b/examples/highfreq/highfreq_processor.py index f0ab0dec2..62065469b 100644 --- a/examples/highfreq/highfreq_processor.py +++ b/examples/highfreq/highfreq_processor.py @@ -33,6 +33,9 @@ class HighFreqNorm(Processor): self.feature_vmin[name] = np.nanmin(part_values) def __call__(self, df_features): + df_features["date"] = pd.to_datetime( + df_features.index.get_level_values(level="datetime").to_series().dt.date.values + ) df_features.set_index("date", append=True, drop=True, inplace=True) df_values = df_features.values names = { From f03df874bf7833200464d6ba35190cf51bcc0f65 Mon Sep 17 00:00:00 2001 From: zhupr Date: Wed, 21 Jul 2021 01:04:01 +0800 Subject: [PATCH 27/61] fix macos-test-ci --- .github/workflows/test.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e7b775bf4..67b39415c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ 
-47,11 +47,13 @@ jobs: fi shell: bash - - name: Install Lightgbm for MacOS + - name: Install Lightgbm and Upgrade openssl for MacOS if: runner.os == 'macOS' run: | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)" HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm + brew upgrade openssl + sudo $CONDA/bin/python -m pip install -U pyopenssl idna - name: Test data downloads run: | From eb3c5b3088328e8f895063d49685911c926e25e1 Mon Sep 17 00:00:00 2001 From: zhupr Date: Wed, 21 Jul 2021 13:16:34 +0800 Subject: [PATCH 28/61] macos-test-ci split out separately --- .github/workflows/test.yml | 10 +---- .github/workflows/test_macos.yml | 72 ++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/test_macos.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 67b39415c..7a78d2d9a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,7 +12,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [windows-latest, ubuntu-16.04, ubuntu-18.04, ubuntu-20.04, macos-latest] + os: [windows-latest, ubuntu-16.04, ubuntu-18.04, ubuntu-20.04] python-version: [3.6, 3.7, 3.8, 3.9] steps: @@ -46,14 +46,6 @@ jobs: sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy fi shell: bash - - - name: Install Lightgbm and Upgrade openssl for MacOS - if: runner.os == 'macOS' - run: | - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)" - HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm - brew upgrade openssl - sudo $CONDA/bin/python -m pip install -U pyopenssl idna - name: Test data downloads run: | diff --git a/.github/workflows/test_macos.yml b/.github/workflows/test_macos.yml new file mode 100644 index 000000000..9a3bccac9 --- /dev/null +++ b/.github/workflows/test_macos.yml @@ -0,0 +1,72 @@ +name: Test MacOS + +on: + push: + branches: [ main ] 
+ pull_request: + branches: [ main ] + +jobs: + build: + + runs-on: macos-latest + strategy: + matrix: + python-version: [3.6, 3.7, 3.8, 3.9] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Lint with Black + run: | + cd .. + sudo $CONDA/bin/python -m pip install black + $CONDA/bin/python -m black qlib -l 120 --check --diff + + # Test Qlib installed with pip + - name: Install Qlib with pip + run: | + sudo $CONDA/bin/python -m pip install numpy==1.19.5 + sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy + + - name: Install Lightgbm for MacOS + run: | + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)" + HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm + + - name: Test data downloads + run: | + $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + + - name: Test workflow by config (install from pip) + run: | + $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml + sudo $CONDA/bin/python -m pip uninstall -y pyqlib + + # Test Qlib installed from source + - name: Install Qlib from source + run: | + sudo $CONDA/bin/python -m pip install --upgrade cython + sudo $CONDA/bin/python -m pip install numpy jupyter jupyter_contrib_nbextensions + sudo $CONDA/bin/python -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't + sudo $CONDA/bin/python setup.py install + + - name: Install test dependencies + run: | + sudo $CONDA/bin/python -m pip install --upgrade pip + sudo $CONDA/bin/python -m pip install -U pyopenssl idna + sudo $CONDA/bin/python -m pip install black pytest + + - name: Unit tests with Pytest + run: | + cd tests + $CONDA/bin/python -m pytest . 
--durations=0 + + - name: Test workflow by config (install from source) + run: | + $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml From 5519420efd652b18be790f9957281892ec476162 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Wed, 21 Jul 2021 18:30:25 +0800 Subject: [PATCH 29/61] Update test_macos.yml Give more comments about the MacOS test yaml --- .github/workflows/test_macos.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test_macos.yml b/.github/workflows/test_macos.yml index 9a3bccac9..57aa87ded 100644 --- a/.github/workflows/test_macos.yml +++ b/.github/workflows/test_macos.yml @@ -1,3 +1,4 @@ +# There are some issues (in the downloading data phase) on MacOS when running with other tests. So we split it into an individual config. name: Test MacOS on: From 83d4387e9f51fb8c51b695952877a039f6b7d25d Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Wed, 21 Jul 2021 12:47:31 +0000 Subject: [PATCH 30/61] pandas_order_indicator --- qlib/backtest/report.py | 330 ++++++++++++++++++++++++++++++---------- 1 file changed, 251 insertions(+), 79 deletions(-) diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 308decd12..8e093e0a6 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -5,8 +5,9 @@ from collections import OrderedDict from logging import warning import pathlib -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Union import warnings +import inspect import numpy as np import pandas as pd @@ -62,6 +63,7 @@ class Report: - Else, it represent end time of benchmark, by default None """ + self.init_vars() self.init_bench(freq=freq, benchmark_config=benchmark_config) @@ -255,7 +257,7 @@ class Indicator: def __init__(self): # order indicator is metrics for a single order for a specific step self.order_indicator_his = OrderedDict() - self.order_indicator: Dict[str, pd.Series] = OrderedDict() + self.order_indicator = 
PandasOrderIndicator() # trade indicator is metrics for all orders for a specific step self.trade_indicator_his = OrderedDict() @@ -265,12 +267,12 @@ class Indicator: # def reset(self, trade_calendar: TradeCalendarManager): def reset(self): - self.order_indicator = OrderedDict() + self.order_indicator = PandasOrderIndicator() self.trade_indicator = OrderedDict() # self._trade_calendar = trade_calendar def record(self, trade_start_time): - self.order_indicator_his[trade_start_time] = self.order_indicator + self.order_indicator_his[trade_start_time] = self.order_indicator.data self.trade_indicator_his[trade_start_time] = self.trade_indicator def _update_order_trade_info(self, trade_info: list): @@ -280,6 +282,7 @@ class Indicator: trade_value = dict() trade_cost = dict() trade_dir = dict() + pa = dict() for order, _trade_val, _trade_cost, _trade_price in trade_info: amount[order.stock_id] = order.amount_delta @@ -288,66 +291,58 @@ class Indicator: trade_value[order.stock_id] = _trade_val * order.sign trade_cost[order.stock_id] = _trade_cost trade_dir[order.stock_id] = order.direction + pa[order.stock_id] = 0 - self.order_indicator["amount"] = self.order_indicator["inner_amount"] = pd.Series(amount) - self.order_indicator["deal_amount"] = pd.Series(deal_amount) + self.order_indicator.assign("amount", amount) + self.order_indicator.assign("inner_amount", amount) + self.order_indicator.assign("deal_amount", deal_amount) # NOTE: trade_price and baseline price will be same on the lowest-level - self.order_indicator["trade_price"] = pd.Series(trade_price) - self.order_indicator["trade_value"] = pd.Series(trade_value) - self.order_indicator["trade_cost"] = pd.Series(trade_cost) - self.order_indicator["trade_dir"] = pd.Series(trade_dir) + self.order_indicator.assign("trade_price", trade_price) + self.order_indicator.assign("trade_value", trade_value) + self.order_indicator.assign("trade_cost", trade_cost) + self.order_indicator.assign("trade_dir", trade_dir) + 
self.order_indicator.assign("pa", pa) def _update_order_fulfill_rate(self): - self.order_indicator["ffr"] = self.order_indicator["deal_amount"] / self.order_indicator["amount"] + def func(deal_amount, amount): + return deal_amount / amount + self.order_indicator.transfer(func, "ffr") + """ def _update_order_price_advantage(self): # NOTE: # trade_price and baseline price will be same on the lowest-level # So Pa should be 0 or do nothing - self.order_indicator["pa"] = 0 + self.order_indicator.assign("pa", 0) + """ def update_order_indicators(self, trade_info: list): self._update_order_trade_info(trade_info=trade_info) self._update_order_fulfill_rate() - self._update_order_price_advantage() + # self._update_order_price_advantage() def _agg_order_trade_info(self, inner_order_indicators: List[Dict[str, pd.Series]]): - inner_amount = pd.Series() - deal_amount = pd.Series() - trade_price = pd.Series() - trade_value = pd.Series() - trade_cost = pd.Series() - trade_dir = pd.Series() - for _order_indicator in inner_order_indicators: - inner_amount = inner_amount.add(_order_indicator["inner_amount"], fill_value=0) - deal_amount = deal_amount.add(_order_indicator["deal_amount"], fill_value=0) - trade_price = trade_price.add( - _order_indicator["trade_price"] * _order_indicator["deal_amount"], fill_value=0 - ) - trade_value = trade_value.add(_order_indicator["trade_value"], fill_value=0) - trade_cost = trade_cost.add(_order_indicator["trade_cost"], fill_value=0) - trade_dir = trade_dir.add(_order_indicator["trade_dir"], fill_value=0) + all_metric = ["inner_amount", "deal_amount", "trade_price", + "trade_value", "trade_cost", "trade_dir"] + metric_dict = PandasOrderIndicator.agg_all_indicators(inner_order_indicators, all_metric, fill_value=0) + for metric in metric_dict: + self.order_indicator.assign(metric, metric_dict[metric]) - trade_dir = trade_dir.apply(Order.parse_dir) + def func(trade_price, deal_amount): + return trade_price / deal_amount + 
self.order_indicator.transfer(func, "trade_price") - self.order_indicator["inner_amount"] = inner_amount - self.order_indicator["deal_amount"] = deal_amount - trade_price /= self.order_indicator["deal_amount"] - self.order_indicator["trade_price"] = trade_price - self.order_indicator["trade_value"] = trade_value - self.order_indicator["trade_cost"] = trade_cost - self.order_indicator["trade_dir"] = trade_dir + def func_apply(trade_dir): + return trade_dir.apply(Order.parse_dir) + self.order_indicator.transfer(func_apply, "trade_dir") def _update_trade_amount(self, outer_trade_decision: BaseTradeDecision): # NOTE: these indicator is designed for order execution, so the decision: List[Order] = outer_trade_decision.get_decision() if decision is None: - self.order_indicator["amount"] = pd.Series() + self.order_indicator.assign("amount", {}) else: - self.order_indicator["amount"] = pd.Series({order.stock_id: order.amount_delta for order in decision}) - - def _agg_order_fulfill_rate(self): - self.order_indicator["ffr"] = self.order_indicator["deal_amount"] / self.order_indicator["amount"] + self.order_indicator.assign("amount", {order.stock_id: order.amount_delta for order in decision}) def _get_base_vol_pri( self, @@ -423,17 +418,16 @@ class Indicator: "price": "$close", # TODO: this is not supported now!!!!! 
# default to use deal price of the exchange } - """ # TODO: I think there are potentials to be optimized - trade_dir = self.order_indicator["trade_dir"] + trade_dir = self.order_indicator.get_metric_series("trade_dir") if len(trade_dir) > 0: bp_all, bv_all = [], [] # for oi, (dec, start, end) in zip(inner_order_indicators, decision_list): - bp_s = oi.get("base_price", pd.Series()).reindex(trade_dir.index) - bv_s = oi.get("base_volume", pd.Series()).reindex(trade_dir.index) + bp_s = oi.get_metric_series("base_price").reindex(trade_dir.index) + bv_s = oi.get_metric_series("base_volume").reindex(trade_dir.index) bp_new, bv_new = {}, {} for pr, v, (inst, direction) in zip(bp_s.values, bv_s.values, trade_dir.items()): if np.isnan(pr): @@ -457,17 +451,21 @@ class Indicator: bp_all = pd.concat(bp_all, axis=1) bv_all = pd.concat(bv_all, axis=1) - self.order_indicator["base_volume"] = bv_all.sum(axis=1) - self.order_indicator["base_price"] = (bp_all * bv_all).sum(axis=1) / self.order_indicator["base_volume"] + base_volume = bv_all.sum(axis=1) + self.order_indicator.assign("base_volume", base_volume) + self.order_indicator.assign("base_price", (bp_all * bv_all).sum(axis=1) / base_volume) def _agg_order_price_advantage(self): - if not self.order_indicator["trade_price"].empty: - sign = 1 - self.order_indicator["trade_dir"] * 2 - self.order_indicator["pa"] = sign * ( - self.order_indicator["trade_price"] / self.order_indicator["base_price"] - 1 - ) + def if_empty_func(trade_price): + return trade_price.empty + if_empty = self.order_indicator.transfer(if_empty_func) + if not if_empty: + def func(trade_dir, trade_price, base_price): + sign = 1 - trade_dir * 2 + return sign * (trade_price / base_price - 1) + self.order_indicator.transfer(func, "pa") else: - self.order_indicator["pa"] = pd.Series() + self.order_indicator.assign("pa", {}) def agg_order_indicators( self, @@ -477,57 +475,60 @@ class Indicator: trade_exchange: Exchange, indicator_config={}, ): - 
self._agg_order_trade_info(inner_order_indicators) + self._agg_order_trade_info(inner_order_indicators) # TODO self._update_trade_amount(outer_trade_decision) - self._agg_order_fulfill_rate() + self._update_order_fulfill_rate() pa_config = indicator_config.get("pa_config", {}) - self._agg_base_price(inner_order_indicators, decision_list, trade_exchange, pa_config=pa_config) + self._agg_base_price(inner_order_indicators, decision_list, trade_exchange, pa_config=pa_config) # TODO self._agg_order_price_advantage() def _cal_trade_fulfill_rate(self, method="mean"): if method == "mean": - return self.order_indicator["ffr"].mean() + def func(ffr): + return ffr.mean() elif method == "amount_weighted": - weights = self.order_indicator["deal_amount"].abs() - return (self.order_indicator["ffr"] * weights).sum() / weights.sum() + def func(ffr, deal_amount): + return (ffr * deal_amount.abs()).sum() / (deal_amount.abs().sum()) elif method == "value_weighted": - weights = self.order_indicator["trade_value"].abs() - return (self.order_indicator["ffr"] * weights).sum() / weights.sum() + def func(ffr, trade_value): + return (ffr * trade_value.abs()).sum() / (trade_value.abs().sum()) else: raise ValueError(f"method {method} is not supported!") + return self.order_indicator.transfer(func) def _cal_trade_price_advantage(self, method="mean"): - pa_order = self.order_indicator["pa"] - if isinstance(pa_order, (int, float)): - # pa from atomic executor - return pa_order - if method == "mean": - return pa_order.mean() + def func(pa): + return pa.mean() elif method == "amount_weighted": - weights = self.order_indicator["deal_amount"].abs() - return (pa_order * weights).sum() / weights.sum() + def func(pa, deal_amount): + return (pa * deal_amount.abs()).sum() / (deal_amount.abs().sum()) elif method == "value_weighted": - weights = self.order_indicator["trade_value"].abs() - return (pa_order * weights).sum() / weights.sum() + def func(pa, trade_value): + return (pa * trade_value.abs()).sum() / 
(trade_value.abs().sum()) else: raise ValueError(f"method {method} is not supported!") + return self.order_indicator.transfer(func) def _cal_trade_positive_rate(self): - pa_order = self.order_indicator["pa"] - if isinstance(pa_order, (int, float)): - # pa from atomic executor - return pa_order - return (pa_order > 0).astype(int).sum() / pa_order.count() + def func(pa): + return (pa > 0).astype(int).sum() / pa.count() + return self.order_indicator.transfer(func) def _cal_deal_amount(self): - return self.order_indicator["deal_amount"].abs().sum() + def func(deal_amount): + return deal_amount.abs().sum() + return self.order_indicator.transfer(func) def _cal_trade_value(self): - return self.order_indicator["trade_value"].abs().sum() + def func(trade_value): + return trade_value.abs().sum() + return self.order_indicator.transfer(func) def _cal_trade_order_count(self): - return self.order_indicator["amount"].count() + def func(amount): + return amount.count() + return self.order_indicator.transfer(func) def cal_trade_indicators(self, trade_start_time, freq, indicator_config={}): show_indicator = indicator_config.get("show_indicator", False) @@ -560,3 +561,174 @@ class Indicator: def generate_trade_indicators_dataframe(self): return pd.DataFrame.from_dict(self.trade_indicator_his, orient="index") + + +class BaseOrderIndicator: + + def __init__(self): + pass + + def assign(self, col: str, metric: Union[dict, pd.Series]): + pass + + def transfer(self, func: "Callable", new_col = None): + pass + + def get_metric_series(self, metric: str): + pass + + @classmethod + def agg_all_indicators(indicators, metrics: Union[str, List[str]], fill_value = None): + pass + + +class PandasOrderIndicator(BaseOrderIndicator): + + class SingleMetric: + def __init__(self, metric: Union[dict, pd.Series]): + if isinstance(metric, dict): + self.metric = pd.Series(metric) + elif isinstance(metric, pd.Series): + self.metric = metric + else: + raise ValueError(f"metric must be dict or pd.Series") + + 
def __add__(self, other): + if isinstance(other, (int, float)): + return PandasOrderIndicator.SingleMetric(self.metric + other) + elif isinstance(other, PandasOrderIndicator.SingleMetric): + return PandasOrderIndicator.SingleMetric(self.metric + other.metric) + else: + return NotImplemented + + def __radd__(self, other): + if isinstance(other, (int, float)): + return PandasOrderIndicator.SingleMetric(other + self.metric) + elif isinstance(other, PandasOrderIndicator.SingleMetric): + return PandasOrderIndicator.SingleMetric(other.metric + self.metric) + else: + return NotImplemented + + def __sub__(self, other): + if isinstance(other, (int, float)): + return PandasOrderIndicator.SingleMetric(self.metric - other) + elif isinstance(other, PandasOrderIndicator.SingleMetric): + return PandasOrderIndicator.SingleMetric(self.metric - other.metric) + else: + return NotImplemented + + def __rsub__(self, other): + if isinstance(other, (int, float)): + return PandasOrderIndicator.SingleMetric(other - self.metric) + elif isinstance(other, PandasOrderIndicator.SingleMetric): + return PandasOrderIndicator.SingleMetric(other.metric - self.metric) + else: + return NotImplemented + + def __mul__(self, other): + if isinstance(other, (int, float)): + return PandasOrderIndicator.SingleMetric(self.metric * other) + elif isinstance(other, PandasOrderIndicator.SingleMetric): + return PandasOrderIndicator.SingleMetric(self.metric * other.metric) + else: + return NotImplemented + + def __truediv__(self, other): + if isinstance(other, (int, float)): + return PandasOrderIndicator.SingleMetric(self.metric / other) + elif isinstance(other, PandasOrderIndicator.SingleMetric): + return PandasOrderIndicator.SingleMetric(self.metric / other.metric) + else: + return NotImplemented + + def __eq__(self, other): + if isinstance(other, (int, float)): + return PandasOrderIndicator.SingleMetric(self.metric == other) + elif isinstance(other, PandasOrderIndicator.SingleMetric): + return 
PandasOrderIndicator.SingleMetric(self.metric == other.metric) + else: + return NotImplemented + + def __gt__(self, other): + if isinstance(other, (int, float)): + return PandasOrderIndicator.SingleMetric(self.metric < other) + elif isinstance(other, PandasOrderIndicator.SingleMetric): + return PandasOrderIndicator.SingleMetric(self.metric < other.metric) + else: + return NotImplemented + + def __lt__(self, other): + if isinstance(other, (int, float)): + return PandasOrderIndicator.SingleMetric(self.metric > other) + elif isinstance(other, PandasOrderIndicator.SingleMetric): + return PandasOrderIndicator.SingleMetric(self.metric > other.metric) + else: + return NotImplemented + + def __len__(self): + return len(self.metric) + + def sum(self): + return self.metric.sum() + + def mean(self): + return self.metric.mean() + + def count(self): + return self.metric.count() + + def abs(self): + return PandasOrderIndicator.SingleMetric(self.metric.abs()) + + def astype(self, type): + return PandasOrderIndicator.SingleMetric(self.metric.astype(type)) + + @property + def empty(self): + return self.metric.empty + + """ + @property + def index(self): + return self.metric.index + """ + + def add(self, other, fill_value: None): + return PandasOrderIndicator.SingleMetric(self.metric.add(other.metric, fill_value = fill_value)) + + def apply(self, map_dict: dict): + return PandasOrderIndicator.SingleMetric(self.metric.apply(map_dict)) + + def __init__(self): + self.data: Dict[str, self.SingleMetric] = OrderedDict() + + def assign(self, col: str, metric: Union[dict, pd.Series]): + self.data[col] = self.SingleMetric(metric) + + def transfer(self, func: "Callable", new_col = None): + func_sig = inspect.signature(func).parameters.keys() + func_kwargs = {sig: self.data[sig] for sig in func_sig} + tmp_metric = func(**func_kwargs) + if(new_col is not None): + self.data[new_col] = tmp_metric + return tmp_metric + + def get_metric_series(self, metric: str): + if(metric in self.data): + return 
self.data[metric].metric + else: + return pd.Series() + + @staticmethod + def agg_all_indicators(indicators: list, metrics: Union[str, List[str]], fill_value = None): + """add all order indicators with same metric""" + + metric_dict = {} + if isinstance(metrics, str): + metrics = [metrics] + for metric in metrics: + tmp_metric = PandasOrderIndicator.SingleMetric({}) + for indicator in indicators: + tmp_metric.add(indicator.data[metric], fill_value) + metric_dict[metric] = tmp_metric.metric + return metric_dict \ No newline at end of file From 10c182e2b06d9a66ecd32b71b2ddc4f835df96cf Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Wed, 21 Jul 2021 14:09:12 +0000 Subject: [PATCH 31/61] add order_indicator doc --- qlib/backtest/report.py | 96 ++++++++++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 21 deletions(-) diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 8e093e0a6..1ae50f5e2 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -308,14 +308,6 @@ class Indicator: return deal_amount / amount self.order_indicator.transfer(func, "ffr") - """ - def _update_order_price_advantage(self): - # NOTE: - # trade_price and baseline price will be same on the lowest-level - # So Pa should be 0 or do nothing - self.order_indicator.assign("pa", 0) - """ - def update_order_indicators(self, trade_info: list): self._update_order_trade_info(trade_info=trade_info) self._update_order_fulfill_rate() @@ -475,7 +467,7 @@ class Indicator: trade_exchange: Exchange, indicator_config={}, ): - self._agg_order_trade_info(inner_order_indicators) # TODO + self._agg_order_trade_info(inner_order_indicators) self._update_trade_amount(outer_trade_decision) self._update_order_fulfill_rate() pa_config = indicator_config.get("pa_config", {}) @@ -564,27 +556,97 @@ class Indicator: class BaseOrderIndicator: + """The data structure of order indicator. 
+ """ def __init__(self): pass def assign(self, col: str, metric: Union[dict, pd.Series]): + """assign one metric. + + Parameters + ---------- + col : str + the metric name of one metric. + metric : Union[dict, pd.Series] + the metric data. + """ + pass - def transfer(self, func: "Callable", new_col = None): + def transfer(self, func: "Callable", new_col: str = None): + """compute new metric with existing. + + Parameters + ---------- + func : Callable + the func of computing new metric. + the kwargs of func will be replaced with metric data by name in this function. + e.g. + def func(pa): + return (pa > 0).astype(int).sum() / pa.count() + new_col : str, optional + New metric will be assigned in the data if new_col is not None, by default None. + + Return + ---------- + SingleMetric + new metric. + """ + pass def get_metric_series(self, metric: str): + """return the single metric with pd.Series format + + Parameters + ---------- + metric : str + the metric name. + + Return + ---------- + pd.Series + the single metric. + If there is no metric name in the data, return pd.Series(). + """ + pass @classmethod - def agg_all_indicators(indicators, metrics: Union[str, List[str]], fill_value = None): + def agg_all_indicators(indicators: list, metrics: Union[str, List[str]], fill_value: float = None): + """sum indicators with the same metrics. + + Parameters + ---------- + indicators : List[BaseOrderIndicator] + the list of all inner indicators. + metrics : Union[str, List[str]] + all metrics needs ot be sumed. + fill_value : float, optional + fill np.NaN with value. By default None. + + Return + ---------- + Dict[str: SingleMetric] + a dict of metric name and data. + """ + pass class PandasOrderIndicator(BaseOrderIndicator): + """The data structure is OrderedDict(str: SingleMetric). + Each SingleMetric based on pd.Series is one metric. + Str is the name of metric. + """ class SingleMetric: + """The data structure of the single metric. 
+ The following methods are used for computing metrics in one indicator. + """ + def __init__(self, metric: Union[dict, pd.Series]): if isinstance(metric, dict): self.metric = pd.Series(metric) @@ -687,12 +749,6 @@ class PandasOrderIndicator(BaseOrderIndicator): def empty(self): return self.metric.empty - """ - @property - def index(self): - return self.metric.index - """ - def add(self, other, fill_value: None): return PandasOrderIndicator.SingleMetric(self.metric.add(other.metric, fill_value = fill_value)) @@ -705,7 +761,7 @@ class PandasOrderIndicator(BaseOrderIndicator): def assign(self, col: str, metric: Union[dict, pd.Series]): self.data[col] = self.SingleMetric(metric) - def transfer(self, func: "Callable", new_col = None): + def transfer(self, func: "Callable", new_col: str = None): func_sig = inspect.signature(func).parameters.keys() func_kwargs = {sig: self.data[sig] for sig in func_sig} tmp_metric = func(**func_kwargs) @@ -721,14 +777,12 @@ class PandasOrderIndicator(BaseOrderIndicator): @staticmethod def agg_all_indicators(indicators: list, metrics: Union[str, List[str]], fill_value = None): - """add all order indicators with same metric""" - metric_dict = {} if isinstance(metrics, str): metrics = [metrics] for metric in metrics: tmp_metric = PandasOrderIndicator.SingleMetric({}) for indicator in indicators: - tmp_metric.add(indicator.data[metric], fill_value) + tmp_metric = tmp_metric.add(indicator.data[metric], fill_value) metric_dict[metric] = tmp_metric.metric return metric_dict \ No newline at end of file From 7b20abeda10ddbb851d68dbcf71177b980442f0a Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Tue, 13 Jul 2021 12:12:45 +0800 Subject: [PATCH 32/61] Add files via upload Add naive transformer model and a improved transformer model. 
--- qlib/contrib/model/pytorch_localformer.py | 341 ++++++++++++++++++++++ qlib/contrib/model/pytorch_transformer.py | 312 ++++++++++++++++++++ 2 files changed, 653 insertions(+) create mode 100644 qlib/contrib/model/pytorch_localformer.py create mode 100644 qlib/contrib/model/pytorch_transformer.py diff --git a/qlib/contrib/model/pytorch_localformer.py b/qlib/contrib/model/pytorch_localformer.py new file mode 100644 index 000000000..f085bd4b2 --- /dev/null +++ b/qlib/contrib/model/pytorch_localformer.py @@ -0,0 +1,341 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + + +from __future__ import division +from __future__ import print_function + +import os +import numpy as np +import pandas as pd +import copy +import math +from ...utils import get_or_create_path +from ...log import get_module_logger + +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader + +from .pytorch_utils import count_parameters +from ...model.base import Model +from ...data.dataset import DatasetH, TSDatasetH +from ...data.dataset.handler import DataHandlerLP +from torch.nn.modules.container import ModuleList + +import pdb + +# qrun benchmarks/Transformer/workflow_config_localformer_Alpha158.yaml +# 0.992366, @13, +''' +{'IC': 0.037426503365732174, + 'ICIR': 0.28977883455541603, + 'Rank IC': 0.04659889541774283, + 'Rank ICIR': 0.373569340092482} + +'The following are analysis results of the excess return without cost.' + risk +mean 0.000381 +std 0.004109 +annualized_return 0.096066 +information_ratio 1.472729 +max_drawdown -0.094917 +'The following are analysis results of the excess return with cost.' 
+ risk +mean 0.000213 +std 0.004111 +annualized_return 0.053630 +information_ratio 0.821711 +max_drawdown -0.113694 +''' + + +class LocalformerModel(Model): + def __init__( + self, + d_feat: int = 20, + d_model: int = 64, + batch_size: int = 8192, + nhead: int = 2, + num_layers: int = 2, + dropout: float = 0, + n_epochs=100, + lr=0.0001, + metric="", + early_stop=5, + loss="mse", + optimizer="adam", + reg=1e-3, + n_jobs=10, + GPU=2, + seed=None, + **kwargs + ): + + # set hyper-parameters. + self.d_model = d_model + self.dropout = dropout + self.n_epochs = n_epochs + self.lr = lr + self.reg = reg + self.metric = metric + self.batch_size = batch_size + self.early_stop = early_stop + self.optimizer = optimizer.lower() + self.loss = loss + self.n_jobs = n_jobs + self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu") + self.seed = seed + self.logger = get_module_logger("TransformerModel") + print('do we have gpu?{}'.format(torch.cuda.is_available())) + self.logger.info( + "Improved Transformer:" + "\nbatch_size : {}" + "\ndevice : {}".format(self.batch_size, self.device) + ) + + if self.seed is not None: + np.random.seed(self.seed) + torch.manual_seed(self.seed) + + self.model = Transformer(d_feat, d_model, nhead, num_layers, dropout, self.device) + if optimizer.lower() == "adam": + self.train_optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.reg) + elif optimizer.lower() == "gd": + self.train_optimizer = optim.SGD(self.model.parameters(), lr=self.lr, weight_decay=self.reg) + else: + raise NotImplementedError("optimizer {} is not supported!".format(optimizer)) + + self.fitted = False + self.model.to(self.device) + + @property + def use_gpu(self): + return self.device != torch.device("cpu") + + def mse(self, pred, label): + loss = (pred.float() - label.float()) ** 2 + return torch.mean(loss) + + def loss_fn(self, pred, label): + mask = ~torch.isnan(label) + + if self.loss == "mse": + return 
self.mse(pred[mask], label[mask]) + + raise ValueError("unknown loss `%s`" % self.loss) + + def metric_fn(self, pred, label): + + mask = torch.isfinite(label) + + if self.metric == "" or self.metric == "loss": + return -self.loss_fn(pred[mask], label[mask]) + + raise ValueError("unknown metric `%s`" % self.metric) + + def train_epoch(self, data_loader): + + self.model.train() + + for data in data_loader: + feature = data[:, :, 0:-1].to(self.device) + label = data[:, -1, -1].to(self.device) + + pred = self.model(feature.float()) # .float() + loss = self.loss_fn(pred, label) + + self.train_optimizer.zero_grad() + loss.backward() + torch.nn.utils.clip_grad_value_(self.model.parameters(), 3.0) + self.train_optimizer.step() + + def test_epoch(self, data_loader): + + self.model.eval() + + scores = [] + losses = [] + + for data in data_loader: + + feature = data[:, :, 0:-1].to(self.device) + # feature[torch.isnan(feature)] = 0 + label = data[:, -1, -1].to(self.device) + + with torch.no_grad(): + pred = self.model(feature.float()) # .float() + loss = self.loss_fn(pred, label) + losses.append(loss.item()) + + score = self.metric_fn(pred, label) + scores.append(score.item()) + + return np.mean(losses), np.mean(scores) + + def fit( + self, + dataset: DatasetH, + evals_result=dict(), + save_path=None, + ): + + dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + + dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader + dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader + + train_loader = DataLoader( + dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs, drop_last=True + ) + valid_loader = DataLoader( + dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True + ) + + save_path = get_or_create_path(save_path) + + stop_steps = 0 + 
train_loss = 0 + best_score = -np.inf + best_epoch = 0 + evals_result["train"] = [] + evals_result["valid"] = [] + + # train + self.logger.info("training...") + self.fitted = True + + for step in range(self.n_epochs): + self.logger.info("Epoch%d:", step) + self.logger.info("training...") + self.train_epoch(train_loader) + self.logger.info("evaluating...") + train_loss, train_score = self.test_epoch(train_loader) + val_loss, val_score = self.test_epoch(valid_loader) + self.logger.info("train %.6f, valid %.6f" % (train_score, val_score)) + evals_result["train"].append(train_score) + evals_result["valid"].append(val_score) + + if val_score > best_score: + best_score = val_score + stop_steps = 0 + best_epoch = step + best_param = copy.deepcopy(self.model.state_dict()) + else: + stop_steps += 1 + if stop_steps >= self.early_stop: + self.logger.info("early stop") + break + + self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch)) + self.model.load_state_dict(best_param) + torch.save(best_param, save_path) + + if self.use_gpu: + torch.cuda.empty_cache() + + def predict(self, dataset): + if not self.fitted: + raise ValueError("model is not fitted yet!") + + dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I) + dl_test.config(fillna_type="ffill+bfill") + test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs) + self.model.eval() + preds = [] + + for data in test_loader: + feature = data[:, :, 0:-1].to(self.device) + + with torch.no_grad(): + pred = self.model(feature.float()).detach().cpu().numpy() + + preds.append(pred) + + return pd.Series(np.concatenate(preds), index=dl_test.get_index()) + + +class PositionalEncoding(nn.Module): + def __init__(self, d_model, max_len=1000): + super(PositionalEncoding, self).__init__() + pe = torch.zeros(max_len, d_model) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.exp(torch.arange(0, d_model, 2).float() * 
(-math.log(10000.0) / d_model)) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0).transpose(0, 1) + self.register_buffer('pe', pe) + + def forward(self, x): + # [T, N, F] + return x + self.pe[:x.size(0), :] + + +def _get_clones(module, N): + return ModuleList([copy.deepcopy(module) for i in range(N)]) + + +class LocalformerEncoder(nn.Module): + __constants__ = ['norm'] + + def __init__(self, encoder_layer, num_layers, d_model): + super(LocalformerEncoder, self).__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.conv = _get_clones(nn.Conv1d(d_model, d_model, 3, 1, 1), num_layers) + self.num_layers = num_layers + + def forward(self, src, mask): + output = src + out = src + + for i, mod in enumerate(self.layers): + # [T, N, F] --> [N, T, F] --> [N, F, T] + out = output.transpose(1, 0).transpose(2, 1) + out = self.conv[i](out).transpose(2, 1).transpose(1, 0) + + output = mod(output+out, src_mask=mask) + + return output + out + + +class Transformer(nn.Module): + def __init__(self, d_feat=6, d_model=8, nhead=4, num_layers=2, dropout=0.5, device=None): + super(Transformer, self).__init__() + self.rnn = nn.GRU( + input_size=d_model, + hidden_size=d_model, + num_layers=num_layers, + batch_first=False, + dropout=dropout, + ) + self.feature_layer = nn.Linear(d_feat, d_model) + self.pos_encoder = PositionalEncoding(d_model) + self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout) + self.transformer_encoder = LocalformerEncoder(self.encoder_layer, num_layers=num_layers, d_model=d_model) + self.decoder_layer = nn.Linear(d_model, 1) + self.device = device + self.d_feat = d_feat + + def forward(self, src): + # pdb.set_trace() + # src [N, T, F], [512, 60, 6] + + src = self.feature_layer(src) # [512, 60, 8] + + # src [N, T, F] --> [T, N, F], [60, 512, 8] + src = src.transpose(1, 0) # not batch first + + mask = None + + src = self.pos_encoder(src) + output 
= self.transformer_encoder(src, mask) # [60, 512, 8] + + output, _ = self.rnn(output) + + # [T, N, F] --> [N, T*F] + output = self.decoder_layer(output.transpose(1, 0)[:, -1, :]) # [512, 1] + + return output.squeeze() + diff --git a/qlib/contrib/model/pytorch_transformer.py b/qlib/contrib/model/pytorch_transformer.py new file mode 100644 index 000000000..85582be1f --- /dev/null +++ b/qlib/contrib/model/pytorch_transformer.py @@ -0,0 +1,312 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + + +from __future__ import division +from __future__ import print_function + +import os +import numpy as np +import pandas as pd +import copy +import math +from ...utils import get_or_create_path +from ...log import get_module_logger + +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader + +from .pytorch_utils import count_parameters +from ...model.base import Model +from ...data.dataset import DatasetH, TSDatasetH +from ...data.dataset.handler import DataHandlerLP + +import pdb + +# qrun benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml +# 0.993681, @11, +''' + 'IC': 0.03186587768611013, + 'ICIR': 0.2556910881045764, + 'Rank IC': 0.04735251936658551, + 'Rank ICIR': 0.388378955424602 + +'The following are analysis results of the excess return without cost.' + risk +mean 0.000309 +std 0.004209 +annualized_return 0.077839 +information_ratio 1.164993 +max_drawdown -0.106215 +'The following are analysis results of the excess return with cost.' 
+ risk +mean 0.000126 +std 0.004209 +annualized_return 0.031707 +information_ratio 0.474567 +max_drawdown -0.131948 +''' + + +class TransformerModel(Model): + def __init__( + self, + d_feat: int = 20, + d_model: int = 64, + batch_size: int = 8192, + nhead: int = 2, + num_layers: int = 2, + dropout: float = 0, + n_epochs=100, + lr=0.0001, + metric="", + early_stop=5, + loss="mse", + optimizer="adam", + reg=1e-3, + n_jobs=10, + GPU=0, + seed=None, + **kwargs + ): + + # set hyper-parameters. + self.d_model = d_model + self.dropout = dropout + self.n_epochs = n_epochs + self.lr = lr + self.reg = reg + self.metric = metric + self.batch_size = batch_size + self.early_stop = early_stop + self.optimizer = optimizer.lower() + self.loss = loss + self.n_jobs = n_jobs + self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu") + self.seed = seed + self.logger = get_module_logger("TransformerModel") + print('do we have gpu?{}'.format(torch.cuda.is_available())) + self.logger.info( + "Naive Transformer:" + "\nbatch_size : {}" + "\ndevice : {}".format(self.batch_size, self.device) + ) + + if self.seed is not None: + np.random.seed(self.seed) + torch.manual_seed(self.seed) + + self.model = Transformer(d_feat, d_model, nhead, num_layers, dropout, self.device) + if optimizer.lower() == "adam": + self.train_optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.reg) + elif optimizer.lower() == "gd": + self.train_optimizer = optim.SGD(self.model.parameters(), lr=self.lr, weight_decay=self.reg) + else: + raise NotImplementedError("optimizer {} is not supported!".format(optimizer)) + + self.fitted = False + self.model.to(self.device) + + @property + def use_gpu(self): + return self.device != torch.device("cpu") + + def mse(self, pred, label): + loss = (pred.float() - label.float()) ** 2 + return torch.mean(loss) + + def loss_fn(self, pred, label): + mask = ~torch.isnan(label) + + if self.loss == "mse": + return 
self.mse(pred[mask], label[mask]) + + raise ValueError("unknown loss `%s`" % self.loss) + + def metric_fn(self, pred, label): + + mask = torch.isfinite(label) + + if self.metric == "" or self.metric == "loss": + return -self.loss_fn(pred[mask], label[mask]) + + raise ValueError("unknown metric `%s`" % self.metric) + + def train_epoch(self, data_loader): + + self.model.train() + + for data in data_loader: + feature = data[:, :, 0:-1].to(self.device) + label = data[:, -1, -1].to(self.device) + + pred = self.model(feature.float()) # .float() + loss = self.loss_fn(pred, label) + + self.train_optimizer.zero_grad() + loss.backward() + torch.nn.utils.clip_grad_value_(self.model.parameters(), 3.0) + self.train_optimizer.step() + + def test_epoch(self, data_loader): + + self.model.eval() + + scores = [] + losses = [] + + for data in data_loader: + + feature = data[:, :, 0:-1].to(self.device) + # feature[torch.isnan(feature)] = 0 + label = data[:, -1, -1].to(self.device) + + with torch.no_grad(): + pred = self.model(feature.float()) # .float() + loss = self.loss_fn(pred, label) + losses.append(loss.item()) + + score = self.metric_fn(pred, label) + scores.append(score.item()) + + return np.mean(losses), np.mean(scores) + + def fit( + self, + dataset: DatasetH, + evals_result=dict(), + save_path=None, + ): + + dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + + dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader + dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader + + train_loader = DataLoader( + dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs, drop_last=True + ) + valid_loader = DataLoader( + dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True + ) + + save_path = get_or_create_path(save_path) + + stop_steps = 0 + 
train_loss = 0 + best_score = -np.inf + best_epoch = 0 + evals_result["train"] = [] + evals_result["valid"] = [] + + # train + self.logger.info("training...") + self.fitted = True + + for step in range(self.n_epochs): + self.logger.info("Epoch%d:", step) + self.logger.info("training...") + self.train_epoch(train_loader) + self.logger.info("evaluating...") + train_loss, train_score = self.test_epoch(train_loader) + val_loss, val_score = self.test_epoch(valid_loader) + self.logger.info("train %.6f, valid %.6f" % (train_score, val_score)) + evals_result["train"].append(train_score) + evals_result["valid"].append(val_score) + + if val_score > best_score: + best_score = val_score + stop_steps = 0 + best_epoch = step + best_param = copy.deepcopy(self.model.state_dict()) + else: + stop_steps += 1 + if stop_steps >= self.early_stop: + self.logger.info("early stop") + break + + self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch)) + self.model.load_state_dict(best_param) + torch.save(best_param, save_path) + + if self.use_gpu: + torch.cuda.empty_cache() + + def predict(self, dataset): + if not self.fitted: + raise ValueError("model is not fitted yet!") + + dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I) + dl_test.config(fillna_type="ffill+bfill") + test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs) + self.model.eval() + preds = [] + + for data in test_loader: + feature = data[:, :, 0:-1].to(self.device) + + with torch.no_grad(): + pred = self.model(feature.float()).detach().cpu().numpy() + + preds.append(pred) + + return pd.Series(np.concatenate(preds), index=dl_test.get_index()) + + +class PositionalEncoding(nn.Module): + def __init__(self, d_model, max_len=1000): + super(PositionalEncoding, self).__init__() + pe = torch.zeros(max_len, d_model) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.exp(torch.arange(0, d_model, 2).float() * 
(-math.log(10000.0) / d_model)) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0).transpose(0, 1) + self.register_buffer('pe', pe) + + def forward(self, x): + # [T, N, F] + return x + self.pe[:x.size(0), :] + + +class Transformer(nn.Module): + def __init__(self, d_feat=6, d_model=8, nhead=4, num_layers=2, dropout=0.5, device=None): + super(Transformer, self).__init__() + self.rnn = nn.GRU( + input_size=d_feat, + hidden_size=d_model, + num_layers=num_layers, + batch_first=True, + dropout=dropout, + ) + self.feature_layer = nn.Linear(d_feat, d_model) + self.pos_encoder = PositionalEncoding(d_model) + self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout) + self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers) + self.decoder_layer = nn.Linear(d_model, 1) + self.device = device + self.d_feat = d_feat + + def forward(self, src): + # pdb.set_trace() + # src [N, T, F], [512, 60, 6] + + # out, _ = self.rnn(src) + src = self.feature_layer(src) # [512, 60, 8] + + # src [N, T, F] --> [T, N, F], [60, 512, 8] + src = src.transpose(1, 0) # not batch first + + mask = None + + src = self.pos_encoder(src) + output = self.transformer_encoder(src, mask) # [60, 512, 8] + + # [T, N, F] --> [N, T*F] + output = self.decoder_layer(output.transpose(1, 0)[:, -1, :]) # [512, 1] + + return output.squeeze() + From dd0eebed53634cb1d51b87260332255ce4f6c5fa Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Wed, 14 Jul 2021 15:49:17 +0800 Subject: [PATCH 33/61] Update pytorch_localformer.py Have passed black. 
--- qlib/contrib/model/pytorch_localformer.py | 43 +++-------------------- 1 file changed, 5 insertions(+), 38 deletions(-) diff --git a/qlib/contrib/model/pytorch_localformer.py b/qlib/contrib/model/pytorch_localformer.py index f085bd4b2..bdf77b5be 100644 --- a/qlib/contrib/model/pytorch_localformer.py +++ b/qlib/contrib/model/pytorch_localformer.py @@ -24,32 +24,6 @@ from ...data.dataset import DatasetH, TSDatasetH from ...data.dataset.handler import DataHandlerLP from torch.nn.modules.container import ModuleList -import pdb - -# qrun benchmarks/Transformer/workflow_config_localformer_Alpha158.yaml -# 0.992366, @13, -''' -{'IC': 0.037426503365732174, - 'ICIR': 0.28977883455541603, - 'Rank IC': 0.04659889541774283, - 'Rank ICIR': 0.373569340092482} - -'The following are analysis results of the excess return without cost.' - risk -mean 0.000381 -std 0.004109 -annualized_return 0.096066 -information_ratio 1.472729 -max_drawdown -0.094917 -'The following are analysis results of the excess return with cost.' 
- risk -mean 0.000213 -std 0.004111 -annualized_return 0.053630 -information_ratio 0.821711 -max_drawdown -0.113694 -''' - class LocalformerModel(Model): def __init__( @@ -88,11 +62,8 @@ class LocalformerModel(Model): self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu") self.seed = seed self.logger = get_module_logger("TransformerModel") - print('do we have gpu?{}'.format(torch.cuda.is_available())) self.logger.info( - "Improved Transformer:" - "\nbatch_size : {}" - "\ndevice : {}".format(self.batch_size, self.device) + "Improved Transformer:" "\nbatch_size : {}" "\ndevice : {}".format(self.batch_size, self.device) ) if self.seed is not None: @@ -161,7 +132,6 @@ class LocalformerModel(Model): for data in data_loader: feature = data[:, :, 0:-1].to(self.device) - # feature[torch.isnan(feature)] = 0 label = data[:, -1, -1].to(self.device) with torch.no_grad(): @@ -266,11 +236,11 @@ class PositionalEncoding(nn.Module): pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) pe = pe.unsqueeze(0).transpose(0, 1) - self.register_buffer('pe', pe) + self.register_buffer("pe", pe) def forward(self, x): # [T, N, F] - return x + self.pe[:x.size(0), :] + return x + self.pe[: x.size(0), :] def _get_clones(module, N): @@ -278,7 +248,7 @@ def _get_clones(module, N): class LocalformerEncoder(nn.Module): - __constants__ = ['norm'] + __constants__ = ["norm"] def __init__(self, encoder_layer, num_layers, d_model): super(LocalformerEncoder, self).__init__() @@ -295,7 +265,7 @@ class LocalformerEncoder(nn.Module): out = output.transpose(1, 0).transpose(2, 1) out = self.conv[i](out).transpose(2, 1).transpose(1, 0) - output = mod(output+out, src_mask=mask) + output = mod(output + out, src_mask=mask) return output + out @@ -319,9 +289,7 @@ class Transformer(nn.Module): self.d_feat = d_feat def forward(self, src): - # pdb.set_trace() # src [N, T, F], [512, 60, 6] - src = self.feature_layer(src) # [512, 60, 8] # 
src [N, T, F] --> [T, N, F], [60, 512, 8] @@ -338,4 +306,3 @@ class Transformer(nn.Module): output = self.decoder_layer(output.transpose(1, 0)[:, -1, :]) # [512, 1] return output.squeeze() - From 096ef5a62bf271dca634ef40985fc6a8d6bd2937 Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Wed, 14 Jul 2021 15:50:33 +0800 Subject: [PATCH 34/61] Update pytorch_transformer.py Have passed black --- qlib/contrib/model/pytorch_transformer.py | 49 ++--------------------- 1 file changed, 3 insertions(+), 46 deletions(-) diff --git a/qlib/contrib/model/pytorch_transformer.py b/qlib/contrib/model/pytorch_transformer.py index 85582be1f..c53564903 100644 --- a/qlib/contrib/model/pytorch_transformer.py +++ b/qlib/contrib/model/pytorch_transformer.py @@ -23,32 +23,6 @@ from ...model.base import Model from ...data.dataset import DatasetH, TSDatasetH from ...data.dataset.handler import DataHandlerLP -import pdb - -# qrun benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml -# 0.993681, @11, -''' - 'IC': 0.03186587768611013, - 'ICIR': 0.2556910881045764, - 'Rank IC': 0.04735251936658551, - 'Rank ICIR': 0.388378955424602 - -'The following are analysis results of the excess return without cost.' - risk -mean 0.000309 -std 0.004209 -annualized_return 0.077839 -information_ratio 1.164993 -max_drawdown -0.106215 -'The following are analysis results of the excess return with cost.' 
- risk -mean 0.000126 -std 0.004209 -annualized_return 0.031707 -information_ratio 0.474567 -max_drawdown -0.131948 -''' - class TransformerModel(Model): def __init__( @@ -87,12 +61,7 @@ class TransformerModel(Model): self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu") self.seed = seed self.logger = get_module_logger("TransformerModel") - print('do we have gpu?{}'.format(torch.cuda.is_available())) - self.logger.info( - "Naive Transformer:" - "\nbatch_size : {}" - "\ndevice : {}".format(self.batch_size, self.device) - ) + self.logger.info("Naive Transformer:" "\nbatch_size : {}" "\ndevice : {}".format(self.batch_size, self.device)) if self.seed is not None: np.random.seed(self.seed) @@ -160,7 +129,6 @@ class TransformerModel(Model): for data in data_loader: feature = data[:, :, 0:-1].to(self.device) - # feature[torch.isnan(feature)] = 0 label = data[:, -1, -1].to(self.device) with torch.no_grad(): @@ -265,23 +233,16 @@ class PositionalEncoding(nn.Module): pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) pe = pe.unsqueeze(0).transpose(0, 1) - self.register_buffer('pe', pe) + self.register_buffer("pe", pe) def forward(self, x): # [T, N, F] - return x + self.pe[:x.size(0), :] + return x + self.pe[: x.size(0), :] class Transformer(nn.Module): def __init__(self, d_feat=6, d_model=8, nhead=4, num_layers=2, dropout=0.5, device=None): super(Transformer, self).__init__() - self.rnn = nn.GRU( - input_size=d_feat, - hidden_size=d_model, - num_layers=num_layers, - batch_first=True, - dropout=dropout, - ) self.feature_layer = nn.Linear(d_feat, d_model) self.pos_encoder = PositionalEncoding(d_model) self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout) @@ -291,10 +252,7 @@ class Transformer(nn.Module): self.d_feat = d_feat def forward(self, src): - # pdb.set_trace() # src [N, T, F], [512, 60, 6] - - # out, _ = self.rnn(src) src = self.feature_layer(src) 
# [512, 60, 8] # src [N, T, F] --> [T, N, F], [60, 512, 8] @@ -309,4 +267,3 @@ class Transformer(nn.Module): output = self.decoder_layer(output.transpose(1, 0)[:, -1, :]) # [512, 1] return output.squeeze() - From 2eee064eb8c3f025103c19aa2ca7b89e021b86a8 Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Wed, 14 Jul 2021 16:11:51 +0800 Subject: [PATCH 35/61] Add files via upload --- .../workflow_config_localformer_Alpha158.yaml | 82 +++++++++++++++++++ .../workflow_config_localformer_Alpha360.yaml | 82 +++++++++++++++++++ .../workflow_config_transformer_Alpha158.yaml | 82 +++++++++++++++++++ .../workflow_config_transformer_Alpha360.yaml | 82 +++++++++++++++++++ 4 files changed, 328 insertions(+) create mode 100644 examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml create mode 100644 examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml create mode 100644 examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml create mode 100644 examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml diff --git a/examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml new file mode 100644 index 000000000..98090356e --- /dev/null +++ b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml @@ -0,0 +1,82 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market + infer_processors: + - class: FilterCol + kwargs: + fields_group: feature + col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", + "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", + "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW" + ] + - class: 
RobustZScoreNorm + kwargs: + fields_group: feature + clip_outlier: true + - class: Fillna + kwargs: + fields_group: feature + learn_processors: + - class: DropnaLabel + - class: CSRankNorm + kwargs: + fields_group: label + label: ["Ref($close, -2) / Ref($close, -1) - 1"] + +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LocalformerModel + module_path: qlib.contrib.model.pytorch_localformer + kwargs: + seed: 0 + n_jobs: 20 + dataset: + class: TSDatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha158 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + step_len: 20 + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git a/examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml new file mode 100644 index 000000000..9792a2357 --- /dev/null +++ b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml @@ -0,0 +1,82 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market + infer_processors: + - 
class: FilterCol + kwargs: + fields_group: feature + col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", + "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", + "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW" + ] + - class: RobustZScoreNorm + kwargs: + fields_group: feature + clip_outlier: true + - class: Fillna + kwargs: + fields_group: feature + learn_processors: + - class: DropnaLabel + - class: CSRankNorm + kwargs: + fields_group: label + label: ["Ref($close, -2) / Ref($close, -1) - 1"] + +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LocalformerModel + module_path: qlib.contrib.model.pytorch_localformer + kwargs: + seed: 0 + n_jobs: 20 + dataset: + class: TSDatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha360 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + step_len: 20 + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git a/examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml new file mode 100644 index 000000000..e58c20541 --- /dev/null +++ b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml @@ -0,0 +1,82 @@ +qlib_init: + provider_uri: 
"~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market + infer_processors: + - class: FilterCol + kwargs: + fields_group: feature + col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", + "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", + "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW" + ] + - class: RobustZScoreNorm + kwargs: + fields_group: feature + clip_outlier: true + - class: Fillna + kwargs: + fields_group: feature + learn_processors: + - class: DropnaLabel + - class: CSRankNorm + kwargs: + fields_group: label + label: ["Ref($close, -2) / Ref($close, -1) - 1"] + +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: TransformerModel + module_path: qlib.contrib.model.pytorch_transformer + kwargs: + seed: 0 + n_jobs: 20 + dataset: + class: TSDatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha158 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + step_len: 20 + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git 
a/examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml new file mode 100644 index 000000000..59d5fa2b9 --- /dev/null +++ b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml @@ -0,0 +1,82 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market + infer_processors: + - class: FilterCol + kwargs: + fields_group: feature + col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", + "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", + "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW" + ] + - class: RobustZScoreNorm + kwargs: + fields_group: feature + clip_outlier: true + - class: Fillna + kwargs: + fields_group: feature + learn_processors: + - class: DropnaLabel + - class: CSRankNorm + kwargs: + fields_group: label + label: ["Ref($close, -2) / Ref($close, -1) - 1"] + +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: TransformerModel + module_path: qlib.contrib.model.pytorch_transformer + kwargs: + seed: 0 + n_jobs: 20 + dataset: + class: TSDatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha360 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + step_len: 20 + record: + - class: SignalRecord + module_path: 
qlib.workflow.record_temp + kwargs: {} + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config From 0c3eaf3f162487227df22051e355053908c06834 Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Wed, 14 Jul 2021 16:18:35 +0800 Subject: [PATCH 36/61] Add files via upload --- examples/benchmarks/Localformer/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 examples/benchmarks/Localformer/requirements.txt diff --git a/examples/benchmarks/Localformer/requirements.txt b/examples/benchmarks/Localformer/requirements.txt new file mode 100644 index 000000000..d5b918797 --- /dev/null +++ b/examples/benchmarks/Localformer/requirements.txt @@ -0,0 +1,3 @@ +numpy==1.17.4 +pandas==1.1.2 +torch==1.2.0 \ No newline at end of file From 2df9b6e0769d2988392790573b4983a4b91cf1fe Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Wed, 14 Jul 2021 16:24:01 +0800 Subject: [PATCH 37/61] Add files via upload --- examples/benchmarks/Transformer/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 examples/benchmarks/Transformer/requirements.txt diff --git a/examples/benchmarks/Transformer/requirements.txt b/examples/benchmarks/Transformer/requirements.txt new file mode 100644 index 000000000..d5b918797 --- /dev/null +++ b/examples/benchmarks/Transformer/requirements.txt @@ -0,0 +1,3 @@ +numpy==1.17.4 +pandas==1.1.2 +torch==1.2.0 \ No newline at end of file From 35840606a8721b031ad5ab63239a859a29782f98 Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Fri, 16 Jul 2021 15:05:32 +0800 Subject: [PATCH 38/61] Update pytorch_localformer.py --- qlib/contrib/model/pytorch_localformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/contrib/model/pytorch_localformer.py b/qlib/contrib/model/pytorch_localformer.py index bdf77b5be..683a9bd4f 100644 --- 
a/qlib/contrib/model/pytorch_localformer.py +++ b/qlib/contrib/model/pytorch_localformer.py @@ -42,7 +42,7 @@ class LocalformerModel(Model): optimizer="adam", reg=1e-3, n_jobs=10, - GPU=2, + GPU=0, seed=None, **kwargs ): From bee031af68cd0864c8329de13608c2d4feb58fc1 Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Fri, 16 Jul 2021 18:33:11 +0800 Subject: [PATCH 39/61] Add files via upload --- qlib/contrib/model/pytorch_localformer.py | 100 +++--- qlib/contrib/model/pytorch_localformer_ts.py | 310 +++++++++++++++++++ qlib/contrib/model/pytorch_transformer.py | 96 +++--- qlib/contrib/model/pytorch_transformer_ts.py | 269 ++++++++++++++++ 4 files changed, 700 insertions(+), 75 deletions(-) create mode 100644 qlib/contrib/model/pytorch_localformer_ts.py create mode 100644 qlib/contrib/model/pytorch_transformer_ts.py diff --git a/qlib/contrib/model/pytorch_localformer.py b/qlib/contrib/model/pytorch_localformer.py index 683a9bd4f..1b722ead2 100644 --- a/qlib/contrib/model/pytorch_localformer.py +++ b/qlib/contrib/model/pytorch_localformer.py @@ -8,6 +8,7 @@ from __future__ import print_function import os import numpy as np import pandas as pd +from typing import Text, Union import copy import math from ...utils import get_or_create_path @@ -23,6 +24,7 @@ from ...model.base import Model from ...data.dataset import DatasetH, TSDatasetH from ...data.dataset.handler import DataHandlerLP from torch.nn.modules.container import ModuleList +# qrun examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml ” class LocalformerModel(Model): @@ -30,7 +32,7 @@ class LocalformerModel(Model): self, d_feat: int = 20, d_model: int = 64, - batch_size: int = 8192, + batch_size: int = 2048, nhead: int = 2, num_layers: int = 2, dropout: float = 0, @@ -62,9 +64,7 @@ class LocalformerModel(Model): self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu") self.seed = seed self.logger = get_module_logger("TransformerModel") - 
self.logger.info( - "Improved Transformer:" "\nbatch_size : {}" "\ndevice : {}".format(self.batch_size, self.device) - ) + self.logger.info("Naive Transformer:" "\nbatch_size : {}" "\ndevice : {}".format(self.batch_size, self.device)) if self.seed is not None: np.random.seed(self.seed) @@ -106,15 +106,25 @@ class LocalformerModel(Model): raise ValueError("unknown metric `%s`" % self.metric) - def train_epoch(self, data_loader): + def train_epoch(self, x_train, y_train): + + x_train_values = x_train.values + y_train_values = np.squeeze(y_train.values) self.model.train() - for data in data_loader: - feature = data[:, :, 0:-1].to(self.device) - label = data[:, -1, -1].to(self.device) + indices = np.arange(len(x_train_values)) + np.random.shuffle(indices) - pred = self.model(feature.float()) # .float() + for i in range(len(indices))[:: self.batch_size]: + + if len(indices) - i < self.batch_size: + break + + feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float().to(self.device) + label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float().to(self.device) + + pred = self.model(feature) loss = self.loss_fn(pred, label) self.train_optimizer.zero_grad() @@ -122,20 +132,29 @@ class LocalformerModel(Model): torch.nn.utils.clip_grad_value_(self.model.parameters(), 3.0) self.train_optimizer.step() - def test_epoch(self, data_loader): + def test_epoch(self, data_x, data_y): + + # prepare training data + x_values = data_x.values + y_values = np.squeeze(data_y.values) self.model.eval() scores = [] losses = [] - for data in data_loader: + indices = np.arange(len(x_values)) - feature = data[:, :, 0:-1].to(self.device) - label = data[:, -1, -1].to(self.device) + for i in range(len(indices))[:: self.batch_size]: + + if len(indices) - i < self.batch_size: + break + + feature = torch.from_numpy(x_values[indices[i: i + self.batch_size]]).float().to(self.device) + label = torch.from_numpy(y_values[indices[i: i + 
self.batch_size]]).float().to(self.device) with torch.no_grad(): - pred = self.model(feature.float()) # .float() + pred = self.model(feature) loss = self.loss_fn(pred, label) losses.append(loss.item()) @@ -151,21 +170,16 @@ class LocalformerModel(Model): save_path=None, ): - dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) - dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) - - dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader - dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader - - train_loader = DataLoader( - dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs, drop_last=True - ) - valid_loader = DataLoader( - dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True + df_train, df_valid, df_test = dataset.prepare( + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) + x_train, y_train = df_train["feature"], df_train["label"] + x_valid, y_valid = df_valid["feature"], df_valid["label"] + save_path = get_or_create_path(save_path) - stop_steps = 0 train_loss = 0 best_score = -np.inf @@ -180,10 +194,10 @@ class LocalformerModel(Model): for step in range(self.n_epochs): self.logger.info("Epoch%d:", step) self.logger.info("training...") - self.train_epoch(train_loader) + self.train_epoch(x_train, y_train) self.logger.info("evaluating...") - train_loss, train_score = self.test_epoch(train_loader) - val_loss, val_score = self.test_epoch(valid_loader) + train_loss, train_score = self.test_epoch(x_train, y_train) + val_loss, val_score = self.test_epoch(x_valid, y_valid) self.logger.info("train %.6f, valid %.6f" % (train_score, val_score)) evals_result["train"].append(train_score) evals_result["valid"].append(val_score) @@ -206,25 +220,32 @@ class LocalformerModel(Model): if self.use_gpu: torch.cuda.empty_cache() - def 
predict(self, dataset): + def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"): if not self.fitted: raise ValueError("model is not fitted yet!") - dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I) - dl_test.config(fillna_type="ffill+bfill") - test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs) + x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I) + index = x_test.index self.model.eval() + x_values = x_test.values + sample_num = x_values.shape[0] preds = [] - for data in test_loader: - feature = data[:, :, 0:-1].to(self.device) + for begin in range(sample_num)[:: self.batch_size]: + + if sample_num - begin < self.batch_size: + end = sample_num + else: + end = begin + self.batch_size + + x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device) with torch.no_grad(): - pred = self.model(feature.float()).detach().cpu().numpy() + pred = self.model(x_batch).detach().cpu().numpy() preds.append(pred) - return pd.Series(np.concatenate(preds), index=dl_test.get_index()) + return pd.Series(np.concatenate(preds), index=index) class PositionalEncoding(nn.Module): @@ -289,8 +310,9 @@ class Transformer(nn.Module): self.d_feat = d_feat def forward(self, src): - # src [N, T, F], [512, 60, 6] - src = self.feature_layer(src) # [512, 60, 8] + # src [N, F*T] --> [N, T, F] + src = src.reshape(len(src), self.d_feat, -1).permute(0, 2, 1) + src = self.feature_layer(src) # src [N, T, F] --> [T, N, F], [60, 512, 8] src = src.transpose(1, 0) # not batch first diff --git a/qlib/contrib/model/pytorch_localformer_ts.py b/qlib/contrib/model/pytorch_localformer_ts.py new file mode 100644 index 000000000..aa7af84df --- /dev/null +++ b/qlib/contrib/model/pytorch_localformer_ts.py @@ -0,0 +1,310 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ + +from __future__ import division +from __future__ import print_function + +import os +import numpy as np +import pandas as pd +import copy +import math +from ...utils import get_or_create_path +from ...log import get_module_logger + +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader + +from .pytorch_utils import count_parameters +from ...model.base import Model +from ...data.dataset import DatasetH, TSDatasetH +from ...data.dataset.handler import DataHandlerLP +from torch.nn.modules.container import ModuleList + + +class LocalformerModel(Model): + def __init__( + self, + d_feat: int = 20, + d_model: int = 64, + batch_size: int = 8192, + nhead: int = 2, + num_layers: int = 2, + dropout: float = 0, + n_epochs=100, + lr=0.0001, + metric="", + early_stop=5, + loss="mse", + optimizer="adam", + reg=1e-3, + n_jobs=10, + GPU=2, + seed=None, + **kwargs + ): + + # set hyper-parameters. + self.d_model = d_model + self.dropout = dropout + self.n_epochs = n_epochs + self.lr = lr + self.reg = reg + self.metric = metric + self.batch_size = batch_size + self.early_stop = early_stop + self.optimizer = optimizer.lower() + self.loss = loss + self.n_jobs = n_jobs + self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu") + self.seed = seed + self.logger = get_module_logger("TransformerModel") + self.logger.info( + "Improved Transformer:" "\nbatch_size : {}" "\ndevice : {}".format(self.batch_size, self.device) + ) + + if self.seed is not None: + np.random.seed(self.seed) + torch.manual_seed(self.seed) + + self.model = Transformer(d_feat, d_model, nhead, num_layers, dropout, self.device) + if optimizer.lower() == "adam": + self.train_optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.reg) + elif optimizer.lower() == "gd": + self.train_optimizer = optim.SGD(self.model.parameters(), lr=self.lr, weight_decay=self.reg) + else: + raise NotImplementedError("optimizer 
{} is not supported!".format(optimizer)) + + self.fitted = False + self.model.to(self.device) + + @property + def use_gpu(self): + return self.device != torch.device("cpu") + + def mse(self, pred, label): + loss = (pred.float() - label.float()) ** 2 + return torch.mean(loss) + + def loss_fn(self, pred, label): + mask = ~torch.isnan(label) + + if self.loss == "mse": + return self.mse(pred[mask], label[mask]) + + raise ValueError("unknown loss `%s`" % self.loss) + + def metric_fn(self, pred, label): + + mask = torch.isfinite(label) + + if self.metric == "" or self.metric == "loss": + return -self.loss_fn(pred[mask], label[mask]) + + raise ValueError("unknown metric `%s`" % self.metric) + + def train_epoch(self, data_loader): + + self.model.train() + + for data in data_loader: + feature = data[:, :, 0:-1].to(self.device) + label = data[:, -1, -1].to(self.device) + + pred = self.model(feature.float()) # .float() + loss = self.loss_fn(pred, label) + + self.train_optimizer.zero_grad() + loss.backward() + torch.nn.utils.clip_grad_value_(self.model.parameters(), 3.0) + self.train_optimizer.step() + + def test_epoch(self, data_loader): + + self.model.eval() + + scores = [] + losses = [] + + for data in data_loader: + + feature = data[:, :, 0:-1].to(self.device) + label = data[:, -1, -1].to(self.device) + + with torch.no_grad(): + pred = self.model(feature.float()) # .float() + loss = self.loss_fn(pred, label) + losses.append(loss.item()) + + score = self.metric_fn(pred, label) + scores.append(score.item()) + + return np.mean(losses), np.mean(scores) + + def fit( + self, + dataset: DatasetH, + evals_result=dict(), + save_path=None, + ): + + dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + import pdb + pdb.set_trace() + + dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader + 
dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader + + train_loader = DataLoader( + dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs, drop_last=True + ) + valid_loader = DataLoader( + dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True + ) + + save_path = get_or_create_path(save_path) + + stop_steps = 0 + train_loss = 0 + best_score = -np.inf + best_epoch = 0 + evals_result["train"] = [] + evals_result["valid"] = [] + + # train + self.logger.info("training...") + self.fitted = True + + for step in range(self.n_epochs): + self.logger.info("Epoch%d:", step) + self.logger.info("training...") + self.train_epoch(train_loader) + self.logger.info("evaluating...") + train_loss, train_score = self.test_epoch(train_loader) + val_loss, val_score = self.test_epoch(valid_loader) + self.logger.info("train %.6f, valid %.6f" % (train_score, val_score)) + evals_result["train"].append(train_score) + evals_result["valid"].append(val_score) + + if val_score > best_score: + best_score = val_score + stop_steps = 0 + best_epoch = step + best_param = copy.deepcopy(self.model.state_dict()) + else: + stop_steps += 1 + if stop_steps >= self.early_stop: + self.logger.info("early stop") + break + + self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch)) + self.model.load_state_dict(best_param) + torch.save(best_param, save_path) + + if self.use_gpu: + torch.cuda.empty_cache() + + def predict(self, dataset): + if not self.fitted: + raise ValueError("model is not fitted yet!") + + dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I) + dl_test.config(fillna_type="ffill+bfill") + test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs) + self.model.eval() + preds = [] + + for data in test_loader: + feature = data[:, :, 0:-1].to(self.device) + + with torch.no_grad(): + pred = 
self.model(feature.float()).detach().cpu().numpy() + + preds.append(pred) + + return pd.Series(np.concatenate(preds), index=dl_test.get_index()) + + +class PositionalEncoding(nn.Module): + def __init__(self, d_model, max_len=1000): + super(PositionalEncoding, self).__init__() + pe = torch.zeros(max_len, d_model) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0).transpose(0, 1) + self.register_buffer("pe", pe) + + def forward(self, x): + # [T, N, F] + return x + self.pe[: x.size(0), :] + + +def _get_clones(module, N): + return ModuleList([copy.deepcopy(module) for i in range(N)]) + + +class LocalformerEncoder(nn.Module): + __constants__ = ["norm"] + + def __init__(self, encoder_layer, num_layers, d_model): + super(LocalformerEncoder, self).__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.conv = _get_clones(nn.Conv1d(d_model, d_model, 3, 1, 1), num_layers) + self.num_layers = num_layers + + def forward(self, src, mask): + output = src + out = src + + for i, mod in enumerate(self.layers): + # [T, N, F] --> [N, T, F] --> [N, F, T] + out = output.transpose(1, 0).transpose(2, 1) + out = self.conv[i](out).transpose(2, 1).transpose(1, 0) + + output = mod(output + out, src_mask=mask) + + return output + out + + +class Transformer(nn.Module): + def __init__(self, d_feat=6, d_model=8, nhead=4, num_layers=2, dropout=0.5, device=None): + super(Transformer, self).__init__() + self.rnn = nn.GRU( + input_size=d_model, + hidden_size=d_model, + num_layers=num_layers, + batch_first=False, + dropout=dropout, + ) + self.feature_layer = nn.Linear(d_feat, d_model) + self.pos_encoder = PositionalEncoding(d_model) + self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout) + self.transformer_encoder = 
LocalformerEncoder(self.encoder_layer, num_layers=num_layers, d_model=d_model) + self.decoder_layer = nn.Linear(d_model, 1) + self.device = device + self.d_feat = d_feat + + def forward(self, src): + # src [N, T, F], [512, 60, 6] + src = self.feature_layer(src) # [512, 60, 8] + + # src [N, T, F] --> [T, N, F], [60, 512, 8] + src = src.transpose(1, 0) # not batch first + + mask = None + + src = self.pos_encoder(src) + output = self.transformer_encoder(src, mask) # [60, 512, 8] + + output, _ = self.rnn(output) + + # [T, N, F] --> [N, T*F] + output = self.decoder_layer(output.transpose(1, 0)[:, -1, :]) # [512, 1] + + return output.squeeze() diff --git a/qlib/contrib/model/pytorch_transformer.py b/qlib/contrib/model/pytorch_transformer.py index c53564903..cca7a7871 100644 --- a/qlib/contrib/model/pytorch_transformer.py +++ b/qlib/contrib/model/pytorch_transformer.py @@ -8,6 +8,7 @@ from __future__ import print_function import os import numpy as np import pandas as pd +from typing import Text, Union import copy import math from ...utils import get_or_create_path @@ -22,6 +23,7 @@ from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH, TSDatasetH from ...data.dataset.handler import DataHandlerLP +# qrun examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml ” class TransformerModel(Model): @@ -29,7 +31,7 @@ class TransformerModel(Model): self, d_feat: int = 20, d_model: int = 64, - batch_size: int = 8192, + batch_size: int = 2048, nhead: int = 2, num_layers: int = 2, dropout: float = 0, @@ -103,15 +105,25 @@ class TransformerModel(Model): raise ValueError("unknown metric `%s`" % self.metric) - def train_epoch(self, data_loader): + def train_epoch(self, x_train, y_train): + + x_train_values = x_train.values + y_train_values = np.squeeze(y_train.values) self.model.train() - for data in data_loader: - feature = data[:, :, 0:-1].to(self.device) - label = data[:, -1, -1].to(self.device) + indices = 
np.arange(len(x_train_values)) + np.random.shuffle(indices) - pred = self.model(feature.float()) # .float() + for i in range(len(indices))[:: self.batch_size]: + + if len(indices) - i < self.batch_size: + break + + feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float().to(self.device) + label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float().to(self.device) + + pred = self.model(feature) loss = self.loss_fn(pred, label) self.train_optimizer.zero_grad() @@ -119,20 +131,29 @@ class TransformerModel(Model): torch.nn.utils.clip_grad_value_(self.model.parameters(), 3.0) self.train_optimizer.step() - def test_epoch(self, data_loader): + def test_epoch(self, data_x, data_y): + + # prepare training data + x_values = data_x.values + y_values = np.squeeze(data_y.values) self.model.eval() scores = [] losses = [] - for data in data_loader: + indices = np.arange(len(x_values)) - feature = data[:, :, 0:-1].to(self.device) - label = data[:, -1, -1].to(self.device) + for i in range(len(indices))[:: self.batch_size]: + + if len(indices) - i < self.batch_size: + break + + feature = torch.from_numpy(x_values[indices[i: i + self.batch_size]]).float().to(self.device) + label = torch.from_numpy(y_values[indices[i: i + self.batch_size]]).float().to(self.device) with torch.no_grad(): - pred = self.model(feature.float()) # .float() + pred = self.model(feature) loss = self.loss_fn(pred, label) losses.append(loss.item()) @@ -148,21 +169,16 @@ class TransformerModel(Model): save_path=None, ): - dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) - dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) - - dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader - dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader - - train_loader = DataLoader( - dl_train, batch_size=self.batch_size, shuffle=True, 
num_workers=self.n_jobs, drop_last=True - ) - valid_loader = DataLoader( - dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True + df_train, df_valid, df_test = dataset.prepare( + ["train", "valid", "test"], + col_set=["feature", "label"], + data_key=DataHandlerLP.DK_L, ) + x_train, y_train = df_train["feature"], df_train["label"] + x_valid, y_valid = df_valid["feature"], df_valid["label"] + save_path = get_or_create_path(save_path) - stop_steps = 0 train_loss = 0 best_score = -np.inf @@ -177,10 +193,10 @@ class TransformerModel(Model): for step in range(self.n_epochs): self.logger.info("Epoch%d:", step) self.logger.info("training...") - self.train_epoch(train_loader) + self.train_epoch(x_train, y_train) self.logger.info("evaluating...") - train_loss, train_score = self.test_epoch(train_loader) - val_loss, val_score = self.test_epoch(valid_loader) + train_loss, train_score = self.test_epoch(x_train, y_train) + val_loss, val_score = self.test_epoch(x_valid, y_valid) self.logger.info("train %.6f, valid %.6f" % (train_score, val_score)) evals_result["train"].append(train_score) evals_result["valid"].append(val_score) @@ -203,25 +219,32 @@ class TransformerModel(Model): if self.use_gpu: torch.cuda.empty_cache() - def predict(self, dataset): + def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"): if not self.fitted: raise ValueError("model is not fitted yet!") - dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I) - dl_test.config(fillna_type="ffill+bfill") - test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs) + x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I) + index = x_test.index self.model.eval() + x_values = x_test.values + sample_num = x_values.shape[0] preds = [] - for data in test_loader: - feature = data[:, :, 0:-1].to(self.device) + for begin in range(sample_num)[:: self.batch_size]: + + if 
sample_num - begin < self.batch_size: + end = sample_num + else: + end = begin + self.batch_size + + x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device) with torch.no_grad(): - pred = self.model(feature.float()).detach().cpu().numpy() + pred = self.model(x_batch).detach().cpu().numpy() preds.append(pred) - return pd.Series(np.concatenate(preds), index=dl_test.get_index()) + return pd.Series(np.concatenate(preds), index=index) class PositionalEncoding(nn.Module): @@ -252,8 +275,9 @@ class Transformer(nn.Module): self.d_feat = d_feat def forward(self, src): - # src [N, T, F], [512, 60, 6] - src = self.feature_layer(src) # [512, 60, 8] + # src [N, F*T] --> [N, T, F] + src = src.reshape(len(src), self.d_feat, -1).permute(0, 2, 1) + src = self.feature_layer(src) # src [N, T, F] --> [T, N, F], [60, 512, 8] src = src.transpose(1, 0) # not batch first diff --git a/qlib/contrib/model/pytorch_transformer_ts.py b/qlib/contrib/model/pytorch_transformer_ts.py new file mode 100644 index 000000000..c53564903 --- /dev/null +++ b/qlib/contrib/model/pytorch_transformer_ts.py @@ -0,0 +1,269 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ + +from __future__ import division +from __future__ import print_function + +import os +import numpy as np +import pandas as pd +import copy +import math +from ...utils import get_or_create_path +from ...log import get_module_logger + +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader + +from .pytorch_utils import count_parameters +from ...model.base import Model +from ...data.dataset import DatasetH, TSDatasetH +from ...data.dataset.handler import DataHandlerLP + + +class TransformerModel(Model): + def __init__( + self, + d_feat: int = 20, + d_model: int = 64, + batch_size: int = 8192, + nhead: int = 2, + num_layers: int = 2, + dropout: float = 0, + n_epochs=100, + lr=0.0001, + metric="", + early_stop=5, + loss="mse", + optimizer="adam", + reg=1e-3, + n_jobs=10, + GPU=0, + seed=None, + **kwargs + ): + + # set hyper-parameters. + self.d_model = d_model + self.dropout = dropout + self.n_epochs = n_epochs + self.lr = lr + self.reg = reg + self.metric = metric + self.batch_size = batch_size + self.early_stop = early_stop + self.optimizer = optimizer.lower() + self.loss = loss + self.n_jobs = n_jobs + self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu") + self.seed = seed + self.logger = get_module_logger("TransformerModel") + self.logger.info("Naive Transformer:" "\nbatch_size : {}" "\ndevice : {}".format(self.batch_size, self.device)) + + if self.seed is not None: + np.random.seed(self.seed) + torch.manual_seed(self.seed) + + self.model = Transformer(d_feat, d_model, nhead, num_layers, dropout, self.device) + if optimizer.lower() == "adam": + self.train_optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.reg) + elif optimizer.lower() == "gd": + self.train_optimizer = optim.SGD(self.model.parameters(), lr=self.lr, weight_decay=self.reg) + else: + raise NotImplementedError("optimizer {} is not supported!".format(optimizer)) + + self.fitted = 
False + self.model.to(self.device) + + @property + def use_gpu(self): + return self.device != torch.device("cpu") + + def mse(self, pred, label): + loss = (pred.float() - label.float()) ** 2 + return torch.mean(loss) + + def loss_fn(self, pred, label): + mask = ~torch.isnan(label) + + if self.loss == "mse": + return self.mse(pred[mask], label[mask]) + + raise ValueError("unknown loss `%s`" % self.loss) + + def metric_fn(self, pred, label): + + mask = torch.isfinite(label) + + if self.metric == "" or self.metric == "loss": + return -self.loss_fn(pred[mask], label[mask]) + + raise ValueError("unknown metric `%s`" % self.metric) + + def train_epoch(self, data_loader): + + self.model.train() + + for data in data_loader: + feature = data[:, :, 0:-1].to(self.device) + label = data[:, -1, -1].to(self.device) + + pred = self.model(feature.float()) # .float() + loss = self.loss_fn(pred, label) + + self.train_optimizer.zero_grad() + loss.backward() + torch.nn.utils.clip_grad_value_(self.model.parameters(), 3.0) + self.train_optimizer.step() + + def test_epoch(self, data_loader): + + self.model.eval() + + scores = [] + losses = [] + + for data in data_loader: + + feature = data[:, :, 0:-1].to(self.device) + label = data[:, -1, -1].to(self.device) + + with torch.no_grad(): + pred = self.model(feature.float()) # .float() + loss = self.loss_fn(pred, label) + losses.append(loss.item()) + + score = self.metric_fn(pred, label) + scores.append(score.item()) + + return np.mean(losses), np.mean(scores) + + def fit( + self, + dataset: DatasetH, + evals_result=dict(), + save_path=None, + ): + + dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) + + dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader + dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader + + train_loader = DataLoader( + dl_train, 
batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs, drop_last=True + ) + valid_loader = DataLoader( + dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True + ) + + save_path = get_or_create_path(save_path) + + stop_steps = 0 + train_loss = 0 + best_score = -np.inf + best_epoch = 0 + evals_result["train"] = [] + evals_result["valid"] = [] + + # train + self.logger.info("training...") + self.fitted = True + + for step in range(self.n_epochs): + self.logger.info("Epoch%d:", step) + self.logger.info("training...") + self.train_epoch(train_loader) + self.logger.info("evaluating...") + train_loss, train_score = self.test_epoch(train_loader) + val_loss, val_score = self.test_epoch(valid_loader) + self.logger.info("train %.6f, valid %.6f" % (train_score, val_score)) + evals_result["train"].append(train_score) + evals_result["valid"].append(val_score) + + if val_score > best_score: + best_score = val_score + stop_steps = 0 + best_epoch = step + best_param = copy.deepcopy(self.model.state_dict()) + else: + stop_steps += 1 + if stop_steps >= self.early_stop: + self.logger.info("early stop") + break + + self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch)) + self.model.load_state_dict(best_param) + torch.save(best_param, save_path) + + if self.use_gpu: + torch.cuda.empty_cache() + + def predict(self, dataset): + if not self.fitted: + raise ValueError("model is not fitted yet!") + + dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I) + dl_test.config(fillna_type="ffill+bfill") + test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs) + self.model.eval() + preds = [] + + for data in test_loader: + feature = data[:, :, 0:-1].to(self.device) + + with torch.no_grad(): + pred = self.model(feature.float()).detach().cpu().numpy() + + preds.append(pred) + + return pd.Series(np.concatenate(preds), index=dl_test.get_index()) + + +class 
PositionalEncoding(nn.Module): + def __init__(self, d_model, max_len=1000): + super(PositionalEncoding, self).__init__() + pe = torch.zeros(max_len, d_model) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0).transpose(0, 1) + self.register_buffer("pe", pe) + + def forward(self, x): + # [T, N, F] + return x + self.pe[: x.size(0), :] + + +class Transformer(nn.Module): + def __init__(self, d_feat=6, d_model=8, nhead=4, num_layers=2, dropout=0.5, device=None): + super(Transformer, self).__init__() + self.feature_layer = nn.Linear(d_feat, d_model) + self.pos_encoder = PositionalEncoding(d_model) + self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout) + self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers) + self.decoder_layer = nn.Linear(d_model, 1) + self.device = device + self.d_feat = d_feat + + def forward(self, src): + # src [N, T, F], [512, 60, 6] + src = self.feature_layer(src) # [512, 60, 8] + + # src [N, T, F] --> [T, N, F], [60, 512, 8] + src = src.transpose(1, 0) # not batch first + + mask = None + + src = self.pos_encoder(src) + output = self.transformer_encoder(src, mask) # [60, 512, 8] + + # [T, N, F] --> [N, T*F] + output = self.decoder_layer(output.transpose(1, 0)[:, -1, :]) # [512, 1] + + return output.squeeze() From 161343018fb27b47d1699d505fcf9818d041f86d Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Fri, 16 Jul 2021 18:35:00 +0800 Subject: [PATCH 40/61] Add files via upload --- .../workflow_config_transformer_Alpha158.yaml | 2 +- .../workflow_config_transformer_Alpha360.yaml | 17 ++++------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml 
b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml index e58c20541..54707386f 100644 --- a/examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml +++ b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml @@ -50,7 +50,7 @@ port_analysis_config: &port_analysis_config task: model: class: TransformerModel - module_path: qlib.contrib.model.pytorch_transformer + module_path: qlib.contrib.model.pytorch_transformer_ts kwargs: seed: 0 n_jobs: 20 diff --git a/examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml index 59d5fa2b9..e568a1b30 100644 --- a/examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml +++ b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml @@ -10,13 +10,6 @@ data_handler_config: &data_handler_config fit_end_time: 2014-12-31 instruments: *market infer_processors: - - class: FilterCol - kwargs: - fields_group: feature - col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", - "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", - "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW" - ] - class: RobustZScoreNorm kwargs: fields_group: feature @@ -29,8 +22,7 @@ data_handler_config: &data_handler_config - class: CSRankNorm kwargs: fields_group: label - label: ["Ref($close, -2) / Ref($close, -1) - 1"] - + label: ["Ref($close, -2) / Ref($close, -1) - 1"] port_analysis_config: &port_analysis_config strategy: class: TopkDropoutStrategy @@ -52,10 +44,10 @@ task: class: TransformerModel module_path: qlib.contrib.model.pytorch_transformer kwargs: + d_feat: 6 seed: 0 - n_jobs: 20 dataset: - class: TSDatasetH + class: DatasetH module_path: qlib.data.dataset kwargs: handler: @@ -66,7 +58,6 @@ task: train: [2008-01-01, 2014-12-31] valid: [2015-01-01, 2016-12-31] test: [2017-01-01, 2020-08-01] - step_len: 20 record: - class: 
SignalRecord module_path: qlib.workflow.record_temp @@ -79,4 +70,4 @@ task: - class: PortAnaRecord module_path: qlib.workflow.record_temp kwargs: - config: *port_analysis_config + config: *port_analysis_config \ No newline at end of file From b07e0bffb1473d5b6df16ce4ec8e2cf3b2e106e0 Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Fri, 16 Jul 2021 18:35:50 +0800 Subject: [PATCH 41/61] Add files via upload --- .../workflow_config_localformer_Alpha158.yaml | 2 +- .../workflow_config_localformer_Alpha360.yaml | 17 ++++------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml index 98090356e..d7e967333 100644 --- a/examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml +++ b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml @@ -50,7 +50,7 @@ port_analysis_config: &port_analysis_config task: model: class: LocalformerModel - module_path: qlib.contrib.model.pytorch_localformer + module_path: qlib.contrib.model.pytorch_localformer_ts kwargs: seed: 0 n_jobs: 20 diff --git a/examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml index 9792a2357..1c8489461 100644 --- a/examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml +++ b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml @@ -10,13 +10,6 @@ data_handler_config: &data_handler_config fit_end_time: 2014-12-31 instruments: *market infer_processors: - - class: FilterCol - kwargs: - fields_group: feature - col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", - "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", - "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW" - ] - class: RobustZScoreNorm kwargs: fields_group: feature @@ -29,8 +22,7 @@ 
data_handler_config: &data_handler_config - class: CSRankNorm kwargs: fields_group: label - label: ["Ref($close, -2) / Ref($close, -1) - 1"] - + label: ["Ref($close, -2) / Ref($close, -1) - 1"] port_analysis_config: &port_analysis_config strategy: class: TopkDropoutStrategy @@ -52,10 +44,10 @@ task: class: LocalformerModel module_path: qlib.contrib.model.pytorch_localformer kwargs: + d_feat: 6 seed: 0 - n_jobs: 20 dataset: - class: TSDatasetH + class: DatasetH module_path: qlib.data.dataset kwargs: handler: @@ -66,7 +58,6 @@ task: train: [2008-01-01, 2014-12-31] valid: [2015-01-01, 2016-12-31] test: [2017-01-01, 2020-08-01] - step_len: 20 record: - class: SignalRecord module_path: qlib.workflow.record_temp @@ -79,4 +70,4 @@ task: - class: PortAnaRecord module_path: qlib.workflow.record_temp kwargs: - config: *port_analysis_config + config: *port_analysis_config \ No newline at end of file From 59d4bc9394319958d8ea4f56054d273afeaca6ef Mon Sep 17 00:00:00 2001 From: Young Date: Sun, 18 Jul 2021 12:09:57 +0800 Subject: [PATCH 42/61] update `run_all_model` and black format --- examples/run_all_model.py | 44 +++++++++++++++----- qlib/contrib/model/pytorch_localformer.py | 5 ++- qlib/contrib/model/pytorch_localformer_ts.py | 1 + qlib/contrib/model/pytorch_transformer.py | 5 ++- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/examples/run_all_model.py b/examples/run_all_model.py index c79fee004..1284d8e99 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -23,7 +23,6 @@ from qlib.config import REG_CN from qlib.workflow import R from qlib.tests.data import GetData - # init qlib provider_uri = "~/.qlib/qlib_data/cn_data" exp_folder_name = "run_all_model_records" @@ -40,6 +39,7 @@ exp_manager = { GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) qlib.init(provider_uri=provider_uri, region=REG_CN, exp_manager=exp_manager) + # decorator to check the arguments def only_allow_defined_args(function_to_decorate): 
@functools.wraps(function_to_decorate) @@ -92,7 +92,8 @@ def create_env(): # function to execute the cmd -def execute(cmd): +def execute(cmd, wait_when_err=False): + print("Running CMD:", cmd) with subprocess.Popen(cmd, stdout=subprocess.PIPE, bufsize=1, universal_newlines=True, shell=True) as p: for line in p.stdout: sys.stdout.write(line.split("\b")[0]) @@ -102,6 +103,8 @@ def execute(cmd): sys.stdout.write("\b" * 10 + "\b".join(line.split("\b")[1:-1])) if p.returncode != 0: + if wait_when_err: + input("Press Enter to Continue") return p.stderr else: return None @@ -184,7 +187,15 @@ def gen_and_save_md_table(metrics, dataset): # function to run the all the models @only_allow_defined_args -def run(times=1, models=None, dataset="Alpha360", exclude=False): +def run( + times=1, + models=None, + dataset="Alpha360", + exclude=False, + qlib_uri: str = "git+https://github.com/microsoft/qlib#egg=pyqlib", + wait_before_rm_env: bool = False, + wait_when_err: bool = False, +): """ Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future. Any PR to enhance this method is highly welcomed. Besides, this script doesn't support parrallel running the same model @@ -200,6 +211,13 @@ def run(times=1, models=None, dataset="Alpha360", exclude=False): determines whether the model being used is excluded or included. dataset : str determines the dataset to be used for each model. + qlib_uri : str + the uri to install qlib with pip + it could be url on the we or local path + wait_before_rm_env : bool + wait before remove environment. 
+ wait_when_err : bool + wait when errors raised when executing commands Usage: ------- @@ -240,32 +258,36 @@ def run(times=1, models=None, dataset="Alpha360", exclude=False): sys.stderr.write("\n") # install requirements.txt sys.stderr.write("Installing requirements.txt...\n") - execute(f"{python_path} -m pip install -r {req_path}") + execute(f"{python_path} -m pip install -r {req_path}", wait_when_err=wait_when_err) sys.stderr.write("\n") # setup gpu for tft if fn == "TFT": execute( - f"conda install -y --prefix {env_path} anaconda cudatoolkit=10.0 && conda install -y --prefix {env_path} cudnn" + f"conda install -y --prefix {env_path} anaconda cudatoolkit=10.0 && conda install -y --prefix {env_path} cudnn", + wait_when_err=wait_when_err, ) sys.stderr.write("\n") # install qlib sys.stderr.write("Installing qlib...\n") - execute(f"{python_path} -m pip install --upgrade pip") # TODO: FIX ME! - execute(f"{python_path} -m pip install --upgrade cython") # TODO: FIX ME! + execute(f"{python_path} -m pip install --upgrade pip", wait_when_err=wait_when_err) # TODO: FIX ME! + execute(f"{python_path} -m pip install --upgrade cython", wait_when_err=wait_when_err) # TODO: FIX ME! if fn == "TFT": execute( - f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall --ignore-installed PyYAML -e git+https://github.com/microsoft/qlib#egg=pyqlib" + f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall --ignore-installed PyYAML -e {qlib_uri}", + wait_when_err=wait_when_err, ) # TODO: FIX ME! else: execute( - f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e git+https://github.com/microsoft/qlib#egg=pyqlib" + f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e {qlib_uri}", + wait_when_err=wait_when_err, ) # TODO: FIX ME! 
sys.stderr.write("\n") # run workflow_by_config for multiple times for i in range(times): sys.stderr.write(f"Running the model: {fn} for iteration {i+1}...\n") errs = execute( - f"{python_path} {env_path / 'src/pyqlib/qlib/workflow/cli.py'} {yaml_path} {fn} {exp_folder_name}" + f"{python_path} {env_path / 'bin' / 'qrun'} {yaml_path} {fn} {exp_folder_name}", + wait_when_err=wait_when_err, ) if errs is not None: _errs = errors.get(fn, {}) @@ -274,6 +296,8 @@ def run(times=1, models=None, dataset="Alpha360", exclude=False): sys.stderr.write("\n") # remove env sys.stderr.write(f"Deleting the environment: {env_path}...\n") + if wait_before_rm_env: + input("Press Enter to Continue") shutil.rmtree(env_path) # getting all results sys.stderr.write(f"Retrieving results...\n") diff --git a/qlib/contrib/model/pytorch_localformer.py b/qlib/contrib/model/pytorch_localformer.py index 1b722ead2..2ec56067f 100644 --- a/qlib/contrib/model/pytorch_localformer.py +++ b/qlib/contrib/model/pytorch_localformer.py @@ -24,6 +24,7 @@ from ...model.base import Model from ...data.dataset import DatasetH, TSDatasetH from ...data.dataset.handler import DataHandlerLP from torch.nn.modules.container import ModuleList + # qrun examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml ” @@ -150,8 +151,8 @@ class LocalformerModel(Model): if len(indices) - i < self.batch_size: break - feature = torch.from_numpy(x_values[indices[i: i + self.batch_size]]).float().to(self.device) - label = torch.from_numpy(y_values[indices[i: i + self.batch_size]]).float().to(self.device) + feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float().to(self.device) + label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float().to(self.device) with torch.no_grad(): pred = self.model(feature) diff --git a/qlib/contrib/model/pytorch_localformer_ts.py b/qlib/contrib/model/pytorch_localformer_ts.py index aa7af84df..b4999bc9c 100644 --- 
a/qlib/contrib/model/pytorch_localformer_ts.py +++ b/qlib/contrib/model/pytorch_localformer_ts.py @@ -154,6 +154,7 @@ class LocalformerModel(Model): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) import pdb + pdb.set_trace() dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader diff --git a/qlib/contrib/model/pytorch_transformer.py b/qlib/contrib/model/pytorch_transformer.py index cca7a7871..53ebff3c5 100644 --- a/qlib/contrib/model/pytorch_transformer.py +++ b/qlib/contrib/model/pytorch_transformer.py @@ -23,6 +23,7 @@ from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH, TSDatasetH from ...data.dataset.handler import DataHandlerLP + # qrun examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml ” @@ -149,8 +150,8 @@ class TransformerModel(Model): if len(indices) - i < self.batch_size: break - feature = torch.from_numpy(x_values[indices[i: i + self.batch_size]]).float().to(self.device) - label = torch.from_numpy(y_values[indices[i: i + self.batch_size]]).float().to(self.device) + feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float().to(self.device) + label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float().to(self.device) with torch.no_grad(): pred = self.model(feature) From e006ef40add859597ca94bb3a27064b33b27576d Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Sun, 18 Jul 2021 22:37:41 +0800 Subject: [PATCH 43/61] Update pytorch_localformer_ts.py --- qlib/contrib/model/pytorch_localformer_ts.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/qlib/contrib/model/pytorch_localformer_ts.py b/qlib/contrib/model/pytorch_localformer_ts.py index b4999bc9c..683a9bd4f 100644 --- a/qlib/contrib/model/pytorch_localformer_ts.py +++ 
b/qlib/contrib/model/pytorch_localformer_ts.py @@ -42,7 +42,7 @@ class LocalformerModel(Model): optimizer="adam", reg=1e-3, n_jobs=10, - GPU=2, + GPU=0, seed=None, **kwargs ): @@ -153,9 +153,6 @@ class LocalformerModel(Model): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) - import pdb - - pdb.set_trace() dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader From 698e59ac72732828da3d9ef7c6e4d4f4d7beece7 Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Tue, 20 Jul 2021 14:55:03 +0800 Subject: [PATCH 44/61] Add performance of two new models Add the performance of transformer and localformer. --- examples/benchmarks/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index fc97657d0..ee2c0a833 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -23,6 +23,8 @@ The numbers shown below demonstrate the performance of the entire `workflow` of | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00| 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 | | TabNet (Sercan O. 
Arik, et al.)| Alpha360 | 0.0192±0.00 | 0.1401±0.00| 0.0291±0.00 | 0.2163±0.00 | -0.0258±0.00 | -0.2961±0.00| -0.1429±0.00 | | TCTS (Xueqing Wu, et al.)| Alpha360 | 0.0485±0.00 | 0.3689±0.04| 0.0586±0.00 | 0.4669±0.02 | 0.0816±0.02 | 1.1572±0.30| -0.0689±0.02 | +| Transformer (Ashish Vaswani, et al.)| Alpha360 | 0.0141±0.00 | 0.0917±0.02| 0.0331±0.00 | 0.2357±0.03 | -0.0259±0.03 | -0.3323±0.43| -0.1763±0.07 | +| Localformer (Juyong Jiang, et al.)| Alpha360 | 0.0408±0.00 | 0.2988±0.03| 0.0538±0.00 | 0.4105±0.02 | 0.0275±0.03 | 0.3464±0.37| -0.1182±0.03 | ## Alpha158 dataset | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | @@ -39,6 +41,8 @@ The numbers shown below demonstrate the performance of the entire `workflow` of | GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 | | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4338±0.01 | 0.0523±0.00 | 0.4257±0.01 | 0.1253±0.01 | 1.4105±0.14 | -0.0902±0.01 | | TabNet (Sercan O. Arik, et al.)| Alpha158 | 0.0383±0.00 | 0.3414±0.00| 0.0388±0.00 | 0.3460±0.00 | 0.0226±0.00 | 0.2652±0.00| -0.1072±0.00 | +| Transformer (Ashish Vaswani, et al.)| Alpha158 | 0.0274±0.00 | 0.2166±0.04| 0.0409±0.00 | 0.3342±0.04 | 0.0204±0.03 | 0.2888±0.40| -0.1216±0.04 | +| Localformer (Juyong Jiang, et al.)| Alpha158 | 0.0355±0.00 | 0.2747±0.04| 0.0466±0.00 | 0.3762±0.03 | 0.0506±0.02 | 0.7447±0.34| -0.0875±0.02 | - The selected 20 features are based on the feature importance of a lightgbm-based model. - The base model of DoubleEnsemble is LGBM. From 29e66b2deafdc3a8669ff70dda32f6f49390cf3f Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Tue, 20 Jul 2021 15:00:15 +0800 Subject: [PATCH 45/61] Add two new model in zoo Add transformer and localformer (SLGT) models for time series prediction in finance in the Quant Model Zoo. 
--- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 8668ccc14..68a275de6 100644 --- a/README.md +++ b/README.md @@ -291,6 +291,8 @@ Here is a list of models built on `Qlib`. - [TabNet based on pytorch (Sercan O. Arik, et al. AAAI 2019)](qlib/contrib/model/pytorch_tabnet.py) - [DoubleEnsemble based on LightGBM (Chuheng Zhang, et al. ICDM 2020)](qlib/contrib/model/double_ensemble.py) - [TCTS based on pytorch (Xueqing Wu, et al. ICML 2021)](qlib/contrib/model/pytorch_tcts.py) +- [Transformer based on pytorch (Ashish Vaswani, et al. NeurIPS 2017)](qlib/contrib/model/pytorch_transformer.py) +- [TCTS based on pytorch (Juyong Jiang, et al.)](qlib/contrib/model/pytorch_localformer.py) Your PR of new Quant models is highly welcomed. From 025b1dcff96dee34f3143bf0b13570b96e93fc44 Mon Sep 17 00:00:00 2001 From: Ying-Tao Luo Date: Tue, 20 Jul 2021 15:00:59 +0800 Subject: [PATCH 46/61] Add two new models in model zoo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 68a275de6..6c56a3c25 100644 --- a/README.md +++ b/README.md @@ -292,7 +292,7 @@ Here is a list of models built on `Qlib`. - [DoubleEnsemble based on LightGBM (Chuheng Zhang, et al. ICDM 2020)](qlib/contrib/model/double_ensemble.py) - [TCTS based on pytorch (Xueqing Wu, et al. ICML 2021)](qlib/contrib/model/pytorch_tcts.py) - [Transformer based on pytorch (Ashish Vaswani, et al. NeurIPS 2017)](qlib/contrib/model/pytorch_transformer.py) -- [TCTS based on pytorch (Juyong Jiang, et al.)](qlib/contrib/model/pytorch_localformer.py) +- [Localformer based on pytorch (Juyong Jiang, et al.)](qlib/contrib/model/pytorch_localformer.py) Your PR of new Quant models is highly welcomed. 
From 48af7126b6f1fb9ac78b9f5fe6ddb88afe46831b Mon Sep 17 00:00:00 2001 From: you-n-g Date: Thu, 22 Jul 2021 11:07:09 +0800 Subject: [PATCH 47/61] Update news about models --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6c56a3c25..422046c13 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Recent released features | Feature | Status | | -- | ------ | +| Transformer & Localformer | [Released](https://github.com/microsoft/qlib/pull/508) on July 22, 2021 | | Release Qlib v0.7.0 | [Released](https://github.com/microsoft/qlib/releases/tag/v0.7.0) on July 12, 2021 | | TCTS Model | [Released](https://github.com/microsoft/qlib/pull/491) on July 1, 2021 | | Online serving and automatic model rolling | :star: [Released](https://github.com/microsoft/qlib/pull/290) on May 17, 2021 | From 3810a4cd33e33a814a59d7709ff5645b3ebc9e48 Mon Sep 17 00:00:00 2001 From: panshuaiyin <56989678+panshuaiyin@users.noreply.github.com> Date: Thu, 22 Jul 2021 13:21:54 +0800 Subject: [PATCH 48/61] Update data.rst use own alpha-factor --- docs/component/data.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/component/data.rst b/docs/component/data.rst index 73072f369..a3dc74052 100644 --- a/docs/component/data.rst +++ b/docs/component/data.rst @@ -179,6 +179,7 @@ After conversion, users can find their Qlib format data in the directory `~/.qli The Restoration factor. Normally, ``factor = adjusted_price / original_price``, `adjusted price` reference: `split adjusted `_ In the convention of `Qlib` data processing, `open, close, high, low, volume, money and factor` will be set to NaN if the stock is suspended. + If you want to use your own alpha-factor which can't be calculate by OCHLV, like PE, EPS and so on, you could add it to the CSV files with OHCLV together and then dump it to the Qlib format data. 
Stock Pool (Market) -------------------------------- From 2c8a3ded08c249f18d8d3e71670c3b6c68ac79cb Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Thu, 22 Jul 2021 15:20:03 +0000 Subject: [PATCH 49/61] high_performance_data_structure --- qlib/backtest/exchange.py | 106 +------ qlib/backtest/high_performance_ds.py | 414 +++++++++++++++++++++++++++ qlib/backtest/report.py | 277 +++--------------- 3 files changed, 453 insertions(+), 344 deletions(-) create mode 100644 qlib/backtest/high_performance_ds.py diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index 8d02e7893..edcd7baaf 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -4,7 +4,7 @@ import random import logging -from typing import List, Tuple, Union +from typing import List, Tuple, Union, Callable, Iterable import numpy as np import pandas as pd @@ -15,6 +15,7 @@ from ..config import C, REG_CN from ..utils.resam import resam_ts_data, ts_data_last from ..log import get_module_logger from .order import Order, OrderDir, OrderHelper +from .high_performane_ds import PandasQuote class Exchange: @@ -32,6 +33,7 @@ class Exchange: close_cost=0.0025, min_cost=5, extra_quote=None, + quote_cls=PandasQuote, **kwargs, ): """__init__ @@ -143,7 +145,8 @@ class Exchange: self.get_quote_from_qlib() # init quote by quote_df - self.quote = PandasQuote(self.quote_df) + self.quote_cls = quote_cls + self.quote = self.quote_cls(self.quote_df) def get_quote_from_qlib(self): # get stock data from qlib @@ -593,102 +596,3 @@ class Exchange: # cache to avoid recreate the same instance self._order_helper = OrderHelper(self) return self._order_helper - - -class BaseQuote: - def __init__(self, quote_df: pd.DataFrame): - self.logger = get_module_logger("online operator", level=logging.INFO) - - def get_all_stock(self): - """return all stock codes - - Return - ------ - Union[list, Dict.keys(), set, tuple] - all stock codes - """ - raise NotImplementedError(f"Please implement the 
`get_all_stock` method") - - def get_data( - self, - stock_id: Union[str, list], - start_time: Union[pd.Timestamp, str], - end_time: Union[pd.Timestamp, str], - fields: Union[str, list] = None, - method: Union[str, "Callable"] = None, - ): - """get the specific fields of stock data during start time and end_time, - and apply method to the data. - - Example: - .. code-block:: - $close $volume - instrument datetime - SH600000 2010-01-04 86.778313 16162960.0 - 2010-01-05 87.433578 28117442.0 - 2010-01-06 85.713585 23632884.0 - 2010-01-07 83.788803 20813402.0 - 2010-01-08 84.730675 16044853.0 - - SH600655 2010-01-04 2699.567383 158193.328125 - 2010-01-08 2612.359619 77501.406250 - 2010-01-11 2712.982422 160852.390625 - 2010-01-12 2788.688232 164587.937500 - 2010-01-13 2790.604004 145460.453125 - - print(get_data(stock_id=["SH600000", "SH600655"], start_time="2010-01-04", end_time="2010-01-05", fields=["$close", "$volume"], method="last")) - - $close $volume - instrument - SH600000 87.433578 28117442.0 - SH600655 2699.567383 158193.328125 - - print(get_data(stock_id="SH600000", start_time="2010-01-04", end_time="2010-01-05", fields=["$close", "$volume"], method="last")) - - $close 87.433578 - $volume 28117442.0 - - print(get_data(stock_id="SH600000", start_time="2010-01-04", end_time="2010-01-05", fields="$close", method="last")) - - 87.433578 - - Parameters - ---------- - stock_id: Union[str, list] - start_time : Union[pd.Timestamp, str] - closed start time for backtest - end_time : Union[pd.Timestamp, str] - closed end time for backtest - fields : Union[str, List] - the columns of data to fetch - method : Union[str, Callable] - the method apply to data. - e.g ["None", "last", "all", "sum", "mean", "any", qlib/utils/resam.py/ts_data_last] - - Return - ---------- - Union[None, float, pd.Series, pd.DataFrame] - The resampled DataFrame/Series/value, return None when the resampled data is empty. 
- """ - - raise NotImplementedError(f"Please implement the `get_data` method") - - -class PandasQuote(BaseQuote): - def __init__(self, quote_df: pd.DataFrame): - super().__init__(quote_df=quote_df) - quote_dict = {} - for stock_id, stock_val in quote_df.groupby(level="instrument"): - quote_dict[stock_id] = stock_val.droplevel(level="instrument") - self.data = quote_dict - - def get_all_stock(self): - return self.data.keys() - - def get_data(self, stock_id, start_time, end_time, fields=None, method=None): - if fields is None: - return resam_ts_data(self.data[stock_id], start_time, end_time, method=method) - elif isinstance(fields, (str, list)): - return resam_ts_data(self.data[stock_id][fields], start_time, end_time, method=method) - else: - raise ValueError(f"fields must be None, str or list") diff --git a/qlib/backtest/high_performance_ds.py b/qlib/backtest/high_performance_ds.py new file mode 100644 index 000000000..3e5a9d8e2 --- /dev/null +++ b/qlib/backtest/high_performance_ds.py @@ -0,0 +1,414 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ + +import logging +from typing import List, Tuple, Union, Callable, Iterable, Dict +from collections import OrderedDict + +import inspect +import pandas as pd + +from ..utils.resam import resam_ts_data +from ..log import get_module_logger + + +class BaseQuote: + def __init__(self, quote_df: pd.DataFrame): + self.logger = get_module_logger("online operator", level=logging.INFO) + + def get_all_stock(self) -> Iterable: + """return all stock codes + + Return + ------ + Iterable + all stock codes + """ + raise NotImplementedError(f"Please implement the `get_all_stock` method") + + def get_data( + self, + stock_id: Union[str, list], + start_time: Union[pd.Timestamp, str], + end_time: Union[pd.Timestamp, str], + fields: Union[str, list] = None, + method: Union[str, Callable] = None, + ) -> Union[None, float, pd.Series, pd.DataFrame]: + """get the specific fields of stock data during start time and end_time, + and apply method to the data. + + Example: + .. code-block:: + $close $volume + instrument datetime + SH600000 2010-01-04 86.778313 16162960.0 + 2010-01-05 87.433578 28117442.0 + 2010-01-06 85.713585 23632884.0 + 2010-01-07 83.788803 20813402.0 + 2010-01-08 84.730675 16044853.0 + + SH600655 2010-01-04 2699.567383 158193.328125 + 2010-01-08 2612.359619 77501.406250 + 2010-01-11 2712.982422 160852.390625 + 2010-01-12 2788.688232 164587.937500 + 2010-01-13 2790.604004 145460.453125 + + print(get_data(stock_id=["SH600000", "SH600655"], start_time="2010-01-04", end_time="2010-01-05", fields=["$close", "$volume"], method="last")) + + $close $volume + instrument + SH600000 87.433578 28117442.0 + SH600655 2699.567383 158193.328125 + + print(get_data(stock_id="SH600000", start_time="2010-01-04", end_time="2010-01-05", fields=["$close", "$volume"], method="last")) + + $close 87.433578 + $volume 28117442.0 + + print(get_data(stock_id="SH600000", start_time="2010-01-04", end_time="2010-01-05", fields="$close", method="last")) + + 87.433578 + + Parameters + ---------- + 
stock_id: Union[str, list] + start_time : Union[pd.Timestamp, str] + closed start time for backtest + end_time : Union[pd.Timestamp, str] + closed end time for backtest + fields : Union[str, List] + the columns of data to fetch + method : Union[str, Callable] + the method apply to data. + e.g ["None", "last", "all", "sum", "mean", "any", qlib/utils/resam.py/ts_data_last] + + Return + ---------- + Union[None, float, pd.Series, pd.DataFrame] + The resampled DataFrame/Series/value, return None when the resampled data is empty. + """ + + raise NotImplementedError(f"Please implement the `get_data` method") + + +class PandasQuote(BaseQuote): + def __init__(self, quote_df: pd.DataFrame): + super().__init__(quote_df=quote_df) + quote_dict = {} + for stock_id, stock_val in quote_df.groupby(level="instrument"): + quote_dict[stock_id] = stock_val.droplevel(level="instrument") + self.data = quote_dict + + def get_all_stock(self): + return self.data.keys() + + def get_data(self, stock_id, start_time, end_time, fields=None, method=None): + if fields is None: + return resam_ts_data(self.data[stock_id], start_time, end_time, method=method) + elif isinstance(fields, (str, list)): + return resam_ts_data(self.data[stock_id][fields], start_time, end_time, method=method) + else: + raise ValueError(f"fields must be None, str or list") + + +class BaseSingleMetric: + """ + The data structure of the single metric. + The following methods are used for computing metrics in one indicator. 
+ """ + + def __init__(self, metric: Union[dict, pd.Series]): + pass + + def __add__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": + pass + + def __radd__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": + return self + other + + def __sub__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": + pass + + def __rsub__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": + pass + + def __mul__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": + pass + + def __truediv__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": + pass + + def __eq__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": + pass + + def __gt__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": + pass + + def __lt__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": + pass + + def __len__(self) -> int: + pass + + def sum(self) -> float: + pass + + def mean(self) -> float: + pass + + def count(self) -> int: + pass + + def abs(self) -> "BaseSingleMetric": + pass + + def astype(self, type: type) -> "BaseSingleMetric": + pass + + @property + def empty(self) -> bool: + """If metric is empyt, return True.""" + pass + + def add(self, other: "BaseSingleMetric", fill_value: float = None) -> "BaseSingleMetric": + """Replace np.NaN with fill_value in two metrics and add them.""" + pass + + def apply(self, map_dict: dict) -> "BaseSingleMetric": + """Replace the value of metric according to map_dict.""" + pass + + +class BaseOrderIndicator: + """ + The data structure of order indicator. + !!!NOTE: There are two ways to organize the data structure. Please choose a better way. + 1. one way is use BaseSingleMetric to represent each metric. For example, the data + structure of PandasOrderIndicator is Dict[str: PandasSingleMetric]. 
It uses + PandasSingleMetric based on pd.Series to represent each metric. + 2. the another way doesn't BaseSingleMetric to represent each metric. The data + structure of PandasOrderIndicator is a whole matrix. + """ + + def assign(self, col: str, metric: Union[dict, pd.Series]): + """assign one metric. + + Parameters + ---------- + col : str + the metric name of one metric. + metric : Union[dict, pd.Series] + the metric data. + """ + + pass + + def transfer(self, func: Callable, new_col: str = None) -> Union[None, BaseSingleMetric]: + """compute new metric with existing metrics. + + Parameters + ---------- + func : Callable + the func of computing new metric. + the kwargs of func will be replaced with metric data by name in this function. + e.g. + def func(pa): + return (pa > 0).astype(int).sum() / pa.count() + new_col : str, optional + New metric will be assigned in the data if new_col is not None, by default None. + + Return + ---------- + BaseSingleMetric + new metric. + """ + + pass + + def get_metric_series(self, metric: str) -> pd.Series: + """return the single metric with pd.Series format. + + Parameters + ---------- + metric : str + the metric name. + + Return + ---------- + pd.Series + the single metric. + If there is no metric name in the data, return pd.Series(). + """ + + pass + + @staticmethod + def sum_all_indicators( + indicators: list, metrics: Union[str, List[str]], fill_value: float = None + ) -> Dict[str, BaseSingleMetric]: + """sum indicators with the same metrics. + + Parameters + ---------- + indicators : List[BaseOrderIndicator] + the list of all inner indicators. + metrics : Union[str, List[str]] + all metrics needs ot be sumed. + fill_value : float, optional + fill np.NaN with value. By default None. + + Return + ---------- + Dict[str: PandasSingleMetric] + a dict of metric name and data. 
+ """ + + pass + + +class PandasSingleMetric: + """Each SingleMetric is based on pd.Series.""" + + def __init__(self, metric: Union[dict, pd.Series]): + if isinstance(metric, dict): + self.metric = pd.Series(metric) + elif isinstance(metric, pd.Series): + self.metric = metric + else: + raise ValueError(f"metric must be dict or pd.Series") + + def __add__(self, other): + if isinstance(other, (int, float)): + return PandasSingleMetric(self.metric + other) + elif isinstance(other, PandasSingleMetric): + return PandasSingleMetric(self.metric + other.metric) + else: + return NotImplemented + + def __sub__(self, other): + if isinstance(other, (int, float)): + return PandasSingleMetric(self.metric - other) + elif isinstance(other, PandasSingleMetric): + return PandasSingleMetric(self.metric - other.metric) + else: + return NotImplemented + + def __rsub__(self, other): + if isinstance(other, (int, float)): + return PandasSingleMetric(other - self.metric) + elif isinstance(other, PandasSingleMetric): + return PandasSingleMetric(other.metric - self.metric) + else: + return NotImplemented + + def __mul__(self, other): + if isinstance(other, (int, float)): + return PandasSingleMetric(self.metric * other) + elif isinstance(other, PandasSingleMetric): + return PandasSingleMetric(self.metric * other.metric) + else: + return NotImplemented + + def __truediv__(self, other): + if isinstance(other, (int, float)): + return PandasSingleMetric(self.metric / other) + elif isinstance(other, PandasSingleMetric): + return PandasSingleMetric(self.metric / other.metric) + else: + return NotImplemented + + def __eq__(self, other): + if isinstance(other, (int, float)): + return PandasSingleMetric(self.metric == other) + elif isinstance(other, PandasSingleMetric): + return PandasSingleMetric(self.metric == other.metric) + else: + return NotImplemented + + def __gt__(self, other): + if isinstance(other, (int, float)): + return PandasSingleMetric(self.metric < other) + elif isinstance(other, 
PandasSingleMetric): + return PandasSingleMetric(self.metric < other.metric) + else: + return NotImplemented + + def __lt__(self, other): + if isinstance(other, (int, float)): + return PandasSingleMetric(self.metric > other) + elif isinstance(other, PandasSingleMetric): + return PandasSingleMetric(self.metric > other.metric) + else: + return NotImplemented + + def __len__(self): + return len(self.metric) + + def sum(self): + return self.metric.sum() + + def mean(self): + return self.metric.mean() + + def count(self): + return self.metric.count() + + def abs(self): + return PandasSingleMetric(self.metric.abs()) + + def astype(self, type): + return PandasSingleMetric(self.metric.astype(type)) + + @property + def empty(self): + return self.metric.empty + + def add(self, other, fill_value=None): + return PandasSingleMetric(self.metric.add(other.metric, fill_value=fill_value)) + + def apply(self, map_dict: dict): + return PandasSingleMetric(self.metric.apply(map_dict)) + + +class PandasOrderIndicator(BaseOrderIndicator): + """ + The data structure is OrderedDict(str: PandasSingleMetric). + Each PandasSingleMetric based on pd.Series is one metric. + Str is the name of metric. 
+ """ + + def __init__(self): + self.data: Dict[str, PandasSingleMetric] = OrderedDict() + + def assign(self, col: str, metric: Union[dict, pd.Series]): + self.data[col] = PandasSingleMetric(metric) + + def transfer(self, func: Callable, new_col: str = None) -> Union[None, PandasSingleMetric]: + func_sig = inspect.signature(func).parameters.keys() + func_kwargs = {sig: self.data[sig] for sig in func_sig} + tmp_metric = func(**func_kwargs) + if new_col is not None: + self.data[new_col] = tmp_metric + else: + return tmp_metric + + def get_metric_series(self, metric: str) -> Union[pd.Series]: + if metric in self.data: + return self.data[metric].metric + else: + return pd.Series() + + @staticmethod + def sum_all_indicators( + indicators: list, metrics: Union[str, List[str]], fill_value=None + ) -> Dict[str, PandasSingleMetric]: + metric_dict = {} + if isinstance(metrics, str): + metrics = [metrics] + for metric in metrics: + tmp_metric = PandasSingleMetric({}) + for indicator in indicators: + tmp_metric = tmp_metric.add(indicator.data[metric], fill_value) + metric_dict[metric] = tmp_metric.metric + return metric_dict diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 1ae50f5e2..98d8b4f63 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -5,7 +5,7 @@ from collections import OrderedDict from logging import warning import pathlib -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Tuple, Union, Callable import warnings import inspect @@ -18,6 +18,7 @@ from qlib.backtest.exchange import Exchange from qlib.backtest.order import BaseTradeDecision, Order, OrderDir from qlib.backtest.utils import TradeCalendarManager +from .high_performane_ds import PandasOrderIndicator from ..data import D from ..tests.config import CSI300_BENCH from ..utils.resam import get_higher_eq_freq_feature, resam_ts_data @@ -254,10 +255,12 @@ class Indicator: """ - def __init__(self): + def __init__(self, 
order_indicator_cls=PandasOrderIndicator): + self.order_indicator_cls = order_indicator_cls + # order indicator is metrics for a single order for a specific step self.order_indicator_his = OrderedDict() - self.order_indicator = PandasOrderIndicator() + self.order_indicator = self.order_indicator_cls() # trade indicator is metrics for all orders for a specific step self.trade_indicator_his = OrderedDict() @@ -267,7 +270,7 @@ class Indicator: # def reset(self, trade_calendar: TradeCalendarManager): def reset(self): - self.order_indicator = PandasOrderIndicator() + self.order_indicator = self.order_indicator_cls() self.trade_indicator = OrderedDict() # self._trade_calendar = trade_calendar @@ -291,6 +294,7 @@ class Indicator: trade_value[order.stock_id] = _trade_val * order.sign trade_cost[order.stock_id] = _trade_cost trade_dir[order.stock_id] = order.direction + # The PA in the innermost layer is meanless pa[order.stock_id] = 0 self.order_indicator.assign("amount", amount) @@ -306,32 +310,33 @@ class Indicator: def _update_order_fulfill_rate(self): def func(deal_amount, amount): return deal_amount / amount + self.order_indicator.transfer(func, "ffr") def update_order_indicators(self, trade_info: list): self._update_order_trade_info(trade_info=trade_info) self._update_order_fulfill_rate() - # self._update_order_price_advantage() def _agg_order_trade_info(self, inner_order_indicators: List[Dict[str, pd.Series]]): - all_metric = ["inner_amount", "deal_amount", "trade_price", - "trade_value", "trade_cost", "trade_dir"] - metric_dict = PandasOrderIndicator.agg_all_indicators(inner_order_indicators, all_metric, fill_value=0) + all_metric = ["inner_amount", "deal_amount", "trade_price", "trade_value", "trade_cost", "trade_dir"] + metric_dict = self.order_indicator_cls.sum_all_indicators(inner_order_indicators, all_metric, fill_value=0) for metric in metric_dict: self.order_indicator.assign(metric, metric_dict[metric]) def func(trade_price, deal_amount): return trade_price 
/ deal_amount + self.order_indicator.transfer(func, "trade_price") def func_apply(trade_dir): return trade_dir.apply(Order.parse_dir) + self.order_indicator.transfer(func_apply, "trade_dir") def _update_trade_amount(self, outer_trade_decision: BaseTradeDecision): # NOTE: these indicator is designed for order execution, so the decision: List[Order] = outer_trade_decision.get_decision() - if decision is None: + if len(decision) == 0: self.order_indicator.assign("amount", {}) else: self.order_indicator.assign("amount", {order.stock_id: order.amount_delta for order in decision}) @@ -450,11 +455,14 @@ class Indicator: def _agg_order_price_advantage(self): def if_empty_func(trade_price): return trade_price.empty + if_empty = self.order_indicator.transfer(if_empty_func) if not if_empty: + def func(trade_dir, trade_price, base_price): sign = 1 - trade_dir * 2 return sign * (trade_price / base_price - 1) + self.order_indicator.transfer(func, "pa") else: self.order_indicator.assign("pa", {}) @@ -471,33 +479,45 @@ class Indicator: self._update_trade_amount(outer_trade_decision) self._update_order_fulfill_rate() pa_config = indicator_config.get("pa_config", {}) - self._agg_base_price(inner_order_indicators, decision_list, trade_exchange, pa_config=pa_config) # TODO + self._agg_base_price(inner_order_indicators, decision_list, trade_exchange, pa_config=pa_config) # TODO self._agg_order_price_advantage() def _cal_trade_fulfill_rate(self, method="mean"): if method == "mean": + def func(ffr): return ffr.mean() + elif method == "amount_weighted": + def func(ffr, deal_amount): return (ffr * deal_amount.abs()).sum() / (deal_amount.abs().sum()) + elif method == "value_weighted": + def func(ffr, trade_value): return (ffr * trade_value.abs()).sum() / (trade_value.abs().sum()) + else: raise ValueError(f"method {method} is not supported!") return self.order_indicator.transfer(func) def _cal_trade_price_advantage(self, method="mean"): if method == "mean": + def func(pa): return pa.mean() + 
elif method == "amount_weighted": + def func(pa, deal_amount): return (pa * deal_amount.abs()).sum() / (deal_amount.abs().sum()) + elif method == "value_weighted": + def func(pa, trade_value): return (pa * trade_value.abs()).sum() / (trade_value.abs().sum()) + else: raise ValueError(f"method {method} is not supported!") return self.order_indicator.transfer(func) @@ -505,21 +525,25 @@ class Indicator: def _cal_trade_positive_rate(self): def func(pa): return (pa > 0).astype(int).sum() / pa.count() + return self.order_indicator.transfer(func) def _cal_deal_amount(self): def func(deal_amount): return deal_amount.abs().sum() + return self.order_indicator.transfer(func) def _cal_trade_value(self): def func(trade_value): return trade_value.abs().sum() + return self.order_indicator.transfer(func) def _cal_trade_order_count(self): def func(amount): return amount.count() + return self.order_indicator.transfer(func) def cal_trade_indicators(self, trade_start_time, freq, indicator_config={}): @@ -553,236 +577,3 @@ class Indicator: def generate_trade_indicators_dataframe(self): return pd.DataFrame.from_dict(self.trade_indicator_his, orient="index") - - -class BaseOrderIndicator: - """The data structure of order indicator. - """ - - def __init__(self): - pass - - def assign(self, col: str, metric: Union[dict, pd.Series]): - """assign one metric. - - Parameters - ---------- - col : str - the metric name of one metric. - metric : Union[dict, pd.Series] - the metric data. - """ - - pass - - def transfer(self, func: "Callable", new_col: str = None): - """compute new metric with existing. - - Parameters - ---------- - func : Callable - the func of computing new metric. - the kwargs of func will be replaced with metric data by name in this function. - e.g. - def func(pa): - return (pa > 0).astype(int).sum() / pa.count() - new_col : str, optional - New metric will be assigned in the data if new_col is not None, by default None. - - Return - ---------- - SingleMetric - new metric. 
- """ - - pass - - def get_metric_series(self, metric: str): - """return the single metric with pd.Series format - - Parameters - ---------- - metric : str - the metric name. - - Return - ---------- - pd.Series - the single metric. - If there is no metric name in the data, return pd.Series(). - """ - - pass - - @classmethod - def agg_all_indicators(indicators: list, metrics: Union[str, List[str]], fill_value: float = None): - """sum indicators with the same metrics. - - Parameters - ---------- - indicators : List[BaseOrderIndicator] - the list of all inner indicators. - metrics : Union[str, List[str]] - all metrics needs ot be sumed. - fill_value : float, optional - fill np.NaN with value. By default None. - - Return - ---------- - Dict[str: SingleMetric] - a dict of metric name and data. - """ - - pass - - -class PandasOrderIndicator(BaseOrderIndicator): - """The data structure is OrderedDict(str: SingleMetric). - Each SingleMetric based on pd.Series is one metric. - Str is the name of metric. - """ - - class SingleMetric: - """The data structure of the single metric. - The following methods are used for computing metrics in one indicator. 
- """ - - def __init__(self, metric: Union[dict, pd.Series]): - if isinstance(metric, dict): - self.metric = pd.Series(metric) - elif isinstance(metric, pd.Series): - self.metric = metric - else: - raise ValueError(f"metric must be dict or pd.Series") - - def __add__(self, other): - if isinstance(other, (int, float)): - return PandasOrderIndicator.SingleMetric(self.metric + other) - elif isinstance(other, PandasOrderIndicator.SingleMetric): - return PandasOrderIndicator.SingleMetric(self.metric + other.metric) - else: - return NotImplemented - - def __radd__(self, other): - if isinstance(other, (int, float)): - return PandasOrderIndicator.SingleMetric(other + self.metric) - elif isinstance(other, PandasOrderIndicator.SingleMetric): - return PandasOrderIndicator.SingleMetric(other.metric + self.metric) - else: - return NotImplemented - - def __sub__(self, other): - if isinstance(other, (int, float)): - return PandasOrderIndicator.SingleMetric(self.metric - other) - elif isinstance(other, PandasOrderIndicator.SingleMetric): - return PandasOrderIndicator.SingleMetric(self.metric - other.metric) - else: - return NotImplemented - - def __rsub__(self, other): - if isinstance(other, (int, float)): - return PandasOrderIndicator.SingleMetric(other - self.metric) - elif isinstance(other, PandasOrderIndicator.SingleMetric): - return PandasOrderIndicator.SingleMetric(other.metric - self.metric) - else: - return NotImplemented - - def __mul__(self, other): - if isinstance(other, (int, float)): - return PandasOrderIndicator.SingleMetric(self.metric * other) - elif isinstance(other, PandasOrderIndicator.SingleMetric): - return PandasOrderIndicator.SingleMetric(self.metric * other.metric) - else: - return NotImplemented - - def __truediv__(self, other): - if isinstance(other, (int, float)): - return PandasOrderIndicator.SingleMetric(self.metric / other) - elif isinstance(other, PandasOrderIndicator.SingleMetric): - return PandasOrderIndicator.SingleMetric(self.metric / 
other.metric) - else: - return NotImplemented - - def __eq__(self, other): - if isinstance(other, (int, float)): - return PandasOrderIndicator.SingleMetric(self.metric == other) - elif isinstance(other, PandasOrderIndicator.SingleMetric): - return PandasOrderIndicator.SingleMetric(self.metric == other.metric) - else: - return NotImplemented - - def __gt__(self, other): - if isinstance(other, (int, float)): - return PandasOrderIndicator.SingleMetric(self.metric < other) - elif isinstance(other, PandasOrderIndicator.SingleMetric): - return PandasOrderIndicator.SingleMetric(self.metric < other.metric) - else: - return NotImplemented - - def __lt__(self, other): - if isinstance(other, (int, float)): - return PandasOrderIndicator.SingleMetric(self.metric > other) - elif isinstance(other, PandasOrderIndicator.SingleMetric): - return PandasOrderIndicator.SingleMetric(self.metric > other.metric) - else: - return NotImplemented - - def __len__(self): - return len(self.metric) - - def sum(self): - return self.metric.sum() - - def mean(self): - return self.metric.mean() - - def count(self): - return self.metric.count() - - def abs(self): - return PandasOrderIndicator.SingleMetric(self.metric.abs()) - - def astype(self, type): - return PandasOrderIndicator.SingleMetric(self.metric.astype(type)) - - @property - def empty(self): - return self.metric.empty - - def add(self, other, fill_value: None): - return PandasOrderIndicator.SingleMetric(self.metric.add(other.metric, fill_value = fill_value)) - - def apply(self, map_dict: dict): - return PandasOrderIndicator.SingleMetric(self.metric.apply(map_dict)) - - def __init__(self): - self.data: Dict[str, self.SingleMetric] = OrderedDict() - - def assign(self, col: str, metric: Union[dict, pd.Series]): - self.data[col] = self.SingleMetric(metric) - - def transfer(self, func: "Callable", new_col: str = None): - func_sig = inspect.signature(func).parameters.keys() - func_kwargs = {sig: self.data[sig] for sig in func_sig} - tmp_metric = 
func(**func_kwargs) - if(new_col is not None): - self.data[new_col] = tmp_metric - return tmp_metric - - def get_metric_series(self, metric: str): - if(metric in self.data): - return self.data[metric].metric - else: - return pd.Series() - - @staticmethod - def agg_all_indicators(indicators: list, metrics: Union[str, List[str]], fill_value = None): - metric_dict = {} - if isinstance(metrics, str): - metrics = [metrics] - for metric in metrics: - tmp_metric = PandasOrderIndicator.SingleMetric({}) - for indicator in indicators: - tmp_metric = tmp_metric.add(indicator.data[metric], fill_value) - metric_dict[metric] = tmp_metric.metric - return metric_dict \ No newline at end of file From 0ec6b87d39e9f8fcccc99abb9a5c904bd3e1c8b0 Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Fri, 23 Jul 2021 05:50:41 +0000 Subject: [PATCH 50/61] fix little bug --- qlib/backtest/exchange.py | 2 +- qlib/backtest/high_performance_ds.py | 53 +++++++++++++++------------- qlib/backtest/report.py | 6 ++-- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index edcd7baaf..5677e855d 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -15,7 +15,7 @@ from ..config import C, REG_CN from ..utils.resam import resam_ts_data, ts_data_last from ..log import get_module_logger from .order import Order, OrderDir, OrderHelper -from .high_performane_ds import PandasQuote +from .high_performance_ds import PandasQuote class Exchange: diff --git a/qlib/backtest/high_performance_ds.py b/qlib/backtest/high_performance_ds.py index 3e5a9d8e2..8a908fbf0 100644 --- a/qlib/backtest/high_performance_ds.py +++ b/qlib/backtest/high_performance_ds.py @@ -25,6 +25,7 @@ class BaseQuote: Iterable all stock codes """ + raise NotImplementedError(f"Please implement the `get_all_stock` method") def get_data( @@ -119,76 +120,80 @@ class BaseSingleMetric: """ def __init__(self, metric: Union[dict, pd.Series]): - pass + raise 
NotImplementedError(f"Please implement the `__init__` method") def __add__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": - pass + raise NotImplementedError(f"Please implement the `__add__` method") def __radd__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": return self + other def __sub__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": - pass + raise NotImplementedError(f"Please implement the `__sub__` method") def __rsub__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": - pass + raise NotImplementedError(f"Please implement the `__rsub__` method") def __mul__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": - pass + raise NotImplementedError(f"Please implement the `__mul__` method") def __truediv__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": - pass + raise NotImplementedError(f"Please implement the `__truediv__` method") def __eq__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": - pass + raise NotImplementedError(f"Please implement the `__eq__` method") def __gt__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": - pass + raise NotImplementedError(f"Please implement the `__gt__` method") def __lt__(self, other: Union["BaseSingleMetric", int, float]) -> "BaseSingleMetric": - pass + raise NotImplementedError(f"Please implement the `__lt__` method") def __len__(self) -> int: - pass + raise NotImplementedError(f"Please implement the `__len__` method") def sum(self) -> float: - pass + raise NotImplementedError(f"Please implement the `sum` method") def mean(self) -> float: - pass + raise NotImplementedError(f"Please implement the `mean` method") def count(self) -> int: - pass + """Return the count of the single metric, NaN is not included. 
+ """ + + raise NotImplementedError(f"Please implement the `count` method") def abs(self) -> "BaseSingleMetric": - pass + raise NotImplementedError(f"Please implement the `abs` method") def astype(self, type: type) -> "BaseSingleMetric": - pass + raise NotImplementedError(f"Please implement the `astype` method") @property def empty(self) -> bool: """If metric is empyt, return True.""" - pass + raise NotImplementedError(f"Please implement the `empty` method") def add(self, other: "BaseSingleMetric", fill_value: float = None) -> "BaseSingleMetric": """Replace np.NaN with fill_value in two metrics and add them.""" - pass + raise NotImplementedError(f"Please implement the `add` method") - def apply(self, map_dict: dict) -> "BaseSingleMetric": + def map(self, map_dict: dict) -> "BaseSingleMetric": """Replace the value of metric according to map_dict.""" - pass + raise NotImplementedError(f"Please implement the `map` method") class BaseOrderIndicator: """ The data structure of order indicator. !!!NOTE: There are two ways to organize the data structure. Please choose a better way. - 1. one way is use BaseSingleMetric to represent each metric. For example, the data - structure of PandasOrderIndicator is Dict[str: PandasSingleMetric]. It uses + 1. One way is using BaseSingleMetric to represent each metric. For example, the data + structure of PandasOrderIndicator is Dict[str, PandasSingleMetric]. It uses PandasSingleMetric based on pd.Series to represent each metric. - 2. the another way doesn't BaseSingleMetric to represent each metric. The data - structure of PandasOrderIndicator is a whole matrix. + 2. The another way doesn't use BaseSingleMetric to represent each metric. The data + structure of PandasOrderIndicator is a whole matrix. It means you are not neccesary + to inherit the BaseSingleMetric. 
""" def assign(self, col: str, metric: Union[dict, pd.Series]): @@ -367,7 +372,7 @@ class PandasSingleMetric: def add(self, other, fill_value=None): return PandasSingleMetric(self.metric.add(other.metric, fill_value=fill_value)) - def apply(self, map_dict: dict): + def map(self, map_dict: dict): return PandasSingleMetric(self.metric.apply(map_dict)) diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 98d8b4f63..375100cba 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -6,8 +6,6 @@ from collections import OrderedDict from logging import warning import pathlib from typing import Dict, List, Tuple, Union, Callable -import warnings -import inspect import numpy as np import pandas as pd @@ -18,7 +16,7 @@ from qlib.backtest.exchange import Exchange from qlib.backtest.order import BaseTradeDecision, Order, OrderDir from qlib.backtest.utils import TradeCalendarManager -from .high_performane_ds import PandasOrderIndicator +from .high_performance_ds import PandasOrderIndicator from ..data import D from ..tests.config import CSI300_BENCH from ..utils.resam import get_higher_eq_freq_feature, resam_ts_data @@ -329,7 +327,7 @@ class Indicator: self.order_indicator.transfer(func, "trade_price") def func_apply(trade_dir): - return trade_dir.apply(Order.parse_dir) + return trade_dir.map(Order.parse_dir) self.order_indicator.transfer(func_apply, "trade_dir") From a8ea66b83ea7345d12746a3da334ba070347ff8f Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Fri, 23 Jul 2021 09:33:04 +0000 Subject: [PATCH 51/61] black --- qlib/backtest/high_performance_ds.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/qlib/backtest/high_performance_ds.py b/qlib/backtest/high_performance_ds.py index 8a908fbf0..104be5b9c 100644 --- a/qlib/backtest/high_performance_ds.py +++ b/qlib/backtest/high_performance_ds.py @@ -159,8 +159,7 @@ class BaseSingleMetric: raise NotImplementedError(f"Please implement the `mean` method") def count(self) -> 
int: - """Return the count of the single metric, NaN is not included. - """ + """Return the count of the single metric, NaN is not included.""" raise NotImplementedError(f"Please implement the `count` method") From 9d732e964667aa39fa72cf0348d20f1a535f72f2 Mon Sep 17 00:00:00 2001 From: wangwenxi-handsome <77676340+wangwenxi-handsome@users.noreply.github.com> Date: Fri, 23 Jul 2021 18:25:24 +0800 Subject: [PATCH 52/61] Update Action --- .github/workflows/test.yml | 22 +++++++++++----------- .github/workflows/test_macos.yml | 8 ++++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7a78d2d9a..490c06246 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -56,16 +56,16 @@ jobs: fi shell: bash - - name: Test workflow by config (install from pip) - run: | - if [ "$RUNNER_OS" == "Windows" ]; then - $CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml - $CONDA\\python.exe -m pip uninstall -y pyqlib - else - $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml - sudo $CONDA/bin/python -m pip uninstall -y pyqlib - fi - shell: bash + # - name: Test workflow by config (install from pip) + # run: | + # if [ "$RUNNER_OS" == "Windows" ]; then + # $CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml + # $CONDA\\python.exe -m pip uninstall -y pyqlib + # else + # $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml + # sudo $CONDA/bin/python -m pip uninstall -y pyqlib + # fi + # shell: bash # Test Qlib installed from source - name: Install Qlib from source @@ -111,4 +111,4 @@ jobs: else $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml fi - shell: bash \ No newline at end of file + shell: bash diff 
--git a/.github/workflows/test_macos.yml b/.github/workflows/test_macos.yml index 57aa87ded..5b34d84c2 100644 --- a/.github/workflows/test_macos.yml +++ b/.github/workflows/test_macos.yml @@ -44,10 +44,10 @@ jobs: run: | $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn - - name: Test workflow by config (install from pip) - run: | - $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml - sudo $CONDA/bin/python -m pip uninstall -y pyqlib + # - name: Test workflow by config (install from pip) + # run: | + # $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml + # sudo $CONDA/bin/python -m pip uninstall -y pyqlib # Test Qlib installed from source - name: Install Qlib from source From 6dcbf51298f8f5abf0c9d2a0ed90bba29626073d Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Sat, 24 Jul 2021 11:36:12 +0000 Subject: [PATCH 53/61] update action --- .github/workflows/test.yml | 45 +++++++++++++++++++------------- .github/workflows/test_macos.yml | 18 ++++++++----- 2 files changed, 38 insertions(+), 25 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 490c06246..29265b1eb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -36,25 +36,25 @@ jobs: shell: bash # Test Qlib installed with pip - - name: Install Qlib with pip - run: | - if [ "$RUNNER_OS" == "Windows" ]; then - $CONDA\\python.exe -m pip install numpy==1.19.5 - $CONDA\\python.exe -m pip install pyqlib --ignore-installed ruamel.yaml numpy --user - else - sudo $CONDA/bin/python -m pip install numpy==1.19.5 - sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy - fi - shell: bash + # - name: Install Qlib with pip + # run: | + # if [ "$RUNNER_OS" == "Windows" ]; then + # $CONDA\\python.exe -m pip install numpy==1.19.5 + # $CONDA\\python.exe -m pip install 
pyqlib --ignore-installed ruamel.yaml numpy --user + # else + # sudo $CONDA/bin/python -m pip install numpy==1.19.5 + # sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy + # fi + # shell: bash - - name: Test data downloads - run: | - if [ "$RUNNER_OS" == "Windows" ]; then - $CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn - else - $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn - fi - shell: bash + # - name: Test data downloads + # run: | + # if [ "$RUNNER_OS" == "Windows" ]; then + # $CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + # else + # $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + # fi + # shell: bash # - name: Test workflow by config (install from pip) # run: | @@ -83,6 +83,15 @@ jobs: fi shell: bash + - name: Test data downloads + run: | + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + else + $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + fi + shell: bash + - name: Install test dependencies run: | if [ "$RUNNER_OS" == "Windows" ]; then diff --git a/.github/workflows/test_macos.yml b/.github/workflows/test_macos.yml index 5b34d84c2..e52c27786 100644 --- a/.github/workflows/test_macos.yml +++ b/.github/workflows/test_macos.yml @@ -30,19 +30,19 @@ jobs: $CONDA/bin/python -m black qlib -l 120 --check --diff # Test Qlib installed with pip - - name: Install Qlib with pip - run: | - sudo $CONDA/bin/python -m pip install numpy==1.19.5 - sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy + # - name: Install Qlib with pip + # run: | + # sudo $CONDA/bin/python -m pip 
install numpy==1.19.5 + # sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy - name: Install Lightgbm for MacOS run: | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)" HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm - - name: Test data downloads - run: | - $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + # - name: Test data downloads + # run: | + # $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn # - name: Test workflow by config (install from pip) # run: | @@ -57,6 +57,10 @@ jobs: sudo $CONDA/bin/python -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't sudo $CONDA/bin/python setup.py install + - name: Test data downloads + run: | + $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + - name: Install test dependencies run: | sudo $CONDA/bin/python -m pip install --upgrade pip From 4ffb05ae596468cff11f1ec70d0b7a2169a9617a Mon Sep 17 00:00:00 2001 From: wangwenxi-handsome <77676340+wangwenxi-handsome@users.noreply.github.com> Date: Fri, 23 Jul 2021 18:25:24 +0800 Subject: [PATCH 54/61] Update Action --- .github/workflows/test.yml | 67 ++++++++++++++++++-------------- .github/workflows/test_macos.yml | 26 +++++++------ 2 files changed, 53 insertions(+), 40 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7a78d2d9a..29265b1eb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -36,36 +36,36 @@ jobs: shell: bash # Test Qlib installed with pip - - name: Install Qlib with pip - run: | - if [ "$RUNNER_OS" == "Windows" ]; then - $CONDA\\python.exe -m pip install numpy==1.19.5 - $CONDA\\python.exe -m pip install pyqlib --ignore-installed ruamel.yaml 
numpy --user - else - sudo $CONDA/bin/python -m pip install numpy==1.19.5 - sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy - fi - shell: bash + # - name: Install Qlib with pip + # run: | + # if [ "$RUNNER_OS" == "Windows" ]; then + # $CONDA\\python.exe -m pip install numpy==1.19.5 + # $CONDA\\python.exe -m pip install pyqlib --ignore-installed ruamel.yaml numpy --user + # else + # sudo $CONDA/bin/python -m pip install numpy==1.19.5 + # sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy + # fi + # shell: bash - - name: Test data downloads - run: | - if [ "$RUNNER_OS" == "Windows" ]; then - $CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn - else - $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn - fi - shell: bash + # - name: Test data downloads + # run: | + # if [ "$RUNNER_OS" == "Windows" ]; then + # $CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + # else + # $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + # fi + # shell: bash - - name: Test workflow by config (install from pip) - run: | - if [ "$RUNNER_OS" == "Windows" ]; then - $CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml - $CONDA\\python.exe -m pip uninstall -y pyqlib - else - $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml - sudo $CONDA/bin/python -m pip uninstall -y pyqlib - fi - shell: bash + # - name: Test workflow by config (install from pip) + # run: | + # if [ "$RUNNER_OS" == "Windows" ]; then + # $CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml + # $CONDA\\python.exe -m pip uninstall -y pyqlib 
+ # else + # $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml + # sudo $CONDA/bin/python -m pip uninstall -y pyqlib + # fi + # shell: bash # Test Qlib installed from source - name: Install Qlib from source @@ -83,6 +83,15 @@ jobs: fi shell: bash + - name: Test data downloads + run: | + if [ "$RUNNER_OS" == "Windows" ]; then + $CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + else + $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + fi + shell: bash + - name: Install test dependencies run: | if [ "$RUNNER_OS" == "Windows" ]; then @@ -111,4 +120,4 @@ jobs: else $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml fi - shell: bash \ No newline at end of file + shell: bash diff --git a/.github/workflows/test_macos.yml b/.github/workflows/test_macos.yml index 57aa87ded..e52c27786 100644 --- a/.github/workflows/test_macos.yml +++ b/.github/workflows/test_macos.yml @@ -30,24 +30,24 @@ jobs: $CONDA/bin/python -m black qlib -l 120 --check --diff # Test Qlib installed with pip - - name: Install Qlib with pip - run: | - sudo $CONDA/bin/python -m pip install numpy==1.19.5 - sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy + # - name: Install Qlib with pip + # run: | + # sudo $CONDA/bin/python -m pip install numpy==1.19.5 + # sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy - name: Install Lightgbm for MacOS run: | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)" HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm - - name: Test data downloads - run: | - $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + # - name: Test data downloads + # run: | + # 
$CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn - - name: Test workflow by config (install from pip) - run: | - $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml - sudo $CONDA/bin/python -m pip uninstall -y pyqlib + # - name: Test workflow by config (install from pip) + # run: | + # $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml + # sudo $CONDA/bin/python -m pip uninstall -y pyqlib # Test Qlib installed from source - name: Install Qlib from source @@ -57,6 +57,10 @@ jobs: sudo $CONDA/bin/python -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't sudo $CONDA/bin/python setup.py install + - name: Test data downloads + run: | + $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn + - name: Install test dependencies run: | sudo $CONDA/bin/python -m pip install --upgrade pip From e88c45e13ceeafbf8761f068e6b1398265404f38 Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Sun, 25 Jul 2021 12:38:54 +0000 Subject: [PATCH 55/61] update position --- qlib/backtest/account.py | 14 ++++++++ qlib/backtest/position.py | 68 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py index 13213c344..03e51c740 100644 --- a/qlib/backtest/account.py +++ b/qlib/backtest/account.py @@ -73,6 +73,18 @@ class Account: pos_type: str = "Position", port_metr_enabled: bool = True, ): + """the trade account of backtest. 
+ + Parameters + ---------- + init_cash : float, optional + initial cash, by default 1e9 + position_dict : Dict[stock_id, {"amount": int, "price"(optional): float}], optional + initial stocks with amount and price, + if there is no price key in the dict of stocks, it will be filled by latest close price from qlib. + by default {}. + """ + self._pos_type = pos_type self._port_metr_enabled = port_metr_enabled self.init_vars(init_cash, position_dict, freq, benchmark_config) @@ -93,6 +105,8 @@ class Account: "kwargs": { "cash": init_cash, "position_dict": position_dict, + "start_time": benchmark_config["start_time"], + "freq": freq, }, "module_path": "qlib.backtest.position", } diff --git a/qlib/backtest/position.py b/qlib/backtest/position.py index 7c32edc81..92b66a342 100644 --- a/qlib/backtest/position.py +++ b/qlib/backtest/position.py @@ -4,10 +4,14 @@ import copy import pathlib -from typing import Dict, List +from typing import Dict, List, Union + import pandas as pd +from datetime import timedelta import numpy as np + from .order import Order +from ..data.data import D class BasePosition: @@ -199,14 +203,72 @@ class Position(BasePosition): } """ - def __init__(self, cash=0, position_dict={}): + def __init__(self, start_time, freq, cash: float = 0, position_dict: Dict[str, Dict[str, float]] = {}): + """Init position by cash and position_dict. + + Parameters + ---------- + start_time : + the start time of backtest. It's for filling the initial value of stocks. + cash : float, optional + initial cash in account, by default 0 + position_dict : Dict[stock_id, {"amount": int, "price"(optional): float}], optional + initial stocks with parameters amount and price, + if there is no price key in the dict of stocks, it will be filled by _fill_stock_value. + by default {}. + """ + # NOTE: The position dict must be copied!!! 
# Otherwise the initial value self.init_cash = cash - self.position = position_dict.copy() + self.position = self._fill_stock_value(position_dict.copy(), start_time, freq) self.position["cash"] = cash self.position["now_account_value"] = self.calculate_value() + def _fill_stock_value( + self, position_dict: dict, start_time: Union[str, pd.Timestamp], freq: str, last_days: int = 30 + ): + """fill the stock value by the close price of latest last_days from qlib. + + Parameters + ---------- + position_dict : Dict[stock_id, {"amount": int, "price": float}] + initial holding stocks. + start_time : + the start time of backtest. + last_days : int, optional + the days to get the latest close price, by default 30. + + Return + ---------- + Dict[stock_id, {"amount": int, "price": float}] + initial holding stocks with filled price. + """ + + stock_list = [] + for stock in position_dict: + if ("price" not in position_dict[stock]) or (position_dict[stock]["price"] is None): + stock_list.append(stock) + + if len(stock_list) == 0: + return position_dict + + start_time = pd.Timestamp(start_time) + # note that start time is 2020-01-01 00:00:00 if raw start time is "2020-01-01" + price_end_time = start_time + price_start_time = start_time - timedelta(days=last_days) + price_df = D.features( + stock_list, ["$close"], price_start_time, price_end_time, freq=freq, disk_cache=True + ).dropna() + price_dict = price_df.groupby(["instrument"]).tail(1).reset_index(level=1, drop=True)["$close"].to_dict() + + if len(price_dict) < len(stock_list): + raise ValueError(f"there is no close price in qlib") + + for stock in stock_list: + position_dict[stock]["price"] = price_dict[stock] + return position_dict + def _init_stock(self, stock_id, amount, price=None): """ initialization the stock in current position From c202a4b1e635e3bd342ac458ffae8e34e763876a Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Mon, 26 Jul 2021 11:21:05 +0000 Subject: [PATCH 56/61] fix _get_base_vol_pri 
clip_time_range --- qlib/backtest/report.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 5f8238504..6bc7cc379 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -21,6 +21,7 @@ from ..data import D from ..tests.config import CSI300_BENCH from ..utils.resam import get_higher_eq_freq_feature, resam_ts_data from ..utils.time import Freq +from .order import IdxTradeRange class Report: @@ -357,9 +358,11 @@ class Indicator: agg = pa_config.get("agg", "twap").lower() price = pa_config.get("price", "deal_price").lower() - # NOTE: IndexTradeRange is not supported!!!!! Because inner index is not available - trade_start_time, trade_end_time = decision.trade_range.clip_time_range( - start_time=trade_start_time, end_time=trade_end_time + if(decision.trade_range is not None): + if(isinstance(decision.trade_range, IdxTradeRange)): + raise TypeError(f"IdxTradeRange is not supported") + trade_start_time, trade_end_time = decision.trade_range.clip_time_range( + start_time=trade_start_time, end_time=trade_end_time ) if price == "deal_price": From 4924717276a48ed9b1e36bee8d48d50649dba02f Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Mon, 26 Jul 2021 11:25:14 +0000 Subject: [PATCH 57/61] fix black --- qlib/backtest/report.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 6bc7cc379..95048ba84 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -358,12 +358,12 @@ class Indicator: agg = pa_config.get("agg", "twap").lower() price = pa_config.get("price", "deal_price").lower() - if(decision.trade_range is not None): - if(isinstance(decision.trade_range, IdxTradeRange)): + if decision.trade_range is not None: + if isinstance(decision.trade_range, IdxTradeRange): raise TypeError(f"IdxTradeRange is not supported") trade_start_time, trade_end_time = 
decision.trade_range.clip_time_range( start_time=trade_start_time, end_time=trade_end_time - ) + ) if price == "deal_price": price_s = trade_exchange.get_deal_price( From fcca242807c18c4ec5053876232145a6234c9b78 Mon Sep 17 00:00:00 2001 From: Young Date: Mon, 26 Jul 2021 17:05:33 +0000 Subject: [PATCH 58/61] add cash settlement mechanism --- qlib/backtest/__init__.py | 2 +- qlib/backtest/account.py | 15 ++- qlib/backtest/exchange.py | 39 ++++--- qlib/backtest/executor.py | 63 ++++++------ qlib/backtest/order.py | 10 +- qlib/backtest/position.py | 134 ++++++++++++++----------- qlib/contrib/strategy/rule_strategy.py | 20 +++- qlib/utils/exceptions.py | 7 +- 8 files changed, 170 insertions(+), 120 deletions(-) diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py index 23b8ec9c5..a97841da7 100644 --- a/qlib/backtest/__init__.py +++ b/qlib/backtest/__init__.py @@ -184,7 +184,7 @@ def backtest( exchange_kwargs={}, pos_type: str = "Position", ): - """initialize the strategy and executor, then backtest funciton for the interaction of the outermost strategy and executor in the nested decision execution + """initialize the strategy and executor, then backtest function for the interaction of the outermost strategy and executor in the nested decision execution Parameters ---------- diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py index 03e51c740..773e1a037 100644 --- a/qlib/backtest/account.py +++ b/qlib/backtest/account.py @@ -1,9 +1,8 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
- - +from __future__ import annotations import copy -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, TYPE_CHECKING from qlib.utils import init_instance_by_config import warnings import pandas as pd @@ -11,7 +10,9 @@ import pandas as pd from .position import BasePosition, InfPosition, Position from .report import Report, Indicator from .order import BaseTradeDecision, Order -from .exchange import Exchange + +if TYPE_CHECKING: + from .exchange import Exchange """ rtn & earning in the Account @@ -105,8 +106,6 @@ class Account: "kwargs": { "cash": init_cash, "position_dict": position_dict, - "start_time": benchmark_config["start_time"], - "freq": freq, }, "module_path": "qlib.backtest.position", } @@ -122,7 +121,7 @@ class Account: self.report = Report(freq, benchmark_config) self.positions = {} - # trading related matric(e.g. high-frequency trading) + # trading related metrics(e.g. high-frequency trading) self.indicator = Indicator() def reset(self, freq=None, benchmark_config=None, init_report=False, port_metr_enabled: bool = None): @@ -302,7 +301,7 @@ class Account: if atomic is True and trade_info is None: raise ValueError("trade_info is necessary in atomic executor") elif atomic is False and inner_order_indicators is None: - raise ValueError("inner_order_indicators is necessary in unatomic executor") + raise ValueError("inner_order_indicators is necessary in un-atomic executor") # TODO: `update_bar_count` and `update_current` should placed in Position and be merged. self.update_bar_count() diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index e73510743..9044179e0 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -1,8 +1,12 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
+from __future__ import annotations +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from .account import Account -from qlib.backtest.position import Position +from qlib.backtest.position import BasePosition, Position import random import logging from typing import List, Tuple, Union, Callable, Iterable @@ -278,7 +282,7 @@ class Exchange: else: return True - def deal_order(self, order, trade_account=None, position=None): + def deal_order(self, order, trade_account: Account = None, position: BasePosition = None): """ Deal order when the actual transaction @@ -289,13 +293,12 @@ class Exchange: :param position: position to be updated after dealing the order. :return: trade_val, trade_cost, trade_price """ - # need to check order first - # TODO: check the order unit limit in the exchange!!!! - # The order limit is related to the adj factor and the cur_amount. - # factor = self.quote[(order.stock_id, order.trade_date)]['$factor'] - # cur_amount = trade_account.current.get_stock_amount(order.stock_id) + # check order first. if self.check_order(order) is False: - raise AttributeError("need to check order first") + order.deal_amount = 0.0 + # using np.nan instead of None to make it more convenient to should the value in format string + return 0.0, 0.0, np.nan + if trade_account is not None and position is not None: raise ValueError("trade_account and position can only choose one") @@ -304,14 +307,18 @@ class Exchange: trade_val, trade_cost = self._calc_trade_info_by_order( order, trade_account.current if trade_account else position ) - # update account if order.deal_amount > 1e-5: - # If the order can only be deal 0 aomount. Nothing to be updated - # Otherwise, it will result some stock with 0 amount in the position + # If the order can only be deal 0 amount. 
Nothing to be updated + # Otherwise, it will result in + # 1) some stock with 0 amount in the position + # 2) `trade_unit` of trade_cost will be lost in user account if trade_account: trade_account.update_order(order=order, trade_val=trade_val, cost=trade_cost, trade_price=trade_price) elif position: position.update_order(order=order, trade_val=trade_val, cost=trade_cost, trade_price=trade_price) + else: + # if dealing is not successful, the trade_cost should be zero + trade_cost = 0 return trade_val, trade_cost, trade_price @@ -346,7 +353,7 @@ class Exchange: `None`: if the stock is suspended `None` may be returned `float`: return factor if the factor exists """ - assert (start_time is not None and end_time is not None, "the time range must be given") + assert start_time is not None and end_time is not None, "the time range must be given" if stock_id not in self.quote.get_all_stock(): return None return self.quote.get_data(stock_id, start_time, end_time, fields="$factor", method=ts_data_last) @@ -509,7 +516,7 @@ class Exchange: ) return value - def _get_factor_or_raise_erorr(self, factor: float = None, stock_id: str = None, start_time=None, end_time=None): + def _get_factor_or_raise_error(self, factor: float = None, stock_id: str = None, start_time=None, end_time=None): """Please refer to the docs of get_amount_of_trade_unit""" if factor is None: if stock_id is not None and start_time is not None and end_time is not None: @@ -537,7 +544,7 @@ class Exchange: the end time of trading range """ if not self.trade_w_adj_price and self.trade_unit is not None: - factor = self._get_factor_or_raise_erorr( + factor = self._get_factor_or_raise_error( factor=factor, stock_id=stock_id, start_time=start_time, end_time=end_time ) return self.trade_unit / factor @@ -556,7 +563,7 @@ class Exchange: """ if not self.trade_w_adj_price and self.trade_unit is not None: # the minimal amount is 1. Add 0.1 for solving precision problem. 
- factor = self._get_factor_or_raise_erorr( + factor = self._get_factor_or_raise_error( factor=factor, stock_id=stock_id, start_time=start_time, end_time=end_time ) return (deal_amount * factor + 0.1) // self.trade_unit * self.trade_unit / factor @@ -626,7 +633,7 @@ class Exchange: order.stock_id, order.start_time, order.end_time, order.deal_amount ) trade_val = order.deal_amount * trade_price - trade_cost = trade_val * self.open_cost + trade_cost = max(trade_val * self.open_cost, self.min_cost) else: raise NotImplementedError("order type {} error".format(order.type)) diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index b05b73801..89f5a2c4a 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -1,5 +1,6 @@ from abc import abstractclassmethod, abstractmethod import copy +from qlib.backtest.position import BasePosition from qlib.log import get_module_logger from types import GeneratorType from qlib.backtest.account import Account @@ -32,6 +33,7 @@ class BaseExecutor: track_data: bool = False, trade_exchange: Exchange = None, common_infra: CommonInfrastructure = None, + settle_type=BasePosition.ST_NO, **kwargs, ): """ @@ -95,6 +97,8 @@ class BaseExecutor: - trade_exchange : Exchange, optional exchange that provides market info + settle_type : str + Please refer to the docs of BasePosition.settle_start """ self.time_per_step = time_per_step self.indicator_config = indicator_config @@ -104,6 +108,7 @@ class BaseExecutor: self._trade_exchange = trade_exchange self.level_infra = LevelInfrastructure() self.level_infra.reset_infra(common_infra=common_infra) + self._settle_type = settle_type self.reset(start_time=start_time, end_time=end_time, common_infra=common_infra) if common_infra is None: get_module_logger("BaseExecutor").warning(f"`common_infra` is not set for {self}") @@ -235,6 +240,9 @@ class BaseExecutor: if atomic and trade_decision.get_range_limit(default_value=None) is not None: raise ValueError("atomic executor doesn't 
support specify `range_limit`") + if self._settle_type != BasePosition.ST_NO: + self.trade_account.current.settle_start(self._settle_type) + obj = self._collect_data(trade_decision=trade_decision, level=level) if isinstance(obj, GeneratorType): @@ -256,6 +264,10 @@ class BaseExecutor: ) self.trade_calendar.step() + + if self._settle_type != BasePosition.ST_NO: + self.trade_account.current.settle_commit() + if return_value is not None: return_value.update({"execute_result": res}) return res @@ -366,7 +378,7 @@ class NestedExecutor(BaseExecutor): trade_decision = self._update_trade_decision(trade_decision) if trade_decision.empty() and self._skip_empty_decision: - # give one chance for outer stategy to update the strategy + # give one chance for outer strategy to update the strategy # - For updating some information in the sub executor(the strategy have no knowledge of the inner # executor when generating the decision) break @@ -409,6 +421,9 @@ class NestedExecutor(BaseExecutor): class SimulatorExecutor(BaseExecutor): """Executor that simulate the true market""" + # TODO: TT_SERIAL & TT_PARAL will be replaced by feature fix_pos now. + # Please remove them in the future. + # available trade_types TT_SERIAL = "serial" ## The orders will be executed serially in a sequence @@ -486,34 +501,22 @@ class SimulatorExecutor(BaseExecutor): execute_result = [] for order in self._get_order_iterator(trade_decision): - if self.trade_exchange.check_order(order) is True: - # execute the order. 
- # NOTE: The trade_account will be changed in this function - trade_val, trade_cost, trade_price = self.trade_exchange.deal_order( - order, trade_account=self.trade_account - ) - execute_result.append((order, trade_val, trade_cost, trade_price)) - if self.verbose: - if order.direction == Order.SELL: # sell - action = "sell" - else: - action = "buy" - print( - "[I {:%Y-%m-%d %H:%M:%S}]: {} {}, price {:.2f}, amount {}, deal_amount {}, factor {}, value {:.2f}, cach {:.2f}.".format( - trade_start_time, - action, - order.stock_id, - trade_price, - order.amount, - order.deal_amount, - order.factor, - trade_val, - self.trade_account.get_cash(), - ) + # execute the order. + # NOTE: The trade_account will be changed in this function + trade_val, trade_cost, trade_price = self.trade_exchange.deal_order(order, trade_account=self.trade_account) + execute_result.append((order, trade_val, trade_cost, trade_price)) + if self.verbose: + print( + "[I {:%Y-%m-%d %H:%M:%S}]: {} {}, price {:.2f}, amount {}, deal_amount {}, factor {}, value {:.2f}, cash {:.2f}.".format( + trade_start_time, + "sell" if order.direction == Order.SELL else "buy", + order.stock_id, + trade_price, + order.amount, + order.deal_amount, + order.factor, + trade_val, + self.trade_account.get_cash(), ) - else: - if self.verbose: - print("[W {:%Y-%m-%d %H:%M:%S}]: {} wrong.".format(trade_start_time, order.stock_id)) - # do nothing - pass + ) return execute_result, {"trade_info": execute_result} diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py index b99cdb8e3..bb615dc06 100644 --- a/qlib/backtest/order.py +++ b/qlib/backtest/order.py @@ -58,12 +58,19 @@ class Order: # 3) results # - users should not care about these values # - they are set by the backtest system after finishing the results. + # What the value should be about in all kinds of cases + # - not tradable: the deal_amount == 0 , factor is None + # - the stock is suspended and the entire order fails. 
No cost for this order + # - dealed or partially dealed: deal_amount >= 0 and factor is not None deal_amount: float = field(init=False) # `deal_amount` is a non-negative value factor: float = field(init=False) + # TODO: + # a status field to indicate the dealing result of the order + # FIXME: # for compatible now. - # Plese remove them in the future + # Please remove them in the future SELL: ClassVar[OrderDir] = OrderDir.SELL BUY: ClassVar[OrderDir] = OrderDir.BUY @@ -71,6 +78,7 @@ class Order: if self.direction not in {Order.SELL, Order.BUY}: raise NotImplementedError("direction not supported, `Order.SELL` for sell, `Order.BUY` for buy") self.deal_amount = 0 + self.factor = None @property def amount_delta(self) -> float: diff --git a/qlib/backtest/position.py b/qlib/backtest/position.py index 92b66a342..e4f1ab40c 100644 --- a/qlib/backtest/position.py +++ b/qlib/backtest/position.py @@ -20,8 +20,8 @@ class BasePosition: Please refer to the `Position` class for the position """ - def __init__(self, cash=0.0, *args, **kwargs) -> None: - pass + def __init__(self, cash=0.0, *args, **kwargs): + self._settle_type = self.ST_NO def skip_update(self) -> bool: """ @@ -124,13 +124,16 @@ class BasePosition: """ raise NotImplementedError(f"Please implement the `get_stock_amount` method") - def get_cash(self) -> float: + def get_cash(self, include_settle: bool = False) -> float: """ Returns ------- float: - the cash in position + the available(tradable) cash in position + include_settle: + will the unsettled(delayed) cash included + Default: not include those unavailable cash """ raise NotImplementedError(f"Please implement the `get_cash` method") @@ -188,6 +191,37 @@ class BasePosition: """ raise NotImplementedError(f"Please implement the `add_count_all` method") + ST_CASH = "cash" + ST_NO = None + + def settle_start(self, settle_type: str): + """ + settlement start + It will act like start and commit a transaction + + Parameters + ---------- + settle_type : str + Should we 
make delay the settlement in each execution (each execution will make the executor a step forward) + - "cash": make the cash settlement delayed. + - The cash you get can't be used in current step (e.g. you can't sell a stock to get cash to buy another + stock) + - None: not settlement mechanism + - TODO: other assets will be supported in the future. + """ + raise NotImplementedError(f"Please implement the `settle_conf` method") + + def settle_commit(self): + """ + settlement commit + + Parameters + ---------- + settle_type : str + please refer to the documents of Executor + """ + raise NotImplementedError(f"Please implement the `settle_commit` method") + class Position(BasePosition): """Position @@ -203,7 +237,7 @@ class Position(BasePosition): } """ - def __init__(self, start_time, freq, cash: float = 0, position_dict: Dict[str, Dict[str, float]] = {}): + def __init__(self, cash: float = 0, position_dict: Dict[str, Dict[str, float]] = {}): """Init position by cash and position_dict. Parameters @@ -217,11 +251,12 @@ class Position(BasePosition): if there is no price key in the dict of stocks, it will be filled by _fill_stock_value. by default {}. """ + super().__init__() # NOTE: The position dict must be copied!!! 
# Otherwise the initial value self.init_cash = cash - self.position = self._fill_stock_value(position_dict.copy(), start_time, freq) + self.position = position_dict.copy() self.position["cash"] = cash self.position["now_account_value"] = self.calculate_value() @@ -312,7 +347,13 @@ class Position(BasePosition): elif abs(self.position[stock_id]["amount"]) <= 1e-5: self._del_stock(stock_id) - self.position["cash"] += trade_val - cost + new_cash = trade_val - cost + if self._settle_type == self.ST_CASH: + self.position["cash_delay"] += new_cash + elif self._settle_type == self.ST_NO: + self.position["cash"] += new_cash + else: + raise NotImplementedError(f"This type of input is not supported") def _del_stock(self, stock_id): del self.position[stock_id] @@ -340,9 +381,6 @@ class Position(BasePosition): def update_stock_weight(self, stock_id, weight): self.position[stock_id]["weight"] = weight - def update_cash(self, cash): - self.position["cash"] = cash - def calculate_stock_value(self): stock_list = self.get_stock_list() value = 0 @@ -352,11 +390,11 @@ class Position(BasePosition): def calculate_value(self): value = self.calculate_stock_value() - value += self.position["cash"] + value += self.position["cash"] + self.position.get("cash_delay", 0.0) return value def get_stock_list(self): - stock_list = list(set(self.position.keys()) - {"cash", "now_account_value"}) + stock_list = list(set(self.position.keys()) - {"cash", "now_account_value", "cash_delay"}) return stock_list def get_stock_price(self, code): @@ -375,8 +413,11 @@ class Position(BasePosition): def get_stock_weight(self, code): return self.position[code]["weight"] - def get_cash(self): - return self.position["cash"] + def get_cash(self, include_settle=False): + cash = self.position["cash"] + if include_settle: + cash += self.position.get("cash_delay", 0.0) + return cash def get_stock_amount_dict(self): """generate stock amount dict {stock_id : amount of stock}""" @@ -388,7 +429,7 @@ class 
Position(BasePosition): def get_stock_weight_dict(self, only_stock=False): """get_stock_weight_dict - generate stock weight fict {stock_id : value weight of stock in the position} + generate stock weight dict {stock_id : value weight of stock in the position} it is meaningful in the beginning or the end of each trade date :param only_stock: If only_stock=True, the weight of each stock in total stock will be returned @@ -417,49 +458,20 @@ class Position(BasePosition): for stock_code, weight in weight_dict.items(): self.update_stock_weight(stock_code, weight) - def save_position(self, path): - path = pathlib.Path(path) - p = copy.deepcopy(self.position) - cash = pd.Series(dtype=float) - cash["init_cash"] = self.init_cash - cash["cash"] = p["cash"] - cash["now_account_value"] = p["now_account_value"] - del p["cash"] - del p["now_account_value"] - positions = pd.DataFrame.from_dict(p, orient="index") - with pd.ExcelWriter(path) as writer: - positions.to_excel(writer, sheet_name="position") - cash.to_excel(writer, sheet_name="info") + def settle_start(self, settle_type): + assert self._settle_type == self.ST_NO, "Currently, settlement can't be nested!!!!!" 
+ self._settle_type = settle_type + if settle_type == self.ST_CASH: + self.position["cash_delay"] = 0.0 - def load_position(self, path): - """load position information from a file - should have format below - sheet "position" - columns: ['stock', f'count_{bar}', 'amount', 'price', 'weight'] - f'count_{bar}': , - 'amount': , - 'price': , - 'weight': , - - sheet "cash" - index: ['init_cash', 'cash', 'now_account_value'] - 'init_cash': , - 'cash': , - 'now_account_value': - """ - path = pathlib.Path(path) - positions = pd.read_excel(open(path, "rb"), sheet_name="position", index_col=0) - cash_record = pd.read_excel(open(path, "rb"), sheet_name="info", index_col=0) - positions = positions.to_dict(orient="index") - init_cash = cash_record.loc["init_cash"].values[0] - cash = cash_record.loc["cash"].values[0] - now_account_value = cash_record.loc["now_account_value"].values[0] - # assign values - self.position = {} - self.init_cash = init_cash - self.position = positions - self.position["cash"] = cash - self.position["now_account_value"] = now_account_value + def settle_commit(self): + if self._settle_type != self.ST_NO: + if self._settle_type == self.ST_CASH: + self.position["cash"] += self.position["cash_delay"] + del self.position["cash_delay"] + else: + raise NotImplementedError(f"This type of input is not supported") + self._settle_type = self.ST_NO class InfPosition(BasePosition): @@ -502,7 +514,7 @@ class InfPosition(BasePosition): def get_stock_amount(self, code) -> float: return np.inf - def get_cash(self) -> float: + def get_cash(self, include_settle=False) -> float: return np.inf def get_stock_amount_dict(self) -> Dict: @@ -516,3 +528,9 @@ class InfPosition(BasePosition): def update_weight_all(self): raise NotImplementedError(f"InfPosition doesn't support update_weight_all") + + def settle_start(self, settle_type: str): + pass + + def settle_commit(self): + pass diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py index 
36059f5a0..57ca005ff 100644 --- a/qlib/contrib/strategy/rule_strategy.py +++ b/qlib/contrib/strategy/rule_strategy.py @@ -18,7 +18,12 @@ from qlib.backtest.utils import get_start_end_idx class TWAPStrategy(BaseStrategy): - """TWAP Strategy for trading""" + """TWAP Strategy for trading + + NOTE: + - This TWAP strategy will ceiling round when trading. This will make the TWAP trading strategy produce the order + earlier when the total trade unit of amount is less than the trading step + """ def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs): """ @@ -583,7 +588,11 @@ class FileOrderStrategy(BaseStrategy): """ def __init__( - self, file: Union[IO, str, Path], trade_range: Union[Tuple[int, int], TradeRange] = None, *args, **kwargs + self, + file: Union[IO, str, Path, pd.DataFrame], + trade_range: Union[Tuple[int, int], TradeRange] = None, + *args, + **kwargs, ): """ @@ -611,8 +620,11 @@ class FileOrderStrategy(BaseStrategy): """ super().__init__(*args, **kwargs) - with get_io_object(file) as f: - self.order_df = pd.read_csv(f, dtype={"datetime": np.str}) + if isinstance(file, pd.DataFrame): + self.order_df = file + else: + with get_io_object(file) as f: + self.order_df = pd.read_csv(f, dtype={"datetime": np.str}) self.order_df["datetime"] = self.order_df["datetime"].apply(pd.Timestamp) self.order_df = self.order_df.set_index(["datetime", "instrument"]) diff --git a/qlib/utils/exceptions.py b/qlib/utils/exceptions.py index dad12506b..dd9b3eaf6 100644 --- a/qlib/utils/exceptions.py +++ b/qlib/utils/exceptions.py @@ -1,17 +1,20 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License.
+ # Base exception class class QlibException(Exception): def __init__(self, message): super(QlibException, self).__init__(message) -# Error type for reinitialization when starting an experiment class RecorderInitializationError(QlibException): + """Error type for re-initialization when starting an experiment""" + pass -# Error type for Recorder when can not load object class LoadObjectError(QlibException): + """Error type for Recorder when can not load object""" + pass From 66971d5f0ddc596083e69559e7373ca750c294ab Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Tue, 27 Jul 2021 09:06:13 +0000 Subject: [PATCH 59/61] fix indicator --- qlib/backtest/high_performance_ds.py | 30 ++++++++++++++++++++++------ qlib/backtest/report.py | 8 +++++--- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/qlib/backtest/high_performance_ds.py b/qlib/backtest/high_performance_ds.py index 104be5b9c..d556f303c 100644 --- a/qlib/backtest/high_performance_ds.py +++ b/qlib/backtest/high_performance_ds.py @@ -172,15 +172,25 @@ class BaseSingleMetric: @property def empty(self) -> bool: """If metric is empyt, return True.""" + raise NotImplementedError(f"Please implement the `empty` method") def add(self, other: "BaseSingleMetric", fill_value: float = None) -> "BaseSingleMetric": """Replace np.NaN with fill_value in two metrics and add them.""" + raise NotImplementedError(f"Please implement the `add` method") - def map(self, map_dict: dict) -> "BaseSingleMetric": - """Replace the value of metric according to map_dict.""" - raise NotImplementedError(f"Please implement the `map` method") + def replace(self, replace_dict: dict) -> "BaseSingleMetric": + """Replace the value of metric according to replace_dict.""" + + raise NotImplementedError(f"Please implement the `replace` method") + + def apply(self, func: dict) -> "BaseSingleMetric": + """Replace the value of metric with func(metric). + Currently, the func is only qlib/backtest/order/Order.parse_dir. 
+ """ + + raise NotImplementedError(f"Please implement the 'apply' method") class BaseOrderIndicator: @@ -371,8 +381,11 @@ class PandasSingleMetric: def add(self, other, fill_value=None): return PandasSingleMetric(self.metric.add(other.metric, fill_value=fill_value)) - def map(self, map_dict: dict): - return PandasSingleMetric(self.metric.apply(map_dict)) + def replace(self, replace_dict: dict): + return PandasSingleMetric(self.metric.replace(replace_dict)) + + def apply(self, func: Callable): + return PandasSingleMetric(self.metric.apply(func)) class PandasOrderIndicator(BaseOrderIndicator): @@ -413,6 +426,11 @@ class PandasOrderIndicator(BaseOrderIndicator): for metric in metrics: tmp_metric = PandasSingleMetric({}) for indicator in indicators: - tmp_metric = tmp_metric.add(indicator.data[metric], fill_value) + if(metric == "trade_price"): + tmp_metric = tmp_metric.add( + indicator.data["trade_price"] * indicator.data["deal_amount"], fill_value + ) + else: + tmp_metric = tmp_metric.add(indicator.data[metric], fill_value) metric_dict[metric] = tmp_metric.metric return metric_dict diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 95048ba84..64d00b436 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -308,7 +308,8 @@ class Indicator: def _update_order_fulfill_rate(self): def func(deal_amount, amount): - return deal_amount / amount + tmp_deal_amount = deal_amount.replace({np.NaN: 0}) + return deal_amount / tmp_deal_amount self.order_indicator.transfer(func, "ffr") @@ -323,12 +324,13 @@ class Indicator: self.order_indicator.assign(metric, metric_dict[metric]) def func(trade_price, deal_amount): - return trade_price / deal_amount + tmp_deal_amount = deal_amount.replace({0: np.NaN}) + return trade_price / tmp_deal_amount self.order_indicator.transfer(func, "trade_price") def func_apply(trade_dir): - return trade_dir.map(Order.parse_dir) + return trade_dir.apply(Order.parse_dir) self.order_indicator.transfer(func_apply, "trade_dir") 
From ba1c575aa9e79b3e86f4d9b9a56bd298d88fbb0a Mon Sep 17 00:00:00 2001 From: "wangwenxi.handsome" Date: Tue, 27 Jul 2021 12:14:43 +0000 Subject: [PATCH 60/61] doc and black for indicator --- qlib/backtest/high_performance_ds.py | 17 ++++++----------- qlib/backtest/report.py | 12 +++++++++++- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/qlib/backtest/high_performance_ds.py b/qlib/backtest/high_performance_ds.py index d556f303c..123725832 100644 --- a/qlib/backtest/high_performance_ds.py +++ b/qlib/backtest/high_performance_ds.py @@ -82,7 +82,7 @@ class BaseQuote: the columns of data to fetch method : Union[str, Callable] the method apply to data. - e.g ["None", "last", "all", "sum", "mean", "any", qlib/utils/resam.py/ts_data_last] + e.g [None, "last", "all", "sum", "mean", "any", qlib/utils/resam.py/ts_data_last] Return ---------- @@ -177,19 +177,19 @@ class BaseSingleMetric: def add(self, other: "BaseSingleMetric", fill_value: float = None) -> "BaseSingleMetric": """Replace np.NaN with fill_value in two metrics and add them.""" - + raise NotImplementedError(f"Please implement the `add` method") def replace(self, replace_dict: dict) -> "BaseSingleMetric": """Replace the value of metric according to replace_dict.""" - + raise NotImplementedError(f"Please implement the `replace` method") def apply(self, func: dict) -> "BaseSingleMetric": """Replace the value of metric with func(metric). - Currently, the func is only qlib/backtest/order/Order.parse_dir. + Currently, the func is only qlib/backtest/order/Order.parse_dir. 
""" - + raise NotImplementedError(f"Please implement the 'apply' method") @@ -426,11 +426,6 @@ class PandasOrderIndicator(BaseOrderIndicator): for metric in metrics: tmp_metric = PandasSingleMetric({}) for indicator in indicators: - if(metric == "trade_price"): - tmp_metric = tmp_metric.add( - indicator.data["trade_price"] * indicator.data["deal_amount"], fill_value - ) - else: - tmp_metric = tmp_metric.add(indicator.data[metric], fill_value) + tmp_metric = tmp_metric.add(indicator.data[metric], fill_value) metric_dict[metric] = tmp_metric.metric return metric_dict diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 64d00b436..e37642244 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -308,8 +308,9 @@ class Indicator: def _update_order_fulfill_rate(self): def func(deal_amount, amount): + # deal_amount is np.NaN when there is no inner decision. So full fill rate is 0. tmp_deal_amount = deal_amount.replace({np.NaN: 0}) - return deal_amount / tmp_deal_amount + return tmp_deal_amount / amount self.order_indicator.transfer(func, "ffr") @@ -318,12 +319,21 @@ class Indicator: self._update_order_fulfill_rate() def _agg_order_trade_info(self, inner_order_indicators: List[Dict[str, pd.Series]]): + # calculate total trade amount with each inner order indicator. + def trade_amount_func(deal_amount, trade_price): + return deal_amount * trade_price + + for indicator in inner_order_indicators: + indicator.transfer(trade_amount_func, "trade_price") + + # sum inner order indicators with same metric. all_metric = ["inner_amount", "deal_amount", "trade_price", "trade_value", "trade_cost", "trade_dir"] metric_dict = self.order_indicator_cls.sum_all_indicators(inner_order_indicators, all_metric, fill_value=0) for metric in metric_dict: self.order_indicator.assign(metric, metric_dict[metric]) def func(trade_price, deal_amount): + # trade_price is np.NaN instead of inf when deal_amount is zero. 
tmp_deal_amount = deal_amount.replace({0: np.NaN}) return trade_price / tmp_deal_amount From 0d41ca26ab5fc7f70ed435214312a0730525a129 Mon Sep 17 00:00:00 2001 From: Young Date: Tue, 27 Jul 2021 14:16:18 +0000 Subject: [PATCH 61/61] fix data format bug & twap peeking strategy --- qlib/backtest/executor.py | 2 +- qlib/backtest/high_performance_ds.py | 20 +++++++++++++++++++- qlib/backtest/report.py | 10 ++++++---- qlib/contrib/strategy/rule_strategy.py | 10 +++++----- 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index 89f5a2c4a..0121a904e 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -405,7 +405,7 @@ class NestedExecutor(BaseExecutor): execute_result.extend(_inner_execute_result) inner_order_indicators.append( - self.inner_executor.trade_account.get_trade_indicator().get_order_indicator() + self.inner_executor.trade_account.get_trade_indicator().get_order_indicator(raw=True) ) else: # do nothing and just step forward diff --git a/qlib/backtest/high_performance_ds.py b/qlib/backtest/high_performance_ds.py index 123725832..c60d3f97e 100644 --- a/qlib/backtest/high_performance_ds.py +++ b/qlib/backtest/high_performance_ds.py @@ -3,7 +3,7 @@ import logging -from typing import List, Tuple, Union, Callable, Iterable, Dict +from typing import List, Text, Tuple, Union, Callable, Iterable, Dict from collections import OrderedDict import inspect @@ -280,6 +280,21 @@ class BaseOrderIndicator: pass + def to_series(self) -> Dict[Text, pd.Series]: + """return the metrics as pandas series + + for example: { "ffr": + SH600068 NaN + SH600079 1.0 + SH600266 NaN + ... + SZ300692 NaN + SZ300719 NaN, + ... 
+ } + """ + raise NotImplementedError(f"Please implement the `to_series` method") + class PandasSingleMetric: """Each SingleMetric is based on pd.Series.""" @@ -429,3 +444,6 @@ class PandasOrderIndicator(BaseOrderIndicator): tmp_metric = tmp_metric.add(indicator.data[metric], fill_value) metric_dict[metric] = tmp_metric.metric return metric_dict + + def to_series(self): + return {k: v.metric for k, v in self.data.items()} diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index e37642244..fb1eeedfa 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -274,8 +274,8 @@ class Indicator: # self._trade_calendar = trade_calendar def record(self, trade_start_time): - self.order_indicator_his[trade_start_time] = self.order_indicator.data - self.trade_indicator_his[trade_start_time] = self.trade_indicator + self.order_indicator_his[trade_start_time] = self.get_order_indicator() + self.trade_indicator_his[trade_start_time] = self.get_trade_indicator() def _update_order_trade_info(self, trade_info: list): amount = dict() @@ -587,8 +587,10 @@ class Indicator: ) ) - def get_order_indicator(self): - return self.order_indicator + def get_order_indicator(self, raw: bool = False): + if raw: + return self.order_indicator + return self.order_indicator.to_series() def get_trade_indicator(self): return self.trade_indicator diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py index 57ca005ff..eabbe357b 100644 --- a/qlib/contrib/strategy/rule_strategy.py +++ b/qlib/contrib/strategy/rule_strategy.py @@ -63,11 +63,11 @@ class TWAPStrategy(BaseStrategy): trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step) order_list = [] for order in self.outer_trade_decision.get_decision(): - # if not tradable, continue - if not self.trade_exchange.is_stock_tradable( - stock_id=order.stock_id, start_time=trade_start_time, end_time=trade_end_time - ): - continue + # Don't peek the future information + # if not 
self.trade_exchange.is_stock_tradable( + # stock_id=order.stock_id, start_time=trade_start_time, end_time=trade_end_time + # ): + # continue _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit( stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time )