From 351d598c9f45a59d96fb0be1d57bbbc662d756f6 Mon Sep 17 00:00:00 2001 From: Charles Young Date: Mon, 8 Mar 2021 17:49:59 +0800 Subject: [PATCH] Resolve https://github.com/microsoft/qlib/pull/280\#discussion_r589165409 --- qlib/model/riskmodel/base.py | 28 ++++++---- qlib/model/riskmodel/structured.py | 74 ++------------------------ tests/test_structured_cov_estimator.py | 15 +++++- 3 files changed, 34 insertions(+), 83 deletions(-) diff --git a/qlib/model/riskmodel/base.py b/qlib/model/riskmodel/base.py index 02ab8c2fb..89df80e8f 100644 --- a/qlib/model/riskmodel/base.py +++ b/qlib/model/riskmodel/base.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +import inspect import numpy as np import pandas as pd from typing import Union @@ -37,18 +38,24 @@ class RiskModel(BaseModel): self.scale_return = scale_return def predict( - self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True - ) -> Union[pd.DataFrame, np.ndarray]: + self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True, + return_decomposed_components=False, + ) -> Union[pd.DataFrame, np.ndarray, tuple]: """ Args: X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, with variables as columns and observations as rows. return_corr (bool): whether return the correlation matrix. is_price (bool): whether `X` contains price (if not assume stock returns). + return_decomposed_components (bool): whether return decomposed components of the covariance matrix. Returns: pd.DataFrame or np.ndarray: estimated covariance (or correlation). """ + assert ( + not return_corr or not return_decomposed_components + ), "Can only return either correlation matrix or decomposed components." + # transform input into 2D array if not isinstance(X, (pd.Series, pd.DataFrame)): columns = None @@ -75,6 +82,14 @@ class RiskModel(BaseModel): # handle nan and centered X = self._preprocess(X) + # return decomposed components if needed + if return_decomposed_components: + assert 'return_decomposed_components' in inspect.getfullargspec(self._predict).args, \ + 'This risk model does not support return decomposed components of the covariance matrix ' + + F, cov_b, var_u = self._predict(X, return_decomposed_components=True) + return F, cov_b, var_u + # estimate covariance S = self._predict(X) @@ -126,12 +141,3 @@ class RiskModel(BaseModel): if not self.assume_centered: X = X - np.nanmean(X, axis=0) return X - - - - - - - - - diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py index 7b722e600..39ff0166e 100644 --- a/qlib/model/riskmodel/structured.py +++ b/qlib/model/riskmodel/structured.py @@ -60,81 +60,13 @@ class StructuredCovEstimator(RiskModel): self.num_factors = num_factors - def predict( - self, - X: Union[pd.Series, pd.DataFrame, np.ndarray], - return_corr: bool = False, - is_price: bool = True, - return_decomposed_components=False, - ) -> Union[pd.DataFrame, np.ndarray, tuple]: - """ - Args: - X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, - with variables as columns and observations as rows. - return_corr (bool): whether return the correlation matrix. - is_price (bool): whether `X` contains price (if not assume stock returns). - return_decomposed_components (bool): whether return decomposed components of the covariance matrix. - - Returns: - tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation. - """ - assert ( - not return_corr or not return_decomposed_components - ), "Can only return either correlation matrix or decomposed components." - - # transform input into 2D array - if not isinstance(X, (pd.Series, pd.DataFrame)): - columns = None - else: - if isinstance(X.index, pd.MultiIndex): - if isinstance(X, pd.DataFrame): - X = X.iloc[:, 0].unstack(level="instrument") # always use the first column - else: - X = X.unstack(level="instrument") - else: - # X is 2D DataFrame - pass - columns = X.columns # will be used to restore dataframe - X = X.values - - # calculate pct_change - if is_price: - X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows - - # scale return - if self.scale_return: - X *= 100 - - # handle nan and centered - X = self._preprocess(X) - - if return_decomposed_components: - F, cov_b, var_u = self._predict(X, return_structured=True) - return F, cov_b, var_u - else: - # estimate covariance - S = self._predict(X) - - # return correlation if needed - if return_corr: - vola = np.sqrt(np.diag(S)) - corr = S / np.outer(vola, vola) - if columns is None: - return corr - return pd.DataFrame(corr, index=columns, columns=columns) - - # return covariance - if columns is None: - return S - return pd.DataFrame(S, index=columns, columns=columns) - - def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]: + def _predict(self, X: np.ndarray, return_decomposed_components=False) -> Union[np.ndarray, tuple]: """ covariance estimation implementation Args: X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows). - return_structured (bool): whether return decomposed components of the covariance matrix. + return_decomposed_components (bool): whether return decomposed components of the covariance matrix. Returns: tuple or np.ndarray: decomposed covariance matrix or covariance matrix. @@ -148,7 +80,7 @@ class StructuredCovEstimator(RiskModel): cov_b = np.cov(B.T) # num_factors x num_factors var_u = np.var(U, axis=0) # diagonal - if return_structured: + if return_decomposed_components: return F, cov_b, var_u cov_x = F @ cov_b @ F.T + np.diag(var_u) diff --git a/tests/test_structured_cov_estimator.py b/tests/test_structured_cov_estimator.py index 8ac1e8477..a3973be5a 100644 --- a/tests/test_structured_cov_estimator.py +++ b/tests/test_structured_cov_estimator.py @@ -28,7 +28,7 @@ class TestStructuredCovEstimator(unittest.TestCase): self.assertTrue(if_identical) def test_nan_option_covariance(self): - # Try to estimate the covariance from a randomly generated matrix. + # Test if nan_option is correctly passed. NUM_VARIABLE = 10 NUM_OBSERVATION = 200 EPS = 1e-6 @@ -45,6 +45,19 @@ class TestStructuredCovEstimator(unittest.TestCase): self.assertTrue(if_identical) + def test_decompose_covariance(self): + # Test if return_decomposed_components is correctly passed. + NUM_VARIABLE = 10 + NUM_OBSERVATION = 200 + + estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option='fill') + + X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE) + + F, cov_b, var_u = estimator.predict(X, is_price=False, return_decomposed_components=True) + + self.assertTrue(F is not None and cov_b is not None and var_u is not None) + def test_constructed_covariance(self): # Try to estimate the covariance from a specially crafted matrix. # There should be some significant correlation since X is specially crafted.