1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-04 03:21:00 +08:00
Charles Young
2021-03-08 17:49:59 +08:00
parent 81b86f8022
commit 351d598c9f
3 changed files with 34 additions and 83 deletions

View File

@@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import inspect
import numpy as np
import pandas as pd
from typing import Union
@@ -37,18 +38,24 @@ class RiskModel(BaseModel):
self.scale_return = scale_return
def predict(
self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True
) -> Union[pd.DataFrame, np.ndarray]:
self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True,
return_decomposed_components=False,
) -> Union[pd.DataFrame, np.ndarray, tuple]:
"""
Args:
X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance,
with variables as columns and observations as rows.
return_corr (bool): whether to return the correlation matrix.
is_price (bool): whether `X` contains price (if not assume stock returns).
return_decomposed_components (bool): whether to return the decomposed components of the covariance matrix.
Returns:
pd.DataFrame or np.ndarray or tuple: estimated covariance (or correlation), or the decomposed components `(F, cov_b, var_u)` when `return_decomposed_components` is set.
"""
assert (
not return_corr or not return_decomposed_components
), "Can only return either correlation matrix or decomposed components."
# transform input into 2D array
if not isinstance(X, (pd.Series, pd.DataFrame)):
columns = None
@@ -75,6 +82,14 @@ class RiskModel(BaseModel):
# handle nan and centered
X = self._preprocess(X)
# return decomposed components if needed
if return_decomposed_components:
assert 'return_decomposed_components' in inspect.getfullargspec(self._predict).args, \
'This risk model does not support return decomposed components of the covariance matrix '
F, cov_b, var_u = self._predict(X, return_decomposed_components=True)
return F, cov_b, var_u
# estimate covariance
S = self._predict(X)
@@ -126,12 +141,3 @@ class RiskModel(BaseModel):
if not self.assume_centered:
X = X - np.nanmean(X, axis=0)
return X

View File

@@ -60,81 +60,13 @@ class StructuredCovEstimator(RiskModel):
self.num_factors = num_factors
def predict(
    self,
    X: Union[pd.Series, pd.DataFrame, np.ndarray],
    return_corr: bool = False,
    is_price: bool = True,
    return_decomposed_components=False,
) -> Union[pd.DataFrame, np.ndarray, tuple]:
    """
    Estimate the covariance of `X`, its correlation, or the decomposed components.

    The caller's data is never modified: every transformation below rebinds `X`
    to a new array instead of writing into the input buffer.

    Args:
        X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance,
            with variables as columns and observations as rows. Pandas input indexed by a
            MultiIndex is unstacked on the "instrument" level first (for a DataFrame only
            the first column is used).
        return_corr (bool): whether to return the correlation matrix.
        is_price (bool): whether `X` contains price (if not assume stock returns).
        return_decomposed_components (bool): whether to return the decomposed components
            of the covariance matrix.

    Returns:
        tuple or pd.DataFrame or np.ndarray: the decomposed components `(F, cov_b, var_u)`
            when `return_decomposed_components` is set; otherwise the estimated covariance
            (or correlation). Matrix results are wrapped in a DataFrame labelled by the
            input columns when `X` was a pandas object, and are plain arrays otherwise.

    Raises:
        AssertionError: if both `return_corr` and `return_decomposed_components` are set.
    """
    assert (
        not return_corr or not return_decomposed_components
    ), "Can only return either correlation matrix or decomposed components."

    # transform input into 2D array
    if not isinstance(X, (pd.Series, pd.DataFrame)):
        columns = None
    else:
        if isinstance(X.index, pd.MultiIndex):
            if isinstance(X, pd.DataFrame):
                X = X.iloc[:, 0].unstack(level="instrument")  # always use the first column
            else:
                X = X.unstack(level="instrument")
        # otherwise X is already a 2D DataFrame
        columns = X.columns  # will be used to restore dataframe
        X = X.values

    # calculate pct_change
    if is_price:
        X = X[1:] / X[:-1] - 1  # NOTE: resulting `n - 1` rows

    # scale return
    if self.scale_return:
        # Rebind rather than `X *= 100`: when `is_price` is False, `X` may still be the
        # caller's own array (or the buffer behind a DataFrame), and an in-place multiply
        # would silently scale the caller's data.
        X = X * 100

    # handle nan and centered
    X = self._preprocess(X)

    # decomposed components are returned as-is, without any DataFrame wrapping
    if return_decomposed_components:
        F, cov_b, var_u = self._predict(X, return_structured=True)
        return F, cov_b, var_u

    # estimate covariance
    S = self._predict(X)

    # return correlation if needed
    if return_corr:
        vola = np.sqrt(np.diag(S))
        corr = S / np.outer(vola, vola)
        if columns is None:
            return corr
        return pd.DataFrame(corr, index=columns, columns=columns)

    # return covariance
    if columns is None:
        return S
    return pd.DataFrame(S, index=columns, columns=columns)
def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]:
def _predict(self, X: np.ndarray, return_decomposed_components=False) -> Union[np.ndarray, tuple]:
"""
covariance estimation implementation
Args:
X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows).
return_structured (bool): whether to return decomposed components of the covariance matrix.
return_decomposed_components (bool): whether to return decomposed components of the covariance matrix.
Returns:
tuple or np.ndarray: decomposed covariance matrix or covariance matrix.
@@ -148,7 +80,7 @@ class StructuredCovEstimator(RiskModel):
cov_b = np.cov(B.T) # num_factors x num_factors
var_u = np.var(U, axis=0) # diagonal
if return_structured:
if return_decomposed_components:
return F, cov_b, var_u
cov_x = F @ cov_b @ F.T + np.diag(var_u)

View File

@@ -28,7 +28,7 @@ class TestStructuredCovEstimator(unittest.TestCase):
self.assertTrue(if_identical)
def test_nan_option_covariance(self):
# Try to estimate the covariance from a randomly generated matrix.
# Test if nan_option is correctly passed.
NUM_VARIABLE = 10
NUM_OBSERVATION = 200
EPS = 1e-6
@@ -45,6 +45,19 @@ class TestStructuredCovEstimator(unittest.TestCase):
self.assertTrue(if_identical)
def test_decompose_covariance(self):
    """Check that `return_decomposed_components` reaches the estimator and yields usable parts."""
    # Test if return_decomposed_components is correctly passed.
    NUM_VARIABLE = 10
    NUM_OBSERVATION = 200

    estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option='fill')

    X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE)

    F, cov_b, var_u = estimator.predict(X, is_price=False, return_decomposed_components=True)

    # One assertion per component so a failure names the missing piece.
    self.assertIsNotNone(F)
    self.assertIsNotNone(cov_b)
    self.assertIsNotNone(var_u)

    # The components must recombine into a square covariance over the input variables,
    # using the same expression the estimator applies when it is not asked to decompose.
    cov_x = F @ cov_b @ F.T + np.diag(var_u)
    self.assertEqual(np.shape(cov_x), (NUM_VARIABLE, NUM_VARIABLE))
def test_constructed_covariance(self):
# Try to estimate the covariance from a specially crafted matrix.
# There should be some significant correlation since X is specially crafted.