mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-04 03:21:00 +08:00
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import inspect
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from typing import Union
|
||||
@@ -37,18 +38,24 @@ class RiskModel(BaseModel):
|
||||
self.scale_return = scale_return
|
||||
|
||||
def predict(
|
||||
self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True
|
||||
) -> Union[pd.DataFrame, np.ndarray]:
|
||||
self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True,
|
||||
return_decomposed_components=False,
|
||||
) -> Union[pd.DataFrame, np.ndarray, tuple]:
|
||||
"""
|
||||
Args:
|
||||
X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance,
|
||||
with variables as columns and observations as rows.
|
||||
return_corr (bool): whether return the correlation matrix.
|
||||
is_price (bool): whether `X` contains price (if not assume stock returns).
|
||||
return_decomposed_components (bool): whether return decomposed components of the covariance matrix.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame or np.ndarray: estimated covariance (or correlation).
|
||||
"""
|
||||
assert (
|
||||
not return_corr or not return_decomposed_components
|
||||
), "Can only return either correlation matrix or decomposed components."
|
||||
|
||||
# transform input into 2D array
|
||||
if not isinstance(X, (pd.Series, pd.DataFrame)):
|
||||
columns = None
|
||||
@@ -75,6 +82,14 @@ class RiskModel(BaseModel):
|
||||
# handle nan and centered
|
||||
X = self._preprocess(X)
|
||||
|
||||
# return decomposed components if needed
|
||||
if return_decomposed_components:
|
||||
assert 'return_decomposed_components' in inspect.getfullargspec(self._predict).args, \
|
||||
'This risk model does not support return decomposed components of the covariance matrix '
|
||||
|
||||
F, cov_b, var_u = self._predict(X, return_decomposed_components=True)
|
||||
return F, cov_b, var_u
|
||||
|
||||
# estimate covariance
|
||||
S = self._predict(X)
|
||||
|
||||
@@ -126,12 +141,3 @@ class RiskModel(BaseModel):
|
||||
if not self.assume_centered:
|
||||
X = X - np.nanmean(X, axis=0)
|
||||
return X
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -60,81 +60,13 @@ class StructuredCovEstimator(RiskModel):
|
||||
|
||||
self.num_factors = num_factors
|
||||
|
||||
def predict(
    self,
    X: Union[pd.Series, pd.DataFrame, np.ndarray],
    return_corr: bool = False,
    is_price: bool = True,
    return_decomposed_components=False,
) -> Union[pd.DataFrame, np.ndarray, tuple]:
    """
    Estimate the covariance (or correlation, or decomposed components) of `X`.

    Args:
        X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance,
            with variables as columns and observations as rows. The caller's data is
            never modified by this method.
        return_corr (bool): whether to return the correlation matrix.
        is_price (bool): whether `X` contains price (if not assume stock returns).
        return_decomposed_components (bool): whether to return decomposed components
            of the covariance matrix.

    Returns:
        tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated
            covariance or correlation. Matrix results are wrapped in a DataFrame when
            the input was a pandas object, otherwise returned as a plain array.

    Raises:
        AssertionError: if both `return_corr` and `return_decomposed_components` are set.
    """
    assert (
        not return_corr or not return_decomposed_components
    ), "Can only return either correlation matrix or decomposed components."

    # transform input into 2D array
    columns = None
    if isinstance(X, (pd.Series, pd.DataFrame)):
        if isinstance(X.index, pd.MultiIndex):
            if isinstance(X, pd.DataFrame):
                X = X.iloc[:, 0].unstack(level="instrument")  # always use the first column
            else:
                X = X.unstack(level="instrument")
        # otherwise X is already a 2D DataFrame
        columns = X.columns  # will be used to restore dataframe
        X = X.values

    # calculate pct_change
    if is_price:
        X = X[1:] / X[:-1] - 1  # NOTE: resulting `n - 1` rows

    # scale return
    if self.scale_return:
        # Build a new array instead of `X *= 100`: when `is_price` is False, X still
        # aliases the caller's ndarray (or the DataFrame's values), and an in-place
        # multiply would silently corrupt the caller's data.
        X = X * 100

    # handle nan and centered
    X = self._preprocess(X)

    if return_decomposed_components:
        F, cov_b, var_u = self._predict(X, return_structured=True)
        return F, cov_b, var_u

    # estimate covariance
    S = self._predict(X)

    # return correlation if needed
    if return_corr:
        vola = np.sqrt(np.diag(S))
        corr = S / np.outer(vola, vola)
        if columns is None:
            return corr
        return pd.DataFrame(corr, index=columns, columns=columns)

    # return covariance
    if columns is None:
        return S
    return pd.DataFrame(S, index=columns, columns=columns)
|
||||
|
||||
def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]:
|
||||
def _predict(self, X: np.ndarray, return_decomposed_components=False) -> Union[np.ndarray, tuple]:
|
||||
"""
|
||||
covariance estimation implementation
|
||||
|
||||
Args:
|
||||
X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows).
|
||||
return_structured (bool): whether return decomposed components of the covariance matrix.
|
||||
return_decomposed_components (bool): whether return decomposed components of the covariance matrix.
|
||||
|
||||
Returns:
|
||||
tuple or np.ndarray: decomposed covariance matrix or covariance matrix.
|
||||
@@ -148,7 +80,7 @@ class StructuredCovEstimator(RiskModel):
|
||||
cov_b = np.cov(B.T) # num_factors x num_factors
|
||||
var_u = np.var(U, axis=0) # diagonal
|
||||
|
||||
if return_structured:
|
||||
if return_decomposed_components:
|
||||
return F, cov_b, var_u
|
||||
|
||||
cov_x = F @ cov_b @ F.T + np.diag(var_u)
|
||||
|
||||
@@ -28,7 +28,7 @@ class TestStructuredCovEstimator(unittest.TestCase):
|
||||
self.assertTrue(if_identical)
|
||||
|
||||
def test_nan_option_covariance(self):
|
||||
# Try to estimate the covariance from a randomly generated matrix.
|
||||
# Test if nan_option is correctly passed.
|
||||
NUM_VARIABLE = 10
|
||||
NUM_OBSERVATION = 200
|
||||
EPS = 1e-6
|
||||
@@ -45,6 +45,19 @@ class TestStructuredCovEstimator(unittest.TestCase):
|
||||
|
||||
self.assertTrue(if_identical)
|
||||
|
||||
def test_decompose_covariance(self):
    # Test if return_decomposed_components is correctly passed.
    NUM_VARIABLE = 10
    NUM_OBSERVATION = 200

    estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, nan_option='fill')

    X = np.random.rand(NUM_OBSERVATION, NUM_VARIABLE)

    F, cov_b, var_u = estimator.predict(X, is_price=False, return_decomposed_components=True)

    # All three components must be produced.
    self.assertIsNotNone(F)
    self.assertIsNotNone(cov_b)
    self.assertIsNotNone(var_u)

    # The components must recombine into a square matrix over the input variables,
    # using the same formula the estimator applies when building the full covariance.
    cov_x = F @ cov_b @ F.T + np.diag(var_u)
    self.assertEqual(np.shape(cov_x), (NUM_VARIABLE, NUM_VARIABLE))
|
||||
|
||||
def test_constructed_covariance(self):
|
||||
# Try to estimate the covariance from a specially crafted matrix.
|
||||
# There should be some significant correlation since X is specially crafted.
|
||||
|
||||
Reference in New Issue
Block a user