# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import unittest
import numpy as np
from scipy.linalg import sqrtm

from qlib.model.riskmodel import StructuredCovEstimator


class TestStructuredCovEstimator(unittest.TestCase):
    """Compare ``StructuredCovEstimator.predict`` against ``np.cov``.

    Every test builds a random observation matrix ``X`` with one row per
    observation and one column per variable, asks the estimator for a
    covariance matrix and checks it element-wise against the sample
    covariance computed by numpy.
    """

    # Fixed seed so that a failing run can be reproduced exactly.
    SEED = 0

    def setUp(self):
        # A private generator keeps the tests deterministic without touching
        # numpy's global random state, which other tests may rely on.
        self.rng = np.random.RandomState(self.SEED)

    def _assert_close_to_sample_cov(self, est_cov, X, eps):
        """Assert that ``est_cov`` matches the sample covariance of ``X``.

        Parameters
        ----------
        est_cov
            Covariance matrix returned by the estimator under test.
        X
            Observation matrix, one row per observation and one column per
            variable.
        eps
            Strict upper bound on the absolute element-wise difference.
        """
        # numpy treats each *row* as a variable by default, while X stores the
        # variables in its columns, hence the transpose.
        np_cov = np.cov(X.T)

        delta = np.abs(est_cov - np_cov)
        self.assertTrue(
            (delta < eps).all(),
            msg="largest absolute deviation %g is not below the tolerance %g" % (np.max(delta), eps),
        )

    def test_random_covariance(self):
        """Estimate the covariance of a randomly generated matrix."""
        NUM_VARIABLE = 10
        NUM_OBSERVATION = 200
        EPS = 1e-6

        estimator = StructuredCovEstimator(scale_return=False, assume_centered=True)

        X = self.rng.rand(NUM_OBSERVATION, NUM_VARIABLE)

        est_cov = estimator.predict(X, is_price=False)

        self._assert_close_to_sample_cov(est_cov, X, EPS)

    def test_constructed_covariance(self):
        """Estimate the covariance of a specially crafted matrix.

        ``X`` is a random matrix multiplied by the square root of a random
        matrix with a unit diagonal, so its columns should be significantly
        correlated.
        """
        NUM_VARIABLE = 7
        NUM_OBSERVATION = 500
        EPS = 0.1

        estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, num_factors=NUM_VARIABLE - 1)

        # Keep drawing candidates until the matrix square root has no
        # complex entries.
        while True:
            cov = self.rng.rand(NUM_VARIABLE, NUM_VARIABLE)
            np.fill_diagonal(cov, 1)
            sqrt_cov = sqrtm(cov)
            if not np.iscomplex(sqrt_cov).any():
                break

        X = self.rng.rand(NUM_OBSERVATION, NUM_VARIABLE) @ sqrt_cov

        est_cov = estimator.predict(X, is_price=False)

        self._assert_close_to_sample_cov(est_cov, X, EPS)

    def test_decomposition(self):
        """Estimate the covariance of a matrix generated from a factor model.

        The observations are built as ``(F @ B).T + U``, i.e. under the
        assumption that they can be predicted by multiple factors plus a
        residual term.
        """
        NUM_VARIABLE = 30
        NUM_OBSERVATION = 100
        NUM_FACTOR = 10
        EPS = 0.1

        estimator = StructuredCovEstimator(scale_return=False, assume_centered=True, num_factors=NUM_FACTOR)

        F = self.rng.rand(NUM_VARIABLE, NUM_FACTOR)  # variables x factors
        B = self.rng.rand(NUM_FACTOR, NUM_OBSERVATION)  # factors x observations
        U = self.rng.rand(NUM_OBSERVATION, NUM_VARIABLE)  # observations x variables
        X = (F @ B).T + U

        est_cov = estimator.predict(X, is_price=False)

        self._assert_close_to_sample_cov(est_cov, X, EPS)


if __name__ == "__main__":
    unittest.main()