mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-04 03:21:00 +08:00
Format with black
This commit is contained in:
@@ -1032,12 +1032,12 @@ def register_all_wrappers():
|
||||
"""register_all_wrappers"""
|
||||
logger = get_module_logger("data")
|
||||
module = get_module_by_module_path("qlib.data")
|
||||
|
||||
|
||||
_calendar_provider = init_instance_by_config(C.calendar_provider, module)
|
||||
if getattr(C, "calendar_cache", None) is not None:
|
||||
_calendar_cache_config = {}
|
||||
_calendar_cache_config.update(C.calendar_cache)
|
||||
_calendar_cache_config['kwargs'].update(provider=_calendar_provider)
|
||||
_calendar_cache_config["kwargs"].update(provider=_calendar_provider)
|
||||
_calendar_provider = init_instance_by_config(_calendar_cache_config, module)
|
||||
register_wrapper(Cal, _calendar_provider, "qlib.data")
|
||||
logger.debug(f"registering Cal {C.calendar_provider}-{C.calenar_cache}")
|
||||
@@ -1056,7 +1056,7 @@ def register_all_wrappers():
|
||||
if getattr(C, "expression_cache", None) is not None:
|
||||
_expression_cache_config = {}
|
||||
_expression_cache_config.update(C.expression_cache)
|
||||
_expression_cache_config['kwargs'].update(provider=_eprovider)
|
||||
_expression_cache_config["kwargs"].update(provider=_eprovider)
|
||||
_eprovider = init_instance_by_config(C.expression_cache, module)
|
||||
register_wrapper(ExpressionD, _eprovider, "qlib.data")
|
||||
logger.debug(f"registering ExpressioneD {C.expression_provider}-{C.expression_cache}")
|
||||
@@ -1065,7 +1065,7 @@ def register_all_wrappers():
|
||||
if getattr(C, "dataset_cache", None) is not None:
|
||||
_dataset_cache_config = {}
|
||||
_dataset_cache_config.update(C.dataset_cache)
|
||||
_dataset_cache_config['kwargs'].update(provider=_dprovider)
|
||||
_dataset_cache_config["kwargs"].update(provider=_dprovider)
|
||||
_dprovider = init_instance_by_config(_dataset_cache_config, module)
|
||||
register_wrapper(DatasetD, _dprovider, "qlib.data")
|
||||
logger.debug(f"registering DataseteD {C.dataset_provider}-{C.dataset_cache}")
|
||||
|
||||
@@ -125,7 +125,7 @@ class DataHandler(Serializable):
|
||||
selector: Union[pd.Timestamp, slice, str],
|
||||
level: Union[str, int] = "datetime",
|
||||
col_set: Union[str, List[str]] = CS_ALL,
|
||||
squeeze: bool = False
|
||||
squeeze: bool = False,
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
fetch data from underlying data source
|
||||
@@ -184,17 +184,18 @@ class DataHandler(Serializable):
|
||||
cur_date (pd.Timestamp or str): current date
|
||||
periods (int): number of periods
|
||||
"""
|
||||
trading_dates = self._data.index.unique(level='datetime')
|
||||
trading_dates = self._data.index.unique(level="datetime")
|
||||
cur_loc = trading_dates.get_loc(cur_date)
|
||||
pre_loc = cur_loc - periods + 1
|
||||
if pre_loc < 0:
|
||||
warnings.warn('`periods` is too large. the first date will be returned.')
|
||||
warnings.warn("`periods` is too large. the first date will be returned.")
|
||||
pre_loc = 0
|
||||
ref_date = trading_dates[pre_loc]
|
||||
return slice(ref_date, cur_date)
|
||||
|
||||
def get_range_iterator(self, periods: int, min_periods: Optional[int] = None,
|
||||
**kwargs) -> Iterator[Tuple[pd.Timestamp, pd.DataFrame]]:
|
||||
def get_range_iterator(
|
||||
self, periods: int, min_periods: Optional[int] = None, **kwargs
|
||||
) -> Iterator[Tuple[pd.Timestamp, pd.DataFrame]]:
|
||||
"""
|
||||
get a iterator of sliced data with given periods
|
||||
|
||||
@@ -203,7 +204,7 @@ class DataHandler(Serializable):
|
||||
min_periods (int): minimum periods for sliced dataframe
|
||||
kwargs (dict): will be passed to `self.fetch`
|
||||
"""
|
||||
trading_dates = self._data.index.unique(level='datetime')
|
||||
trading_dates = self._data.index.unique(level="datetime")
|
||||
if min_periods is None:
|
||||
min_periods = periods
|
||||
for cur_date in trading_dates[min_periods:]:
|
||||
|
||||
@@ -9,10 +9,12 @@ from typing import Tuple
|
||||
|
||||
from qlib.data import D
|
||||
|
||||
|
||||
class DataLoader(abc.ABC):
|
||||
'''
|
||||
"""
|
||||
DataLoader is designed for loading raw data from original data source.
|
||||
'''
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def load(self, instruments, start_time=None, end_time=None) -> pd.DataFrame:
|
||||
"""
|
||||
@@ -47,7 +49,8 @@ class DataLoader(abc.ABC):
|
||||
|
||||
|
||||
class QlibDataLoader(DataLoader):
|
||||
'''Same as QlibDataLoader. The fields can be define by config'''
|
||||
"""Same as QlibDataLoader. The fields can be define by config"""
|
||||
|
||||
def __init__(self, config: Tuple[list, tuple, dict], filter_pipe=None):
|
||||
"""
|
||||
Parameters
|
||||
@@ -64,7 +67,7 @@ class QlibDataLoader(DataLoader):
|
||||
|
||||
<fields_info> := ["expr", ...] | (["expr", ...], ["col_name", ...])
|
||||
"""
|
||||
self.is_group = isinstance(config, dict)
|
||||
self.is_group = isinstance(config, dict)
|
||||
|
||||
if self.is_group:
|
||||
self.fields = {grp: self._parse_fields_info(fields_info) for grp, fields_info in config.items()}
|
||||
@@ -86,15 +89,17 @@ class QlibDataLoader(DataLoader):
|
||||
if isinstance(instruments, str):
|
||||
instruments = D.instruments(instruments, filter_pipe=self.filter_pipe)
|
||||
elif self.filter_pipe is not None:
|
||||
warnings.warn('`filter_pipe` is not None, but it will not be used with `instruments` as list')
|
||||
warnings.warn("`filter_pipe` is not None, but it will not be used with `instruments` as list")
|
||||
|
||||
def _get_df(exprs, names):
|
||||
df = D.features(instruments, exprs, start_time, end_time)
|
||||
df.columns = names
|
||||
return df
|
||||
|
||||
if self.is_group:
|
||||
df = pd.concat({grp: _get_df(exprs, names) for grp, (exprs, names) in self.fields.items()}, axis=1)
|
||||
else:
|
||||
exprs, names = self.fields
|
||||
df = _get_df(exprs, names)
|
||||
df = df.swaplevel().sort_index() # NOTE: always return <datetime, instrument>
|
||||
df = df.swaplevel().sort_index() # NOTE: always return <datetime, instrument>
|
||||
return df
|
||||
|
||||
@@ -701,7 +701,7 @@ class Rolling(ExpressionOps):
|
||||
if self.N == 0:
|
||||
return np.inf
|
||||
if 0 < self.N < 1:
|
||||
return int(np.log(1e-6) / np.log(1 - self.N)) # (1 - N)**window == 1e-6
|
||||
return int(np.log(1e-6) / np.log(1 - self.N)) # (1 - N)**window == 1e-6
|
||||
return self.feature.get_longest_back_rolling() + self.N - 1
|
||||
|
||||
def get_extended_window_size(self):
|
||||
|
||||
@@ -16,11 +16,11 @@ class RiskModel(BaseModel):
|
||||
A risk model is used to estimate the covariance matrix of stock returns.
|
||||
"""
|
||||
|
||||
MASK_NAN = 'mask'
|
||||
FILL_NAN = 'fill'
|
||||
IGNORE_NAN = 'ignore'
|
||||
MASK_NAN = "mask"
|
||||
FILL_NAN = "fill"
|
||||
IGNORE_NAN = "ignore"
|
||||
|
||||
def __init__(self, nan_option: str = 'ignore', assume_centered: bool = False, scale_return: bool = True):
|
||||
def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, scale_return: bool = True):
|
||||
"""
|
||||
Args:
|
||||
nan_option (str): nan handling option (`ignore`/`mask`/`fill`)
|
||||
@@ -28,15 +28,19 @@ class RiskModel(BaseModel):
|
||||
scale_return (bool): whether scale returns as percentage
|
||||
"""
|
||||
# nan
|
||||
assert nan_option in [self.MASK_NAN, self.FILL_NAN, self.IGNORE_NAN], \
|
||||
f'`nan_option={nan_option}` is not supported'
|
||||
assert nan_option in [
|
||||
self.MASK_NAN,
|
||||
self.FILL_NAN,
|
||||
self.IGNORE_NAN,
|
||||
], f"`nan_option={nan_option}` is not supported"
|
||||
self.nan_option = nan_option
|
||||
|
||||
self.assume_centered = assume_centered
|
||||
self.scale_return = scale_return
|
||||
|
||||
def predict(self, X: Union[pd.Series, pd.DataFrame, np.ndarray],
|
||||
return_corr: bool = False, is_price: bool = True) -> Union[pd.DataFrame, np.ndarray]:
|
||||
def predict(
|
||||
self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True
|
||||
) -> Union[pd.DataFrame, np.ndarray]:
|
||||
"""
|
||||
Args:
|
||||
X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance,
|
||||
@@ -53,18 +57,18 @@ class RiskModel(BaseModel):
|
||||
else:
|
||||
if isinstance(X.index, pd.MultiIndex):
|
||||
if isinstance(X, pd.DataFrame):
|
||||
X = X.iloc[:, 0].unstack(level='instrument') # always use the first column
|
||||
X = X.iloc[:, 0].unstack(level="instrument") # always use the first column
|
||||
else:
|
||||
X = X.unstack(level='instrument')
|
||||
X = X.unstack(level="instrument")
|
||||
else:
|
||||
# X is 2D DataFrame
|
||||
pass
|
||||
columns = X.columns # will be used to restore dataframe
|
||||
columns = X.columns # will be used to restore dataframe
|
||||
X = X.values
|
||||
|
||||
# calculate pct_change
|
||||
if is_price:
|
||||
X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows
|
||||
X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows
|
||||
|
||||
# scale return
|
||||
if self.scale_return:
|
||||
@@ -106,7 +110,7 @@ class RiskModel(BaseModel):
|
||||
N = len(X)
|
||||
if isinstance(X, np.ma.MaskedArray):
|
||||
M = 1 - X.mask
|
||||
N = M.T.dot(M) # each pair has distinct number of samples
|
||||
N = M.T.dot(M) # each pair has distinct number of samples
|
||||
return xTx / N
|
||||
|
||||
def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]:
|
||||
@@ -165,14 +169,14 @@ class ShrinkCovEstimator(RiskModel):
|
||||
[7] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-0000648dfc98/covMarket.m.zip
|
||||
"""
|
||||
|
||||
SHR_LW = 'lw'
|
||||
SHR_OAS = 'oas'
|
||||
SHR_LW = "lw"
|
||||
SHR_OAS = "oas"
|
||||
|
||||
TGT_CONST_VAR = 'const_var'
|
||||
TGT_CONST_CORR = 'const_corr'
|
||||
TGT_SINGLE_FACTOR = 'single_factor'
|
||||
TGT_CONST_VAR = "const_var"
|
||||
TGT_CONST_CORR = "const_corr"
|
||||
TGT_SINGLE_FACTOR = "single_factor"
|
||||
|
||||
def __init__(self, alpha: Union[str, float] = 0.0, target: Union[str, np.ndarray] = 'const_var', **kwargs):
|
||||
def __init__(self, alpha: Union[str, float] = 0.0, target: Union[str, np.ndarray] = "const_var", **kwargs):
|
||||
"""
|
||||
Args:
|
||||
alpha (str or float): shrinking parameter or estimator (`lw`/`oas`)
|
||||
@@ -183,24 +187,26 @@ class ShrinkCovEstimator(RiskModel):
|
||||
|
||||
# alpha
|
||||
if isinstance(alpha, str):
|
||||
assert alpha in [self.SHR_LW, self.SHR_OAS], \
|
||||
f'shrinking method `{alpha}` is not supported'
|
||||
assert alpha in [self.SHR_LW, self.SHR_OAS], f"shrinking method `{alpha}` is not supported"
|
||||
elif isinstance(alpha, (float, np.floating)):
|
||||
assert 0 <= alpha <= 1, 'alpha should be between [0, 1]'
|
||||
assert 0 <= alpha <= 1, "alpha should be between [0, 1]"
|
||||
else:
|
||||
raise TypeError('invalid argument type for `alpha`')
|
||||
raise TypeError("invalid argument type for `alpha`")
|
||||
self.alpha = alpha
|
||||
|
||||
# target
|
||||
if isinstance(target, str):
|
||||
assert target in [self.TGT_CONST_VAR, self.TGT_CONST_CORR, self.TGT_SINGLE_FACTOR], \
|
||||
f'shrinking target `{target} is not supported'
|
||||
assert target in [
|
||||
self.TGT_CONST_VAR,
|
||||
self.TGT_CONST_CORR,
|
||||
self.TGT_SINGLE_FACTOR,
|
||||
], f"shrinking target `{target} is not supported"
|
||||
elif isinstance(target, np.ndarray):
|
||||
pass
|
||||
else:
|
||||
raise TypeError('invalid argument type for `target`')
|
||||
raise TypeError("invalid argument type for `target`")
|
||||
if alpha == self.SHR_OAS and target != self.TGT_CONST_VAR:
|
||||
raise NotImplementedError('currently `oas` can only support `const_var` as target')
|
||||
raise NotImplementedError("currently `oas` can only support `const_var` as target")
|
||||
self.target = target
|
||||
|
||||
def _predict(self, X: np.ndarray) -> np.ndarray:
|
||||
@@ -215,7 +221,7 @@ class ShrinkCovEstimator(RiskModel):
|
||||
|
||||
# shrink covariance
|
||||
if alpha > 0:
|
||||
S *= (1 - alpha)
|
||||
S *= 1 - alpha
|
||||
F *= alpha
|
||||
S += F
|
||||
|
||||
@@ -292,8 +298,8 @@ class ShrinkCovEstimator(RiskModel):
|
||||
alpha = A / B
|
||||
where `n`, `p` are the dim of observations and variables respectively.
|
||||
"""
|
||||
trS2 = np.sum(S**2)
|
||||
tr2S = np.trace(S)**2
|
||||
trS2 = np.sum(S ** 2)
|
||||
tr2S = np.trace(S) ** 2
|
||||
|
||||
n, p = X.shape
|
||||
|
||||
@@ -310,10 +316,10 @@ class ShrinkCovEstimator(RiskModel):
|
||||
"""
|
||||
t, n = X.shape
|
||||
|
||||
y = X**2
|
||||
phi = np.sum(y.T.dot(y) / t - S**2)
|
||||
y = X ** 2
|
||||
phi = np.sum(y.T.dot(y) / t - S ** 2)
|
||||
|
||||
gamma = np.linalg.norm(S - F, 'fro')**2
|
||||
gamma = np.linalg.norm(S - F, "fro") ** 2
|
||||
|
||||
kappa = phi / gamma
|
||||
alpha = max(0, min(1, kappa / t))
|
||||
@@ -331,15 +337,15 @@ class ShrinkCovEstimator(RiskModel):
|
||||
sqrt_var = np.sqrt(var)
|
||||
r_bar = (np.sum(S / np.outer(sqrt_var, sqrt_var)) - n) / (n * (n - 1))
|
||||
|
||||
y = X**2
|
||||
phi_mat = y.T.dot(y) / t - S**2
|
||||
y = X ** 2
|
||||
phi_mat = y.T.dot(y) / t - S ** 2
|
||||
phi = np.sum(phi_mat)
|
||||
|
||||
theta_mat = (X**3).T.dot(X) / t - var[:, None] * S
|
||||
theta_mat = (X ** 3).T.dot(X) / t - var[:, None] * S
|
||||
np.fill_diagonal(theta_mat, 0)
|
||||
rho = np.sum(np.diag(phi_mat)) + r_bar * np.sum(np.outer(1 / sqrt_var, sqrt_var) * theta_mat)
|
||||
|
||||
gamma = np.linalg.norm(S - F, 'fro')**2
|
||||
gamma = np.linalg.norm(S - F, "fro") ** 2
|
||||
|
||||
kappa = (phi - rho) / gamma
|
||||
alpha = max(0, min(1, kappa / t))
|
||||
@@ -357,19 +363,21 @@ class ShrinkCovEstimator(RiskModel):
|
||||
cov_mkt = np.asarray(X.T.dot(X_mkt) / len(X))
|
||||
var_mkt = np.asarray(X_mkt.dot(X_mkt) / len(X))
|
||||
|
||||
y = X**2
|
||||
phi = np.sum(y.T.dot(y)) / t - np.sum(S**2)
|
||||
y = X ** 2
|
||||
phi = np.sum(y.T.dot(y)) / t - np.sum(S ** 2)
|
||||
|
||||
rdiag = np.sum(y**2) / t - np.sum(np.diag(S)**2)
|
||||
rdiag = np.sum(y ** 2) / t - np.sum(np.diag(S) ** 2)
|
||||
z = X * X_mkt[:, None]
|
||||
v1 = y.T.dot(z) / t - cov_mkt[:, None] * S
|
||||
roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt
|
||||
v3 = z.T.dot(z) / t - var_mkt * S
|
||||
roff3 = np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
|
||||
roff3 = (
|
||||
np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2
|
||||
)
|
||||
roff = 2 * roff1 - roff3
|
||||
rho = rdiag + roff
|
||||
|
||||
gamma = np.linalg.norm(S - F, 'fro')**2
|
||||
gamma = np.linalg.norm(S - F, "fro") ** 2
|
||||
|
||||
kappa = (phi - rho) / gamma
|
||||
alpha = max(0, min(1, kappa / t))
|
||||
@@ -386,11 +394,11 @@ class POETCovEstimator(RiskModel):
|
||||
[2] http://econweb.rutgers.edu/yl1114/papers/poet/POET.m
|
||||
"""
|
||||
|
||||
THRESH_SOFT = 'soft'
|
||||
THRESH_HARD = 'hard'
|
||||
THRESH_SCAD = 'scad'
|
||||
THRESH_SOFT = "soft"
|
||||
THRESH_HARD = "hard"
|
||||
THRESH_SCAD = "scad"
|
||||
|
||||
def __init__(self, num_factors: int = 0, thresh: float = 1.0, thresh_method: str = 'soft', **kwargs):
|
||||
def __init__(self, num_factors: int = 0, thresh: float = 1.0, thresh_method: str = "soft", **kwargs):
|
||||
"""
|
||||
Args:
|
||||
num_factors (int): number of factors (if set to zero, no factor model will be used)
|
||||
@@ -403,25 +411,28 @@ class POETCovEstimator(RiskModel):
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
assert num_factors >= 0, '`num_factors` requires a positive integer'
|
||||
assert num_factors >= 0, "`num_factors` requires a positive integer"
|
||||
self.num_factors = num_factors
|
||||
|
||||
assert thresh >= 0, '`thresh` requires a positive float number'
|
||||
assert thresh >= 0, "`thresh` requires a positive float number"
|
||||
self.thresh = thresh
|
||||
|
||||
assert thresh_method in [self.THRESH_HARD, self.THRESH_SOFT, self.THRESH_SCAD], \
|
||||
'`thresh_method` should be `soft`/`hard`/`scad`'
|
||||
assert thresh_method in [
|
||||
self.THRESH_HARD,
|
||||
self.THRESH_SOFT,
|
||||
self.THRESH_SCAD,
|
||||
], "`thresh_method` should be `soft`/`hard`/`scad`"
|
||||
self.thresh_method = thresh_method
|
||||
|
||||
def _predict(self, X: np.ndarray) -> np.ndarray:
|
||||
|
||||
Y = X.T # NOTE: to match POET's implementation
|
||||
Y = X.T # NOTE: to match POET's implementation
|
||||
p, n = Y.shape
|
||||
|
||||
if self.num_factors > 0:
|
||||
Dd, V = np.linalg.eig(Y.T.dot(Y))
|
||||
V = V[:, np.argsort(Dd)]
|
||||
F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n)
|
||||
F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n)
|
||||
LamPCA = Y.dot(F) / n
|
||||
uhat = np.asarray(Y - LamPCA.dot(F.T))
|
||||
Lowrank = np.asarray(LamPCA.dot(LamPCA.T))
|
||||
@@ -434,12 +445,12 @@ class POETCovEstimator(RiskModel):
|
||||
lamb = rate * self.thresh
|
||||
SuPCA = uhat.dot(uhat.T) / n
|
||||
SuDiag = np.diag(np.diag(SuPCA))
|
||||
R = np.linalg.inv(SuDiag**0.5).dot(SuPCA).dot(np.linalg.inv(SuDiag**0.5))
|
||||
R = np.linalg.inv(SuDiag ** 0.5).dot(SuPCA).dot(np.linalg.inv(SuDiag ** 0.5))
|
||||
|
||||
if self.thresh_method == self.THRESH_HARD:
|
||||
M = R * (np.abs(R) > lamb)
|
||||
elif self.thresh_method == self.THRESH_SOFT:
|
||||
res = (np.abs(R) - lamb)
|
||||
res = np.abs(R) - lamb
|
||||
res = (res + np.abs(res)) / 2
|
||||
M = np.sign(R) * res
|
||||
else:
|
||||
@@ -449,7 +460,7 @@ class POETCovEstimator(RiskModel):
|
||||
M = M1 + M2 + M3
|
||||
|
||||
Rthresh = M - np.diag(np.diag(M)) + np.eye(p)
|
||||
SigmaU = (SuDiag**0.5).dot(Rthresh).dot(SuDiag**0.5)
|
||||
SigmaU = (SuDiag ** 0.5).dot(Rthresh).dot(SuDiag ** 0.5)
|
||||
SigmaY = SigmaU + Lowrank
|
||||
|
||||
return SigmaY
|
||||
|
||||
@@ -22,13 +22,20 @@ class PortfolioOptimizer(object):
|
||||
This optimizer always assumes full investment and no-shorting.
|
||||
"""
|
||||
|
||||
OPT_GMV = 'gmv'
|
||||
OPT_MVO = 'mvo'
|
||||
OPT_RP = 'rp'
|
||||
OPT_INV = 'inv'
|
||||
OPT_GMV = "gmv"
|
||||
OPT_MVO = "mvo"
|
||||
OPT_RP = "rp"
|
||||
OPT_INV = "inv"
|
||||
|
||||
def __init__(self, method: str = 'inv', lamb: float = 0, delta: float = 0,
|
||||
alpha: float = 0.0, scale_alpha: bool = True, tol: float = 1e-8):
|
||||
def __init__(
|
||||
self,
|
||||
method: str = "inv",
|
||||
lamb: float = 0,
|
||||
delta: float = 0,
|
||||
alpha: float = 0.0,
|
||||
scale_alpha: bool = True,
|
||||
tol: float = 1e-8,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
method (str): portfolio optimization method
|
||||
@@ -37,24 +44,26 @@ class PortfolioOptimizer(object):
|
||||
alpha (float): l2 norm regularizer
|
||||
tol (float): tolerance for optimization termination
|
||||
"""
|
||||
assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], \
|
||||
f'method `{method}` is not supported'
|
||||
assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported"
|
||||
self.method = method
|
||||
|
||||
assert lamb >= 0, f'risk aversion parameter `lamb` should be positive'
|
||||
assert lamb >= 0, f"risk aversion parameter `lamb` should be positive"
|
||||
self.lamb = lamb
|
||||
|
||||
assert delta >= 0, f'turnover limit `delta` should be positive'
|
||||
assert delta >= 0, f"turnover limit `delta` should be positive"
|
||||
self.delta = delta
|
||||
|
||||
assert alpha >= 0, f'l2 norm regularizer `alpha` should be positive'
|
||||
assert alpha >= 0, f"l2 norm regularizer `alpha` should be positive"
|
||||
self.alpha = alpha
|
||||
|
||||
self.tol = tol
|
||||
|
||||
def __call__(self, S: Union[np.ndarray, pd.DataFrame],
|
||||
u: Optional[Union[np.ndarray, pd.Series]] = None,
|
||||
w0: Optional[Union[np.ndarray, pd.Series]] = None) -> Union[np.ndarray, pd.Series]:
|
||||
def __call__(
|
||||
self,
|
||||
S: Union[np.ndarray, pd.DataFrame],
|
||||
u: Optional[Union[np.ndarray, pd.Series]] = None,
|
||||
w0: Optional[Union[np.ndarray, pd.Series]] = None,
|
||||
) -> Union[np.ndarray, pd.Series]:
|
||||
"""
|
||||
Args:
|
||||
S (np.ndarray or pd.DataFrame): covariance matrix
|
||||
@@ -72,22 +81,22 @@ class PortfolioOptimizer(object):
|
||||
|
||||
# transform alpha
|
||||
if u is not None:
|
||||
assert len(u) == len(S), '`u` has mismatched shape'
|
||||
assert len(u) == len(S), "`u` has mismatched shape"
|
||||
if isinstance(u, pd.Series):
|
||||
assert all(u.index == index), '`u` has mismatched index'
|
||||
assert all(u.index == index), "`u` has mismatched index"
|
||||
u = u.values
|
||||
|
||||
# transform initial weights
|
||||
if w0 is not None:
|
||||
assert len(w0) == len(S), '`w0` has mismatched shape'
|
||||
assert len(w0) == len(S), "`w0` has mismatched shape"
|
||||
if isinstance(w0, pd.Series):
|
||||
assert all(w0.index == index), '`w0` has mismatched index'
|
||||
assert all(w0.index == index), "`w0` has mismatched index"
|
||||
w0 = w0.values
|
||||
|
||||
# scale alpha to match volatility
|
||||
if u is not None:
|
||||
u = u / u.std()
|
||||
u *= np.mean(np.diag(S))**0.5
|
||||
u *= np.mean(np.diag(S)) ** 0.5
|
||||
|
||||
# optimize
|
||||
w = self._optimize(S, u, w0)
|
||||
@@ -98,21 +107,20 @@ class PortfolioOptimizer(object):
|
||||
|
||||
return w
|
||||
|
||||
def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None,
|
||||
w0: Optional[np.ndarray] = None) -> np.ndarray:
|
||||
def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray:
|
||||
|
||||
# inverse volatility
|
||||
if self.method == self.OPT_INV:
|
||||
if u is not None:
|
||||
warnings.warn('`u` is set but will not be used for `inv` portfolio')
|
||||
warnings.warn("`u` is set but will not be used for `inv` portfolio")
|
||||
if w0 is not None:
|
||||
warnings.warn('`w0` is set but will not be used for `inv` portfolio')
|
||||
warnings.warn("`w0` is set but will not be used for `inv` portfolio")
|
||||
return self._optimize_inv(S)
|
||||
|
||||
# global minimum variance
|
||||
if self.method == self.OPT_GMV:
|
||||
if u is not None:
|
||||
warnings.warn('`u` is set but will not be used for `gmv` portfolio')
|
||||
warnings.warn("`u` is set but will not be used for `gmv` portfolio")
|
||||
return self._optimize_gmv(S, w0)
|
||||
|
||||
# mean-variance
|
||||
@@ -122,12 +130,12 @@ class PortfolioOptimizer(object):
|
||||
# risk parity
|
||||
if self.method == self.OPT_RP:
|
||||
if u is not None:
|
||||
warnings.warn('`u` is set but will not be used for `rp` portfolio')
|
||||
warnings.warn("`u` is set but will not be used for `rp` portfolio")
|
||||
return self._optimize_rp(S, w0)
|
||||
|
||||
def _optimize_inv(self, S: np.ndarray) -> np.ndarray:
|
||||
"""Inverse volatility"""
|
||||
vola = np.diag(S)**0.5
|
||||
vola = np.diag(S) ** 0.5
|
||||
w = 1 / vola
|
||||
w /= w.sum()
|
||||
return w
|
||||
@@ -140,14 +148,11 @@ class PortfolioOptimizer(object):
|
||||
s.t. w >= 0, sum(w) == 1
|
||||
where `S` is the covariance matrix.
|
||||
"""
|
||||
return self._solve(
|
||||
len(S),
|
||||
self._get_objective_gmv(S),
|
||||
*self._get_constrains(w0)
|
||||
)
|
||||
return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0))
|
||||
|
||||
def _optimize_mvo(self, S: np.ndarray, u: Optional[np.ndarray] = None,
|
||||
w0: Optional[np.ndarray] = None) -> np.ndarray:
|
||||
def _optimize_mvo(
|
||||
self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None
|
||||
) -> np.ndarray:
|
||||
"""optimize mean-variance portfolio
|
||||
|
||||
This method solves the following optimization problem
|
||||
@@ -156,11 +161,7 @@ class PortfolioOptimizer(object):
|
||||
where `S` is the covariance matrix, `u` is the expected returns,
|
||||
and `lamb` is the risk aversion parameter.
|
||||
"""
|
||||
return self._solve(
|
||||
len(S),
|
||||
self._get_objective_mvo(S, u),
|
||||
*self._get_constrains(w0)
|
||||
)
|
||||
return self._solve(len(S), self._get_objective_mvo(S, u), *self._get_constrains(w0))
|
||||
|
||||
def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray:
|
||||
"""optimize risk parity portfolio
|
||||
@@ -170,11 +171,7 @@ class PortfolioOptimizer(object):
|
||||
s.t. w >= 0, sum(w) == 1
|
||||
where `S` is the covariance matrix and `N` is the number of stocks.
|
||||
"""
|
||||
return self._solve(
|
||||
len(S),
|
||||
self._get_objective_rp(S),
|
||||
*self._get_constrains(w0)
|
||||
)
|
||||
return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0))
|
||||
|
||||
def _get_objective_gmv(self, S: np.ndarray) -> np.ndarray:
|
||||
"""global minimum variance optimization objective
|
||||
@@ -213,7 +210,7 @@ class PortfolioOptimizer(object):
|
||||
N = len(x)
|
||||
Sx = S @ x
|
||||
xSx = x @ Sx
|
||||
return np.sum((x - xSx / Sx / N)**2)
|
||||
return np.sum((x - xSx / Sx / N) ** 2)
|
||||
|
||||
return func
|
||||
|
||||
@@ -230,15 +227,11 @@ class PortfolioOptimizer(object):
|
||||
bounds = so.Bounds(0.0, 1.0)
|
||||
|
||||
# full investment constraint
|
||||
cons = [
|
||||
{'type': 'eq', 'fun': lambda x: np.sum(x) - 1} # == 0
|
||||
]
|
||||
cons = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}] # == 0
|
||||
|
||||
# turnover constraint
|
||||
if w0 is not None:
|
||||
cons.append(
|
||||
{'type': 'ineq', 'fun': lambda x: self.delta - np.sum(np.abs(x - w0))} # >= 0
|
||||
)
|
||||
cons.append({"type": "ineq", "fun": lambda x: self.delta - np.sum(np.abs(x - w0))}) # >= 0
|
||||
|
||||
return bounds, cons
|
||||
|
||||
@@ -257,9 +250,9 @@ class PortfolioOptimizer(object):
|
||||
wrapped_obj = lambda x: obj(x) + self.alpha * np.sum(np.square(x))
|
||||
|
||||
# solve
|
||||
x0 = np.ones(n) / n # init results
|
||||
x0 = np.ones(n) / n # init results
|
||||
sol = so.minimize(wrapped_obj, x0, bounds=bounds, constraints=cons, tol=self.tol)
|
||||
if not sol.success:
|
||||
warnings.warn(f'optimization not success ({sol.status})')
|
||||
warnings.warn(f"optimization not success ({sol.status})")
|
||||
|
||||
return sol.x
|
||||
|
||||
@@ -10,6 +10,7 @@ class QlibRecorder:
|
||||
"""
|
||||
A global system that helps to manage the experiments.
|
||||
"""
|
||||
|
||||
def __init__(self, exp_manager, uri):
|
||||
self.exp_manager = exp_manager
|
||||
self.uri = uri
|
||||
@@ -20,7 +21,7 @@ class QlibRecorder:
|
||||
try:
|
||||
yield run
|
||||
except:
|
||||
self.end_exp() # end the experiment if something went wrong
|
||||
self.end_exp() # end the experiment if something went wrong
|
||||
self.end_exp()
|
||||
|
||||
def start_exp(self, experiment_name=None):
|
||||
|
||||
@@ -5,6 +5,7 @@ import mlflow
|
||||
from pathlib import Path
|
||||
from .recorder import MLflowRecorder
|
||||
|
||||
|
||||
class Experiment:
|
||||
"""
|
||||
Thie is the `Experiment` class for each experiment being run. The API is designed
|
||||
@@ -21,7 +22,7 @@ class Experiment:
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
|
||||
Returns
|
||||
-------
|
||||
A recorder instance.
|
||||
@@ -84,4 +85,4 @@ class MLflowExperiment(Experiment):
|
||||
|
||||
def delete_recorder(self, rid):
|
||||
mlflow.delete_run(rid)
|
||||
self.recorders = [r for r in self.recorders if r.recorder_id == rid]
|
||||
self.recorders = [r for r in self.recorders if r.recorder_id == rid]
|
||||
|
||||
@@ -9,7 +9,8 @@ from .exp import MLflowExperiment
|
||||
from .recorder import MLflowRecorder
|
||||
from ..log import get_module_logger
|
||||
|
||||
logger = get_module_logger('workflow', 'Warning')
|
||||
logger = get_module_logger("workflow", "Warning")
|
||||
|
||||
|
||||
class ExpManager:
|
||||
"""
|
||||
|
||||
@@ -50,23 +50,24 @@ class SignalRecord(RecordTemp):
|
||||
def generate(self, **kwargs):
|
||||
# generate prediciton
|
||||
pred = self.model.predict(self.dataset)
|
||||
self.recorder.save_object(pred, 'pred.pkl')
|
||||
|
||||
self.recorder.save_object(pred, "pred.pkl")
|
||||
|
||||
def load(self):
|
||||
# try to load the saved object
|
||||
try:
|
||||
pred = self.recorder.load_object('pred.pkl')
|
||||
pred = self.recorder.load_object("pred.pkl")
|
||||
return pred
|
||||
except:
|
||||
raise Exception('Something went wrong when loading the saved object.')
|
||||
raise Exception("Something went wrong when loading the saved object.")
|
||||
|
||||
def check(self, **kwargs):
|
||||
return self.recorder.check('pred.pkl')
|
||||
return self.recorder.check("pred.pkl")
|
||||
|
||||
|
||||
# TODO
|
||||
class SigAnaRecord(SignalRecord):
|
||||
def __init__(self, recorder, **kwargs):
|
||||
pass
|
||||
|
||||
def generate(self):
|
||||
pass
|
||||
@@ -85,7 +86,7 @@ class PortAnaRecord(SignalRecord):
|
||||
self.BACKTEST_CONFIG = BACKTEST_CONFIG
|
||||
module = get_module_by_module_path("qlib.contrib.strategy")
|
||||
self.strategy = init_instance_by_config(STRATEGY_CONFIG, module)
|
||||
self.artifact_path = Path('portfolio_analysis').resolve()
|
||||
self.artifact_path = Path("portfolio_analysis").resolve()
|
||||
|
||||
def generate(self, **kwargs):
|
||||
"""
|
||||
@@ -99,8 +100,8 @@ class PortAnaRecord(SignalRecord):
|
||||
# custom strategy and get backtest
|
||||
pred_score = super().load()
|
||||
report_normal, positions_normal = normal_backtest(pred_score, strategy=self.strategy, **self.BACKTEST_CONFIG)
|
||||
self.recorder.save_object(report_normal, 'report_normal.pkl', self.artifact_path)
|
||||
self.recorder.save_object(positions_normal, 'positions_normal.pkl', self.artifact_path)
|
||||
self.recorder.save_object(report_normal, "report_normal.pkl", self.artifact_path)
|
||||
self.recorder.save_object(positions_normal, "positions_normal.pkl", self.artifact_path)
|
||||
|
||||
# analysis
|
||||
analysis = dict()
|
||||
@@ -109,29 +110,15 @@ class PortAnaRecord(SignalRecord):
|
||||
report_normal["return"] - report_normal["bench"] - report_normal["cost"]
|
||||
)
|
||||
analysis_df = pd.concat(analysis) # type: pd.DataFrame
|
||||
self.recorder.save_object(pred, 'port_analysis.pkl', self.artifact_path)
|
||||
self.recorder.save_object(pred, "port_analysis.pkl", self.artifact_path)
|
||||
|
||||
def load(self):
|
||||
# try to load the saved object
|
||||
try:
|
||||
pred = self.recorder.load_object(self.artifact_path / 'port_analysis.pkl'')
|
||||
pred = self.recorder.load_object(self.artifact_path / "port_analysis.pkl")
|
||||
return pred
|
||||
except:
|
||||
raise Exception('Something went wrong when loading the saved object.')
|
||||
raise Exception("Something went wrong when loading the saved object.")
|
||||
|
||||
def check(self):
|
||||
return self.recorder.check('port_analysis.pkl', self.artifact_path)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
return self.recorder.check("port_analysis.pkl", self.artifact_path)
|
||||
|
||||
@@ -166,7 +166,7 @@ class MLflowRecorder(Recorder):
|
||||
# save the run id and artifact_uri
|
||||
self.recorder_id = run.info.run_id
|
||||
self.artifact_uri = run.info.artifact_uri
|
||||
self._uri = mlflow.get_tracking_uri() # Fix!!! : this is not proper to have uri in recorder
|
||||
self._uri = mlflow.get_tracking_uri() # Fix!!! : this is not proper to have uri in recorder
|
||||
# set up file manager for saving objects
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
self.fm = FileManager(Path(self.temp_dir).absolute())
|
||||
@@ -238,7 +238,7 @@ class MLflowRecorder(Recorder):
|
||||
def check(self, name, path=None):
|
||||
client = mlflow.tracking.MlflowClient(tracking_uri=self._uri)
|
||||
artifacts = client.list_artifacts(self.recorder_id, path)
|
||||
for artifact in artifacts
|
||||
for artifact in artifacts:
|
||||
if name in artifact.path:
|
||||
return True
|
||||
return False
|
||||
return False
|
||||
|
||||
Reference in New Issue
Block a user