1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-04 03:21:00 +08:00

Format with black

This commit is contained in:
Jactus
2020-11-02 11:09:24 +08:00
parent b077d848f4
commit 661b3bffcc
11 changed files with 157 additions and 157 deletions

View File

@@ -1032,12 +1032,12 @@ def register_all_wrappers():
"""register_all_wrappers"""
logger = get_module_logger("data")
module = get_module_by_module_path("qlib.data")
_calendar_provider = init_instance_by_config(C.calendar_provider, module)
if getattr(C, "calendar_cache", None) is not None:
_calendar_cache_config = {}
_calendar_cache_config.update(C.calendar_cache)
_calendar_cache_config['kwargs'].update(provider=_calendar_provider)
_calendar_cache_config["kwargs"].update(provider=_calendar_provider)
_calendar_provider = init_instance_by_config(_calendar_cache_config, module)
register_wrapper(Cal, _calendar_provider, "qlib.data")
logger.debug(f"registering Cal {C.calendar_provider}-{C.calenar_cache}")
@@ -1056,7 +1056,7 @@ def register_all_wrappers():
if getattr(C, "expression_cache", None) is not None:
_expression_cache_config = {}
_expression_cache_config.update(C.expression_cache)
_expression_cache_config['kwargs'].update(provider=_eprovider)
_expression_cache_config["kwargs"].update(provider=_eprovider)
_eprovider = init_instance_by_config(C.expression_cache, module)
register_wrapper(ExpressionD, _eprovider, "qlib.data")
logger.debug(f"registering ExpressioneD {C.expression_provider}-{C.expression_cache}")
@@ -1065,7 +1065,7 @@ def register_all_wrappers():
if getattr(C, "dataset_cache", None) is not None:
_dataset_cache_config = {}
_dataset_cache_config.update(C.dataset_cache)
_dataset_cache_config['kwargs'].update(provider=_dprovider)
_dataset_cache_config["kwargs"].update(provider=_dprovider)
_dprovider = init_instance_by_config(_dataset_cache_config, module)
register_wrapper(DatasetD, _dprovider, "qlib.data")
logger.debug(f"registering DataseteD {C.dataset_provider}-{C.dataset_cache}")

View File

@@ -125,7 +125,7 @@ class DataHandler(Serializable):
selector: Union[pd.Timestamp, slice, str],
level: Union[str, int] = "datetime",
col_set: Union[str, List[str]] = CS_ALL,
squeeze: bool = False
squeeze: bool = False,
) -> pd.DataFrame:
"""
fetch data from underlying data source
@@ -184,17 +184,18 @@ class DataHandler(Serializable):
cur_date (pd.Timestamp or str): current date
periods (int): number of periods
"""
trading_dates = self._data.index.unique(level='datetime')
trading_dates = self._data.index.unique(level="datetime")
cur_loc = trading_dates.get_loc(cur_date)
pre_loc = cur_loc - periods + 1
if pre_loc < 0:
warnings.warn('`periods` is too large. the first date will be returned.')
warnings.warn("`periods` is too large. the first date will be returned.")
pre_loc = 0
ref_date = trading_dates[pre_loc]
return slice(ref_date, cur_date)
def get_range_iterator(self, periods: int, min_periods: Optional[int] = None,
**kwargs) -> Iterator[Tuple[pd.Timestamp, pd.DataFrame]]:
def get_range_iterator(
self, periods: int, min_periods: Optional[int] = None, **kwargs
) -> Iterator[Tuple[pd.Timestamp, pd.DataFrame]]:
"""
get a iterator of sliced data with given periods
@@ -203,7 +204,7 @@ class DataHandler(Serializable):
min_periods (int): minimum periods for sliced dataframe
kwargs (dict): will be passed to `self.fetch`
"""
trading_dates = self._data.index.unique(level='datetime')
trading_dates = self._data.index.unique(level="datetime")
if min_periods is None:
min_periods = periods
for cur_date in trading_dates[min_periods:]:

View File

@@ -9,10 +9,12 @@ from typing import Tuple
from qlib.data import D
class DataLoader(abc.ABC):
'''
"""
DataLoader is designed for loading raw data from original data source.
'''
"""
@abc.abstractmethod
def load(self, instruments, start_time=None, end_time=None) -> pd.DataFrame:
"""
@@ -47,7 +49,8 @@ class DataLoader(abc.ABC):
class QlibDataLoader(DataLoader):
'''Same as QlibDataLoader. The fields can be define by config'''
"""Same as QlibDataLoader. The fields can be define by config"""
def __init__(self, config: Tuple[list, tuple, dict], filter_pipe=None):
"""
Parameters
@@ -64,7 +67,7 @@ class QlibDataLoader(DataLoader):
<fields_info> := ["expr", ...] | (["expr", ...], ["col_name", ...])
"""
self.is_group = isinstance(config, dict)
self.is_group = isinstance(config, dict)
if self.is_group:
self.fields = {grp: self._parse_fields_info(fields_info) for grp, fields_info in config.items()}
@@ -86,15 +89,17 @@ class QlibDataLoader(DataLoader):
if isinstance(instruments, str):
instruments = D.instruments(instruments, filter_pipe=self.filter_pipe)
elif self.filter_pipe is not None:
warnings.warn('`filter_pipe` is not None, but it will not be used with `instruments` as list')
warnings.warn("`filter_pipe` is not None, but it will not be used with `instruments` as list")
def _get_df(exprs, names):
df = D.features(instruments, exprs, start_time, end_time)
df.columns = names
return df
if self.is_group:
df = pd.concat({grp: _get_df(exprs, names) for grp, (exprs, names) in self.fields.items()}, axis=1)
else:
exprs, names = self.fields
df = _get_df(exprs, names)
df = df.swaplevel().sort_index() # NOTE: always return <datetime, instrument>
df = df.swaplevel().sort_index() # NOTE: always return <datetime, instrument>
return df

View File

@@ -701,7 +701,7 @@ class Rolling(ExpressionOps):
if self.N == 0:
return np.inf
if 0 < self.N < 1:
return int(np.log(1e-6) / np.log(1 - self.N)) # (1 - N)**window == 1e-6
return int(np.log(1e-6) / np.log(1 - self.N)) # (1 - N)**window == 1e-6
return self.feature.get_longest_back_rolling() + self.N - 1
def get_extended_window_size(self):

View File

@@ -16,11 +16,11 @@ class RiskModel(BaseModel):
A risk model is used to estimate the covariance matrix of stock returns.
"""
MASK_NAN = 'mask'
FILL_NAN = 'fill'
IGNORE_NAN = 'ignore'
MASK_NAN = "mask"
FILL_NAN = "fill"
IGNORE_NAN = "ignore"
def __init__(self, nan_option: str = 'ignore', assume_centered: bool = False, scale_return: bool = True):
def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, scale_return: bool = True):
"""
Args:
nan_option (str): nan handling option (`ignore`/`mask`/`fill`)
@@ -28,15 +28,19 @@ class RiskModel(BaseModel):
scale_return (bool): whether scale returns as percentage
"""
# nan
assert nan_option in [self.MASK_NAN, self.FILL_NAN, self.IGNORE_NAN], \
f'`nan_option={nan_option}` is not supported'
assert nan_option in [
self.MASK_NAN,
self.FILL_NAN,
self.IGNORE_NAN,
], f"`nan_option={nan_option}` is not supported"
self.nan_option = nan_option
self.assume_centered = assume_centered
self.scale_return = scale_return
def predict(self, X: Union[pd.Series, pd.DataFrame, np.ndarray],
return_corr: bool = False, is_price: bool = True) -> Union[pd.DataFrame, np.ndarray]:
def predict(
self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True
) -> Union[pd.DataFrame, np.ndarray]:
"""
Args:
X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance,
@@ -53,18 +57,18 @@ class RiskModel(BaseModel):
else:
if isinstance(X.index, pd.MultiIndex):
if isinstance(X, pd.DataFrame):
X = X.iloc[:, 0].unstack(level='instrument') # always use the first column
X = X.iloc[:, 0].unstack(level="instrument") # always use the first column
else:
X = X.unstack(level='instrument')
X = X.unstack(level="instrument")
else:
# X is 2D DataFrame
pass
columns = X.columns # will be used to restore dataframe
columns = X.columns # will be used to restore dataframe
X = X.values
# calculate pct_change
if is_price:
X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows
X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows
# scale return
if self.scale_return:
@@ -106,7 +110,7 @@ class RiskModel(BaseModel):
N = len(X)
if isinstance(X, np.ma.MaskedArray):
M = 1 - X.mask
N = M.T.dot(M) # each pair has distinct number of samples
N = M.T.dot(M) # each pair has distinct number of samples
return xTx / N
def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]:
@@ -165,14 +169,14 @@ class ShrinkCovEstimator(RiskModel):
[7] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-0000648dfc98/covMarket.m.zip
"""
SHR_LW = 'lw'
SHR_OAS = 'oas'
SHR_LW = "lw"
SHR_OAS = "oas"
TGT_CONST_VAR = 'const_var'
TGT_CONST_CORR = 'const_corr'
TGT_SINGLE_FACTOR = 'single_factor'
TGT_CONST_VAR = "const_var"
TGT_CONST_CORR = "const_corr"
TGT_SINGLE_FACTOR = "single_factor"
def __init__(self, alpha: Union[str, float] = 0.0, target: Union[str, np.ndarray] = 'const_var', **kwargs):
def __init__(self, alpha: Union[str, float] = 0.0, target: Union[str, np.ndarray] = "const_var", **kwargs):
"""
Args:
alpha (str or float): shrinking parameter or estimator (`lw`/`oas`)
@@ -183,24 +187,26 @@ class ShrinkCovEstimator(RiskModel):
# alpha
if isinstance(alpha, str):
assert alpha in [self.SHR_LW, self.SHR_OAS], \
f'shrinking method `{alpha}` is not supported'
assert alpha in [self.SHR_LW, self.SHR_OAS], f"shrinking method `{alpha}` is not supported"
elif isinstance(alpha, (float, np.floating)):
assert 0 <= alpha <= 1, 'alpha should be between [0, 1]'
assert 0 <= alpha <= 1, "alpha should be between [0, 1]"
else:
raise TypeError('invalid argument type for `alpha`')
raise TypeError("invalid argument type for `alpha`")
self.alpha = alpha
# target
if isinstance(target, str):
assert target in [self.TGT_CONST_VAR, self.TGT_CONST_CORR, self.TGT_SINGLE_FACTOR], \
f'shrinking target `{target} is not supported'
assert target in [
self.TGT_CONST_VAR,
self.TGT_CONST_CORR,
self.TGT_SINGLE_FACTOR,
], f"shrinking target `{target} is not supported"
elif isinstance(target, np.ndarray):
pass
else:
raise TypeError('invalid argument type for `target`')
raise TypeError("invalid argument type for `target`")
if alpha == self.SHR_OAS and target != self.TGT_CONST_VAR:
raise NotImplementedError('currently `oas` can only support `const_var` as target')
raise NotImplementedError("currently `oas` can only support `const_var` as target")
self.target = target
def _predict(self, X: np.ndarray) -> np.ndarray:
@@ -215,7 +221,7 @@ class ShrinkCovEstimator(RiskModel):
# shrink covariance
if alpha > 0:
S *= (1 - alpha)
S *= 1 - alpha
F *= alpha
S += F
@@ -292,8 +298,8 @@ class ShrinkCovEstimator(RiskModel):
alpha = A / B
where `n`, `p` are the dim of observations and variables respectively.
"""
trS2 = np.sum(S**2)
tr2S = np.trace(S)**2
trS2 = np.sum(S ** 2)
tr2S = np.trace(S) ** 2
n, p = X.shape
@@ -310,10 +316,10 @@ class ShrinkCovEstimator(RiskModel):
"""
t, n = X.shape
y = X**2
phi = np.sum(y.T.dot(y) / t - S**2)
y = X ** 2
phi = np.sum(y.T.dot(y) / t - S ** 2)
gamma = np.linalg.norm(S - F, 'fro')**2
gamma = np.linalg.norm(S - F, "fro") ** 2
kappa = phi / gamma
alpha = max(0, min(1, kappa / t))
@@ -331,15 +337,15 @@ class ShrinkCovEstimator(RiskModel):
sqrt_var = np.sqrt(var)
r_bar = (np.sum(S / np.outer(sqrt_var, sqrt_var)) - n) / (n * (n - 1))
y = X**2
phi_mat = y.T.dot(y) / t - S**2
y = X ** 2
phi_mat = y.T.dot(y) / t - S ** 2
phi = np.sum(phi_mat)
theta_mat = (X**3).T.dot(X) / t - var[:, None] * S
theta_mat = (X ** 3).T.dot(X) / t - var[:, None] * S
np.fill_diagonal(theta_mat, 0)
rho = np.sum(np.diag(phi_mat)) + r_bar * np.sum(np.outer(1 / sqrt_var, sqrt_var) * theta_mat)
gamma = np.linalg.norm(S - F, 'fro')**2
gamma = np.linalg.norm(S - F, "fro") ** 2
kappa = (phi - rho) / gamma
alpha = max(0, min(1, kappa / t))
@@ -357,19 +363,21 @@ class ShrinkCovEstimator(RiskModel):
cov_mkt = np.asarray(X.T.dot(X_mkt) / len(X))
var_mkt = np.asarray(X_mkt.dot(X_mkt) / len(X))
y = X**2
phi = np.sum(y.T.dot(y)) / t - np.sum(S**2)
y = X ** 2
phi = np.sum(y.T.dot(y)) / t - np.sum(S ** 2)
rdiag = np.sum(y**2) / t - np.sum(np.diag(S)**2)
rdiag = np.sum(y ** 2) / t - np.sum(np.diag(S) ** 2)
z = X * X_mkt[:, None]
v1 = y.T.dot(z) / t - cov_mkt[:, None] * S
roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt
v3 = z.T.dot(z) / t - var_mkt * S
roff3 = np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2
roff3 = (
np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2
)
roff = 2 * roff1 - roff3
rho = rdiag + roff
gamma = np.linalg.norm(S - F, 'fro')**2
gamma = np.linalg.norm(S - F, "fro") ** 2
kappa = (phi - rho) / gamma
alpha = max(0, min(1, kappa / t))
@@ -386,11 +394,11 @@ class POETCovEstimator(RiskModel):
[2] http://econweb.rutgers.edu/yl1114/papers/poet/POET.m
"""
THRESH_SOFT = 'soft'
THRESH_HARD = 'hard'
THRESH_SCAD = 'scad'
THRESH_SOFT = "soft"
THRESH_HARD = "hard"
THRESH_SCAD = "scad"
def __init__(self, num_factors: int = 0, thresh: float = 1.0, thresh_method: str = 'soft', **kwargs):
def __init__(self, num_factors: int = 0, thresh: float = 1.0, thresh_method: str = "soft", **kwargs):
"""
Args:
num_factors (int): number of factors (if set to zero, no factor model will be used)
@@ -403,25 +411,28 @@ class POETCovEstimator(RiskModel):
"""
super().__init__(**kwargs)
assert num_factors >= 0, '`num_factors` requires a positive integer'
assert num_factors >= 0, "`num_factors` requires a positive integer"
self.num_factors = num_factors
assert thresh >= 0, '`thresh` requires a positive float number'
assert thresh >= 0, "`thresh` requires a positive float number"
self.thresh = thresh
assert thresh_method in [self.THRESH_HARD, self.THRESH_SOFT, self.THRESH_SCAD], \
'`thresh_method` should be `soft`/`hard`/`scad`'
assert thresh_method in [
self.THRESH_HARD,
self.THRESH_SOFT,
self.THRESH_SCAD,
], "`thresh_method` should be `soft`/`hard`/`scad`"
self.thresh_method = thresh_method
def _predict(self, X: np.ndarray) -> np.ndarray:
Y = X.T # NOTE: to match POET's implementation
Y = X.T # NOTE: to match POET's implementation
p, n = Y.shape
if self.num_factors > 0:
Dd, V = np.linalg.eig(Y.T.dot(Y))
V = V[:, np.argsort(Dd)]
F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n)
F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n)
LamPCA = Y.dot(F) / n
uhat = np.asarray(Y - LamPCA.dot(F.T))
Lowrank = np.asarray(LamPCA.dot(LamPCA.T))
@@ -434,12 +445,12 @@ class POETCovEstimator(RiskModel):
lamb = rate * self.thresh
SuPCA = uhat.dot(uhat.T) / n
SuDiag = np.diag(np.diag(SuPCA))
R = np.linalg.inv(SuDiag**0.5).dot(SuPCA).dot(np.linalg.inv(SuDiag**0.5))
R = np.linalg.inv(SuDiag ** 0.5).dot(SuPCA).dot(np.linalg.inv(SuDiag ** 0.5))
if self.thresh_method == self.THRESH_HARD:
M = R * (np.abs(R) > lamb)
elif self.thresh_method == self.THRESH_SOFT:
res = (np.abs(R) - lamb)
res = np.abs(R) - lamb
res = (res + np.abs(res)) / 2
M = np.sign(R) * res
else:
@@ -449,7 +460,7 @@ class POETCovEstimator(RiskModel):
M = M1 + M2 + M3
Rthresh = M - np.diag(np.diag(M)) + np.eye(p)
SigmaU = (SuDiag**0.5).dot(Rthresh).dot(SuDiag**0.5)
SigmaU = (SuDiag ** 0.5).dot(Rthresh).dot(SuDiag ** 0.5)
SigmaY = SigmaU + Lowrank
return SigmaY

View File

@@ -22,13 +22,20 @@ class PortfolioOptimizer(object):
This optimizer always assumes full investment and no-shorting.
"""
OPT_GMV = 'gmv'
OPT_MVO = 'mvo'
OPT_RP = 'rp'
OPT_INV = 'inv'
OPT_GMV = "gmv"
OPT_MVO = "mvo"
OPT_RP = "rp"
OPT_INV = "inv"
def __init__(self, method: str = 'inv', lamb: float = 0, delta: float = 0,
alpha: float = 0.0, scale_alpha: bool = True, tol: float = 1e-8):
def __init__(
self,
method: str = "inv",
lamb: float = 0,
delta: float = 0,
alpha: float = 0.0,
scale_alpha: bool = True,
tol: float = 1e-8,
):
"""
Args:
method (str): portfolio optimization method
@@ -37,24 +44,26 @@ class PortfolioOptimizer(object):
alpha (float): l2 norm regularizer
tol (float): tolerance for optimization termination
"""
assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], \
f'method `{method}` is not supported'
assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported"
self.method = method
assert lamb >= 0, f'risk aversion parameter `lamb` should be positive'
assert lamb >= 0, f"risk aversion parameter `lamb` should be positive"
self.lamb = lamb
assert delta >= 0, f'turnover limit `delta` should be positive'
assert delta >= 0, f"turnover limit `delta` should be positive"
self.delta = delta
assert alpha >= 0, f'l2 norm regularizer `alpha` should be positive'
assert alpha >= 0, f"l2 norm regularizer `alpha` should be positive"
self.alpha = alpha
self.tol = tol
def __call__(self, S: Union[np.ndarray, pd.DataFrame],
u: Optional[Union[np.ndarray, pd.Series]] = None,
w0: Optional[Union[np.ndarray, pd.Series]] = None) -> Union[np.ndarray, pd.Series]:
def __call__(
self,
S: Union[np.ndarray, pd.DataFrame],
u: Optional[Union[np.ndarray, pd.Series]] = None,
w0: Optional[Union[np.ndarray, pd.Series]] = None,
) -> Union[np.ndarray, pd.Series]:
"""
Args:
S (np.ndarray or pd.DataFrame): covariance matrix
@@ -72,22 +81,22 @@ class PortfolioOptimizer(object):
# transform alpha
if u is not None:
assert len(u) == len(S), '`u` has mismatched shape'
assert len(u) == len(S), "`u` has mismatched shape"
if isinstance(u, pd.Series):
assert all(u.index == index), '`u` has mismatched index'
assert all(u.index == index), "`u` has mismatched index"
u = u.values
# transform initial weights
if w0 is not None:
assert len(w0) == len(S), '`w0` has mismatched shape'
assert len(w0) == len(S), "`w0` has mismatched shape"
if isinstance(w0, pd.Series):
assert all(w0.index == index), '`w0` has mismatched index'
assert all(w0.index == index), "`w0` has mismatched index"
w0 = w0.values
# scale alpha to match volatility
if u is not None:
u = u / u.std()
u *= np.mean(np.diag(S))**0.5
u *= np.mean(np.diag(S)) ** 0.5
# optimize
w = self._optimize(S, u, w0)
@@ -98,21 +107,20 @@ class PortfolioOptimizer(object):
return w
def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None,
w0: Optional[np.ndarray] = None) -> np.ndarray:
def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray:
# inverse volatility
if self.method == self.OPT_INV:
if u is not None:
warnings.warn('`u` is set but will not be used for `inv` portfolio')
warnings.warn("`u` is set but will not be used for `inv` portfolio")
if w0 is not None:
warnings.warn('`w0` is set but will not be used for `inv` portfolio')
warnings.warn("`w0` is set but will not be used for `inv` portfolio")
return self._optimize_inv(S)
# global minimum variance
if self.method == self.OPT_GMV:
if u is not None:
warnings.warn('`u` is set but will not be used for `gmv` portfolio')
warnings.warn("`u` is set but will not be used for `gmv` portfolio")
return self._optimize_gmv(S, w0)
# mean-variance
@@ -122,12 +130,12 @@ class PortfolioOptimizer(object):
# risk parity
if self.method == self.OPT_RP:
if u is not None:
warnings.warn('`u` is set but will not be used for `rp` portfolio')
warnings.warn("`u` is set but will not be used for `rp` portfolio")
return self._optimize_rp(S, w0)
def _optimize_inv(self, S: np.ndarray) -> np.ndarray:
"""Inverse volatility"""
vola = np.diag(S)**0.5
vola = np.diag(S) ** 0.5
w = 1 / vola
w /= w.sum()
return w
@@ -140,14 +148,11 @@ class PortfolioOptimizer(object):
s.t. w >= 0, sum(w) == 1
where `S` is the covariance matrix.
"""
return self._solve(
len(S),
self._get_objective_gmv(S),
*self._get_constrains(w0)
)
return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0))
def _optimize_mvo(self, S: np.ndarray, u: Optional[np.ndarray] = None,
w0: Optional[np.ndarray] = None) -> np.ndarray:
def _optimize_mvo(
self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None
) -> np.ndarray:
"""optimize mean-variance portfolio
This method solves the following optimization problem
@@ -156,11 +161,7 @@ class PortfolioOptimizer(object):
where `S` is the covariance matrix, `u` is the expected returns,
and `lamb` is the risk aversion parameter.
"""
return self._solve(
len(S),
self._get_objective_mvo(S, u),
*self._get_constrains(w0)
)
return self._solve(len(S), self._get_objective_mvo(S, u), *self._get_constrains(w0))
def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray:
"""optimize risk parity portfolio
@@ -170,11 +171,7 @@ class PortfolioOptimizer(object):
s.t. w >= 0, sum(w) == 1
where `S` is the covariance matrix and `N` is the number of stocks.
"""
return self._solve(
len(S),
self._get_objective_rp(S),
*self._get_constrains(w0)
)
return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0))
def _get_objective_gmv(self, S: np.ndarray) -> np.ndarray:
"""global minimum variance optimization objective
@@ -213,7 +210,7 @@ class PortfolioOptimizer(object):
N = len(x)
Sx = S @ x
xSx = x @ Sx
return np.sum((x - xSx / Sx / N)**2)
return np.sum((x - xSx / Sx / N) ** 2)
return func
@@ -230,15 +227,11 @@ class PortfolioOptimizer(object):
bounds = so.Bounds(0.0, 1.0)
# full investment constraint
cons = [
{'type': 'eq', 'fun': lambda x: np.sum(x) - 1} # == 0
]
cons = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}] # == 0
# turnover constraint
if w0 is not None:
cons.append(
{'type': 'ineq', 'fun': lambda x: self.delta - np.sum(np.abs(x - w0))} # >= 0
)
cons.append({"type": "ineq", "fun": lambda x: self.delta - np.sum(np.abs(x - w0))}) # >= 0
return bounds, cons
@@ -257,9 +250,9 @@ class PortfolioOptimizer(object):
wrapped_obj = lambda x: obj(x) + self.alpha * np.sum(np.square(x))
# solve
x0 = np.ones(n) / n # init results
x0 = np.ones(n) / n # init results
sol = so.minimize(wrapped_obj, x0, bounds=bounds, constraints=cons, tol=self.tol)
if not sol.success:
warnings.warn(f'optimization not success ({sol.status})')
warnings.warn(f"optimization not success ({sol.status})")
return sol.x

View File

@@ -10,6 +10,7 @@ class QlibRecorder:
"""
A global system that helps to manage the experiments.
"""
def __init__(self, exp_manager, uri):
self.exp_manager = exp_manager
self.uri = uri
@@ -20,7 +21,7 @@ class QlibRecorder:
try:
yield run
except:
self.end_exp() # end the experiment if something went wrong
self.end_exp() # end the experiment if something went wrong
self.end_exp()
def start_exp(self, experiment_name=None):

View File

@@ -5,6 +5,7 @@ import mlflow
from pathlib import Path
from .recorder import MLflowRecorder
class Experiment:
"""
Thie is the `Experiment` class for each experiment being run. The API is designed
@@ -21,7 +22,7 @@ class Experiment:
Parameters
----------
Returns
-------
A recorder instance.
@@ -84,4 +85,4 @@ class MLflowExperiment(Experiment):
def delete_recorder(self, rid):
mlflow.delete_run(rid)
self.recorders = [r for r in self.recorders if r.recorder_id == rid]
self.recorders = [r for r in self.recorders if r.recorder_id == rid]

View File

@@ -9,7 +9,8 @@ from .exp import MLflowExperiment
from .recorder import MLflowRecorder
from ..log import get_module_logger
logger = get_module_logger('workflow', 'Warning')
logger = get_module_logger("workflow", "Warning")
class ExpManager:
"""

View File

@@ -50,23 +50,24 @@ class SignalRecord(RecordTemp):
def generate(self, **kwargs):
# generate prediciton
pred = self.model.predict(self.dataset)
self.recorder.save_object(pred, 'pred.pkl')
self.recorder.save_object(pred, "pred.pkl")
def load(self):
# try to load the saved object
try:
pred = self.recorder.load_object('pred.pkl')
pred = self.recorder.load_object("pred.pkl")
return pred
except:
raise Exception('Something went wrong when loading the saved object.')
raise Exception("Something went wrong when loading the saved object.")
def check(self, **kwargs):
return self.recorder.check('pred.pkl')
return self.recorder.check("pred.pkl")
# TODO
class SigAnaRecord(SignalRecord):
def __init__(self, recorder, **kwargs):
pass
def generate(self):
pass
@@ -85,7 +86,7 @@ class PortAnaRecord(SignalRecord):
self.BACKTEST_CONFIG = BACKTEST_CONFIG
module = get_module_by_module_path("qlib.contrib.strategy")
self.strategy = init_instance_by_config(STRATEGY_CONFIG, module)
self.artifact_path = Path('portfolio_analysis').resolve()
self.artifact_path = Path("portfolio_analysis").resolve()
def generate(self, **kwargs):
"""
@@ -99,8 +100,8 @@ class PortAnaRecord(SignalRecord):
# custom strategy and get backtest
pred_score = super().load()
report_normal, positions_normal = normal_backtest(pred_score, strategy=self.strategy, **self.BACKTEST_CONFIG)
self.recorder.save_object(report_normal, 'report_normal.pkl', self.artifact_path)
self.recorder.save_object(positions_normal, 'positions_normal.pkl', self.artifact_path)
self.recorder.save_object(report_normal, "report_normal.pkl", self.artifact_path)
self.recorder.save_object(positions_normal, "positions_normal.pkl", self.artifact_path)
# analysis
analysis = dict()
@@ -109,29 +110,15 @@ class PortAnaRecord(SignalRecord):
report_normal["return"] - report_normal["bench"] - report_normal["cost"]
)
analysis_df = pd.concat(analysis) # type: pd.DataFrame
self.recorder.save_object(pred, 'port_analysis.pkl', self.artifact_path)
self.recorder.save_object(pred, "port_analysis.pkl", self.artifact_path)
def load(self):
# try to load the saved object
try:
pred = self.recorder.load_object(self.artifact_path / 'port_analysis.pkl'')
pred = self.recorder.load_object(self.artifact_path / "port_analysis.pkl")
return pred
except:
raise Exception('Something went wrong when loading the saved object.')
raise Exception("Something went wrong when loading the saved object.")
def check(self):
return self.recorder.check('port_analysis.pkl', self.artifact_path)
return self.recorder.check("port_analysis.pkl", self.artifact_path)

View File

@@ -166,7 +166,7 @@ class MLflowRecorder(Recorder):
# save the run id and artifact_uri
self.recorder_id = run.info.run_id
self.artifact_uri = run.info.artifact_uri
self._uri = mlflow.get_tracking_uri() # Fix!!! : this is not proper to have uri in recorder
self._uri = mlflow.get_tracking_uri() # Fix!!! : this is not proper to have uri in recorder
# set up file manager for saving objects
self.temp_dir = tempfile.mkdtemp()
self.fm = FileManager(Path(self.temp_dir).absolute())
@@ -238,7 +238,7 @@ class MLflowRecorder(Recorder):
def check(self, name, path=None):
client = mlflow.tracking.MlflowClient(tracking_uri=self._uri)
artifacts = client.list_artifacts(self.recorder_id, path)
for artifact in artifacts
for artifact in artifacts:
if name in artifact.path:
return True
return False
return False