diff --git a/qlib/data/data.py b/qlib/data/data.py index 8eae9f01c..11dc62d91 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -1032,12 +1032,12 @@ def register_all_wrappers(): """register_all_wrappers""" logger = get_module_logger("data") module = get_module_by_module_path("qlib.data") - + _calendar_provider = init_instance_by_config(C.calendar_provider, module) if getattr(C, "calendar_cache", None) is not None: _calendar_cache_config = {} _calendar_cache_config.update(C.calendar_cache) - _calendar_cache_config['kwargs'].update(provider=_calendar_provider) + _calendar_cache_config["kwargs"].update(provider=_calendar_provider) _calendar_provider = init_instance_by_config(_calendar_cache_config, module) register_wrapper(Cal, _calendar_provider, "qlib.data") logger.debug(f"registering Cal {C.calendar_provider}-{C.calenar_cache}") @@ -1056,7 +1056,7 @@ def register_all_wrappers(): if getattr(C, "expression_cache", None) is not None: _expression_cache_config = {} _expression_cache_config.update(C.expression_cache) - _expression_cache_config['kwargs'].update(provider=_eprovider) + _expression_cache_config["kwargs"].update(provider=_eprovider) _eprovider = init_instance_by_config(C.expression_cache, module) register_wrapper(ExpressionD, _eprovider, "qlib.data") logger.debug(f"registering ExpressioneD {C.expression_provider}-{C.expression_cache}") @@ -1065,7 +1065,7 @@ def register_all_wrappers(): if getattr(C, "dataset_cache", None) is not None: _dataset_cache_config = {} _dataset_cache_config.update(C.dataset_cache) - _dataset_cache_config['kwargs'].update(provider=_dprovider) + _dataset_cache_config["kwargs"].update(provider=_dprovider) _dprovider = init_instance_by_config(_dataset_cache_config, module) register_wrapper(DatasetD, _dprovider, "qlib.data") logger.debug(f"registering DataseteD {C.dataset_provider}-{C.dataset_cache}") diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py index b8751830d..3e295e3ca 100644 --- a/qlib/data/dataset/handler.py +++ b/qlib/data/dataset/handler.py @@ -125,7 +125,7 @@ class DataHandler(Serializable): selector: Union[pd.Timestamp, slice, str], level: Union[str, int] = "datetime", col_set: Union[str, List[str]] = CS_ALL, - squeeze: bool = False + squeeze: bool = False, ) -> pd.DataFrame: """ fetch data from underlying data source @@ -184,17 +184,18 @@ class DataHandler(Serializable): cur_date (pd.Timestamp or str): current date periods (int): number of periods """ - trading_dates = self._data.index.unique(level='datetime') + trading_dates = self._data.index.unique(level="datetime") cur_loc = trading_dates.get_loc(cur_date) pre_loc = cur_loc - periods + 1 if pre_loc < 0: - warnings.warn('`periods` is too large. the first date will be returned.') + warnings.warn("`periods` is too large. the first date will be returned.") pre_loc = 0 ref_date = trading_dates[pre_loc] return slice(ref_date, cur_date) - def get_range_iterator(self, periods: int, min_periods: Optional[int] = None, - **kwargs) -> Iterator[Tuple[pd.Timestamp, pd.DataFrame]]: + def get_range_iterator( + self, periods: int, min_periods: Optional[int] = None, **kwargs + ) -> Iterator[Tuple[pd.Timestamp, pd.DataFrame]]: """ get a iterator of sliced data with given periods @@ -203,7 +204,7 @@ class DataHandler(Serializable): min_periods (int): minimum periods for sliced dataframe kwargs (dict): will be passed to `self.fetch` """ - trading_dates = self._data.index.unique(level='datetime') + trading_dates = self._data.index.unique(level="datetime") if min_periods is None: min_periods = periods for cur_date in trading_dates[min_periods:]: diff --git a/qlib/data/dataset/loader.py b/qlib/data/dataset/loader.py index abfc695d9..816cf1c4a 100644 --- a/qlib/data/dataset/loader.py +++ b/qlib/data/dataset/loader.py @@ -9,10 +9,12 @@ from typing import Tuple from qlib.data import D + class DataLoader(abc.ABC): - ''' + """ DataLoader is designed for loading raw data from original data source. - ''' + """ + @abc.abstractmethod def load(self, instruments, start_time=None, end_time=None) -> pd.DataFrame: """ @@ -47,7 +49,8 @@ class DataLoader(abc.ABC): class QlibDataLoader(DataLoader): - '''Same as QlibDataLoader. The fields can be define by config''' + """Same as QlibDataLoader. The fields can be define by config""" + def __init__(self, config: Tuple[list, tuple, dict], filter_pipe=None): """ Parameters @@ -64,7 +67,7 @@ class QlibDataLoader(DataLoader): := ["expr", ...] | (["expr", ...], ["col_name", ...]) """ - self.is_group = isinstance(config, dict) + self.is_group = isinstance(config, dict) if self.is_group: self.fields = {grp: self._parse_fields_info(fields_info) for grp, fields_info in config.items()} @@ -86,15 +89,17 @@ class QlibDataLoader(DataLoader): if isinstance(instruments, str): instruments = D.instruments(instruments, filter_pipe=self.filter_pipe) elif self.filter_pipe is not None: - warnings.warn('`filter_pipe` is not None, but it will not be used with `instruments` as list') + warnings.warn("`filter_pipe` is not None, but it will not be used with `instruments` as list") + def _get_df(exprs, names): df = D.features(instruments, exprs, start_time, end_time) df.columns = names return df + if self.is_group: df = pd.concat({grp: _get_df(exprs, names) for grp, (exprs, names) in self.fields.items()}, axis=1) else: exprs, names = self.fields df = _get_df(exprs, names) - df = df.swaplevel().sort_index() # NOTE: always return + df = df.swaplevel().sort_index() # NOTE: always return return df diff --git a/qlib/data/ops.py b/qlib/data/ops.py index d9c657595..fb4b40784 100644 --- a/qlib/data/ops.py +++ b/qlib/data/ops.py @@ -701,7 +701,7 @@ class Rolling(ExpressionOps): if self.N == 0: return np.inf if 0 < self.N < 1: - return int(np.log(1e-6) / np.log(1 - self.N)) # (1 - N)**window == 1e-6 + return int(np.log(1e-6) / np.log(1 - self.N)) # (1 - N)**window == 1e-6 return self.feature.get_longest_back_rolling() + self.N - 1 def get_extended_window_size(self): diff --git a/qlib/model/riskmodel.py b/qlib/model/riskmodel.py index e63b8d4a2..42c2b710f 100644 --- a/qlib/model/riskmodel.py +++ b/qlib/model/riskmodel.py @@ -16,11 +16,11 @@ class RiskModel(BaseModel): A risk model is used to estimate the covariance matrix of stock returns. """ - MASK_NAN = 'mask' - FILL_NAN = 'fill' - IGNORE_NAN = 'ignore' + MASK_NAN = "mask" + FILL_NAN = "fill" + IGNORE_NAN = "ignore" - def __init__(self, nan_option: str = 'ignore', assume_centered: bool = False, scale_return: bool = True): + def __init__(self, nan_option: str = "ignore", assume_centered: bool = False, scale_return: bool = True): """ Args: nan_option (str): nan handling option (`ignore`/`mask`/`fill`) @@ -28,15 +28,19 @@ class RiskModel(BaseModel): scale_return (bool): whether scale returns as percentage """ # nan - assert nan_option in [self.MASK_NAN, self.FILL_NAN, self.IGNORE_NAN], \ - f'`nan_option={nan_option}` is not supported' + assert nan_option in [ + self.MASK_NAN, + self.FILL_NAN, + self.IGNORE_NAN, + ], f"`nan_option={nan_option}` is not supported" self.nan_option = nan_option self.assume_centered = assume_centered self.scale_return = scale_return - def predict(self, X: Union[pd.Series, pd.DataFrame, np.ndarray], - return_corr: bool = False, is_price: bool = True) -> Union[pd.DataFrame, np.ndarray]: + def predict( + self, X: Union[pd.Series, pd.DataFrame, np.ndarray], return_corr: bool = False, is_price: bool = True + ) -> Union[pd.DataFrame, np.ndarray]: """ Args: X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance, @@ -53,18 +57,18 @@ class RiskModel(BaseModel): else: if isinstance(X.index, pd.MultiIndex): if isinstance(X, pd.DataFrame): - X = X.iloc[:, 0].unstack(level='instrument') # always use the first column + X = X.iloc[:, 0].unstack(level="instrument") # always use the first column else: - X = X.unstack(level='instrument') + X = X.unstack(level="instrument") else: # X is 2D DataFrame pass - columns = X.columns # will be used to restore dataframe + columns = X.columns # will be used to restore dataframe X = X.values # calculate pct_change if is_price: - X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows + X = X[1:] / X[:-1] - 1 # NOTE: resulting `n - 1` rows # scale return if self.scale_return: @@ -106,7 +110,7 @@ class RiskModel(BaseModel): N = len(X) if isinstance(X, np.ma.MaskedArray): M = 1 - X.mask - N = M.T.dot(M) # each pair has distinct number of samples + N = M.T.dot(M) # each pair has distinct number of samples return xTx / N def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]: @@ -165,14 +169,14 @@ class ShrinkCovEstimator(RiskModel): [7] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-0000648dfc98/covMarket.m.zip """ - SHR_LW = 'lw' - SHR_OAS = 'oas' + SHR_LW = "lw" + SHR_OAS = "oas" - TGT_CONST_VAR = 'const_var' - TGT_CONST_CORR = 'const_corr' - TGT_SINGLE_FACTOR = 'single_factor' + TGT_CONST_VAR = "const_var" + TGT_CONST_CORR = "const_corr" + TGT_SINGLE_FACTOR = "single_factor" - def __init__(self, alpha: Union[str, float] = 0.0, target: Union[str, np.ndarray] = 'const_var', **kwargs): + def __init__(self, alpha: Union[str, float] = 0.0, target: Union[str, np.ndarray] = "const_var", **kwargs): """ Args: alpha (str or float): shrinking parameter or estimator (`lw`/`oas`) @@ -183,24 +187,26 @@ class ShrinkCovEstimator(RiskModel): # alpha if isinstance(alpha, str): - assert alpha in [self.SHR_LW, self.SHR_OAS], \ - f'shrinking method `{alpha}` is not supported' + assert alpha in [self.SHR_LW, self.SHR_OAS], f"shrinking method `{alpha}` is not supported" elif isinstance(alpha, (float, np.floating)): - assert 0 <= alpha <= 1, 'alpha should be between [0, 1]' + assert 0 <= alpha <= 1, "alpha should be between [0, 1]" else: - raise TypeError('invalid argument type for `alpha`') + raise TypeError("invalid argument type for `alpha`") self.alpha = alpha # target if isinstance(target, str): - assert target in [self.TGT_CONST_VAR, self.TGT_CONST_CORR, self.TGT_SINGLE_FACTOR], \ - f'shrinking target `{target} is not supported' + assert target in [ + self.TGT_CONST_VAR, + self.TGT_CONST_CORR, + self.TGT_SINGLE_FACTOR, + ], f"shrinking target `{target} is not supported" elif isinstance(target, np.ndarray): pass else: - raise TypeError('invalid argument type for `target`') + raise TypeError("invalid argument type for `target`") if alpha == self.SHR_OAS and target != self.TGT_CONST_VAR: - raise NotImplementedError('currently `oas` can only support `const_var` as target') + raise NotImplementedError("currently `oas` can only support `const_var` as target") self.target = target def _predict(self, X: np.ndarray) -> np.ndarray: @@ -215,7 +221,7 @@ class ShrinkCovEstimator(RiskModel): # shrink covariance if alpha > 0: - S *= (1 - alpha) + S *= 1 - alpha F *= alpha S += F @@ -292,8 +298,8 @@ class ShrinkCovEstimator(RiskModel): alpha = A / B where `n`, `p` are the dim of observations and variables respectively. """ - trS2 = np.sum(S**2) - tr2S = np.trace(S)**2 + trS2 = np.sum(S ** 2) + tr2S = np.trace(S) ** 2 n, p = X.shape @@ -310,10 +316,10 @@ class ShrinkCovEstimator(RiskModel): """ t, n = X.shape - y = X**2 - phi = np.sum(y.T.dot(y) / t - S**2) + y = X ** 2 + phi = np.sum(y.T.dot(y) / t - S ** 2) - gamma = np.linalg.norm(S - F, 'fro')**2 + gamma = np.linalg.norm(S - F, "fro") ** 2 kappa = phi / gamma alpha = max(0, min(1, kappa / t)) @@ -331,15 +337,15 @@ class ShrinkCovEstimator(RiskModel): sqrt_var = np.sqrt(var) r_bar = (np.sum(S / np.outer(sqrt_var, sqrt_var)) - n) / (n * (n - 1)) - y = X**2 - phi_mat = y.T.dot(y) / t - S**2 + y = X ** 2 + phi_mat = y.T.dot(y) / t - S ** 2 phi = np.sum(phi_mat) - theta_mat = (X**3).T.dot(X) / t - var[:, None] * S + theta_mat = (X ** 3).T.dot(X) / t - var[:, None] * S np.fill_diagonal(theta_mat, 0) rho = np.sum(np.diag(phi_mat)) + r_bar * np.sum(np.outer(1 / sqrt_var, sqrt_var) * theta_mat) - gamma = np.linalg.norm(S - F, 'fro')**2 + gamma = np.linalg.norm(S - F, "fro") ** 2 kappa = (phi - rho) / gamma alpha = max(0, min(1, kappa / t)) @@ -357,19 +363,21 @@ class ShrinkCovEstimator(RiskModel): cov_mkt = np.asarray(X.T.dot(X_mkt) / len(X)) var_mkt = np.asarray(X_mkt.dot(X_mkt) / len(X)) - y = X**2 - phi = np.sum(y.T.dot(y)) / t - np.sum(S**2) + y = X ** 2 + phi = np.sum(y.T.dot(y)) / t - np.sum(S ** 2) - rdiag = np.sum(y**2) / t - np.sum(np.diag(S)**2) + rdiag = np.sum(y ** 2) / t - np.sum(np.diag(S) ** 2) z = X * X_mkt[:, None] v1 = y.T.dot(z) / t - cov_mkt[:, None] * S roff1 = np.sum(v1 * cov_mkt[:, None].T) / var_mkt - np.sum(np.diag(v1) * cov_mkt) / var_mkt v3 = z.T.dot(z) / t - var_mkt * S - roff3 = np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt**2 - np.sum(np.diag(v3) * cov_mkt**2) / var_mkt**2 + roff3 = ( + np.sum(v3 * np.outer(cov_mkt, cov_mkt)) / var_mkt ** 2 - np.sum(np.diag(v3) * cov_mkt ** 2) / var_mkt ** 2 + ) roff = 2 * roff1 - roff3 rho = rdiag + roff - gamma = np.linalg.norm(S - F, 'fro')**2 + gamma = np.linalg.norm(S - F, "fro") ** 2 kappa = (phi - rho) / gamma alpha = max(0, min(1, kappa / t)) @@ -386,11 +394,11 @@ class POETCovEstimator(RiskModel): [2] http://econweb.rutgers.edu/yl1114/papers/poet/POET.m """ - THRESH_SOFT = 'soft' - THRESH_HARD = 'hard' - THRESH_SCAD = 'scad' + THRESH_SOFT = "soft" + THRESH_HARD = "hard" + THRESH_SCAD = "scad" - def __init__(self, num_factors: int = 0, thresh: float = 1.0, thresh_method: str = 'soft', **kwargs): + def __init__(self, num_factors: int = 0, thresh: float = 1.0, thresh_method: str = "soft", **kwargs): """ Args: num_factors (int): number of factors (if set to zero, no factor model will be used) @@ -403,25 +411,28 @@ class POETCovEstimator(RiskModel): """ super().__init__(**kwargs) - assert num_factors >= 0, '`num_factors` requires a positive integer' + assert num_factors >= 0, "`num_factors` requires a positive integer" self.num_factors = num_factors - assert thresh >= 0, '`thresh` requires a positive float number' + assert thresh >= 0, "`thresh` requires a positive float number" self.thresh = thresh - assert thresh_method in [self.THRESH_HARD, self.THRESH_SOFT, self.THRESH_SCAD], \ - '`thresh_method` should be `soft`/`hard`/`scad`' + assert thresh_method in [ + self.THRESH_HARD, + self.THRESH_SOFT, + self.THRESH_SCAD, + ], "`thresh_method` should be `soft`/`hard`/`scad`" self.thresh_method = thresh_method def _predict(self, X: np.ndarray) -> np.ndarray: - Y = X.T # NOTE: to match POET's implementation + Y = X.T # NOTE: to match POET's implementation p, n = Y.shape if self.num_factors > 0: Dd, V = np.linalg.eig(Y.T.dot(Y)) V = V[:, np.argsort(Dd)] - F = V[:, -self.num_factors:][:, ::-1] * np.sqrt(n) + F = V[:, -self.num_factors :][:, ::-1] * np.sqrt(n) LamPCA = Y.dot(F) / n uhat = np.asarray(Y - LamPCA.dot(F.T)) Lowrank = np.asarray(LamPCA.dot(LamPCA.T)) @@ -434,12 +445,12 @@ class POETCovEstimator(RiskModel): lamb = rate * self.thresh SuPCA = uhat.dot(uhat.T) / n SuDiag = np.diag(np.diag(SuPCA)) - R = np.linalg.inv(SuDiag**0.5).dot(SuPCA).dot(np.linalg.inv(SuDiag**0.5)) + R = np.linalg.inv(SuDiag ** 0.5).dot(SuPCA).dot(np.linalg.inv(SuDiag ** 0.5)) if self.thresh_method == self.THRESH_HARD: M = R * (np.abs(R) > lamb) elif self.thresh_method == self.THRESH_SOFT: - res = (np.abs(R) - lamb) + res = np.abs(R) - lamb res = (res + np.abs(res)) / 2 M = np.sign(R) * res else: @@ -449,7 +460,7 @@ class POETCovEstimator(RiskModel): M = M1 + M2 + M3 Rthresh = M - np.diag(np.diag(M)) + np.eye(p) - SigmaU = (SuDiag**0.5).dot(Rthresh).dot(SuDiag**0.5) + SigmaU = (SuDiag ** 0.5).dot(Rthresh).dot(SuDiag ** 0.5) SigmaY = SigmaU + Lowrank return SigmaY diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index 4b06e25b3..534a66e2d 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -22,13 +22,20 @@ class PortfolioOptimizer(object): This optimizer always assumes full investment and no-shorting. """ - OPT_GMV = 'gmv' - OPT_MVO = 'mvo' - OPT_RP = 'rp' - OPT_INV = 'inv' + OPT_GMV = "gmv" + OPT_MVO = "mvo" + OPT_RP = "rp" + OPT_INV = "inv" - def __init__(self, method: str = 'inv', lamb: float = 0, delta: float = 0, - alpha: float = 0.0, scale_alpha: bool = True, tol: float = 1e-8): + def __init__( + self, + method: str = "inv", + lamb: float = 0, + delta: float = 0, + alpha: float = 0.0, + scale_alpha: bool = True, + tol: float = 1e-8, + ): """ Args: method (str): portfolio optimization method @@ -37,24 +44,26 @@ class PortfolioOptimizer(object): alpha (float): l2 norm regularizer tol (float): tolerance for optimization termination """ - assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], \ - f'method `{method}` is not supported' + assert method in [self.OPT_GMV, self.OPT_MVO, self.OPT_RP, self.OPT_INV], f"method `{method}` is not supported" self.method = method - assert lamb >= 0, f'risk aversion parameter `lamb` should be positive' + assert lamb >= 0, f"risk aversion parameter `lamb` should be positive" self.lamb = lamb - assert delta >= 0, f'turnover limit `delta` should be positive' + assert delta >= 0, f"turnover limit `delta` should be positive" self.delta = delta - assert alpha >= 0, f'l2 norm regularizer `alpha` should be positive' + assert alpha >= 0, f"l2 norm regularizer `alpha` should be positive" self.alpha = alpha self.tol = tol - def __call__(self, S: Union[np.ndarray, pd.DataFrame], - u: Optional[Union[np.ndarray, pd.Series]] = None, - w0: Optional[Union[np.ndarray, pd.Series]] = None) -> Union[np.ndarray, pd.Series]: + def __call__( + self, + S: Union[np.ndarray, pd.DataFrame], + u: Optional[Union[np.ndarray, pd.Series]] = None, + w0: Optional[Union[np.ndarray, pd.Series]] = None, + ) -> Union[np.ndarray, pd.Series]: """ Args: S (np.ndarray or pd.DataFrame): covariance matrix @@ -72,22 +81,22 @@ class PortfolioOptimizer(object): # transform alpha if u is not None: - assert len(u) == len(S), '`u` has mismatched shape' + assert len(u) == len(S), "`u` has mismatched shape" if isinstance(u, pd.Series): - assert all(u.index == index), '`u` has mismatched index' + assert all(u.index == index), "`u` has mismatched index" u = u.values # transform initial weights if w0 is not None: - assert len(w0) == len(S), '`w0` has mismatched shape' + assert len(w0) == len(S), "`w0` has mismatched shape" if isinstance(w0, pd.Series): - assert all(w0.index == index), '`w0` has mismatched index' + assert all(w0.index == index), "`w0` has mismatched index" w0 = w0.values # scale alpha to match volatility if u is not None: u = u / u.std() - u *= np.mean(np.diag(S))**0.5 + u *= np.mean(np.diag(S)) ** 0.5 # optimize w = self._optimize(S, u, w0) @@ -98,21 +107,20 @@ class PortfolioOptimizer(object): return w - def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, - w0: Optional[np.ndarray] = None) -> np.ndarray: + def _optimize(self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray: # inverse volatility if self.method == self.OPT_INV: if u is not None: - warnings.warn('`u` is set but will not be used for `inv` portfolio') + warnings.warn("`u` is set but will not be used for `inv` portfolio") if w0 is not None: - warnings.warn('`w0` is set but will not be used for `inv` portfolio') + warnings.warn("`w0` is set but will not be used for `inv` portfolio") return self._optimize_inv(S) # global minimum variance if self.method == self.OPT_GMV: if u is not None: - warnings.warn('`u` is set but will not be used for `gmv` portfolio') + warnings.warn("`u` is set but will not be used for `gmv` portfolio") return self._optimize_gmv(S, w0) # mean-variance @@ -122,12 +130,12 @@ class PortfolioOptimizer(object): # risk parity if self.method == self.OPT_RP: if u is not None: - warnings.warn('`u` is set but will not be used for `rp` portfolio') + warnings.warn("`u` is set but will not be used for `rp` portfolio") return self._optimize_rp(S, w0) def _optimize_inv(self, S: np.ndarray) -> np.ndarray: """Inverse volatility""" - vola = np.diag(S)**0.5 + vola = np.diag(S) ** 0.5 w = 1 / vola w /= w.sum() return w @@ -140,14 +148,11 @@ class PortfolioOptimizer(object): s.t. w >= 0, sum(w) == 1 where `S` is the covariance matrix. """ - return self._solve( - len(S), - self._get_objective_gmv(S), - *self._get_constrains(w0) - ) + return self._solve(len(S), self._get_objective_gmv(S), *self._get_constrains(w0)) - def _optimize_mvo(self, S: np.ndarray, u: Optional[np.ndarray] = None, - w0: Optional[np.ndarray] = None) -> np.ndarray: + def _optimize_mvo( + self, S: np.ndarray, u: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None + ) -> np.ndarray: """optimize mean-variance portfolio This method solves the following optimization problem @@ -156,11 +161,7 @@ class PortfolioOptimizer(object): where `S` is the covariance matrix, `u` is the expected returns, and `lamb` is the risk aversion parameter. """ - return self._solve( - len(S), - self._get_objective_mvo(S, u), - *self._get_constrains(w0) - ) + return self._solve(len(S), self._get_objective_mvo(S, u), *self._get_constrains(w0)) def _optimize_rp(self, S: np.ndarray, w0: Optional[np.ndarray] = None) -> np.ndarray: """optimize risk parity portfolio @@ -170,11 +171,7 @@ class PortfolioOptimizer(object): s.t. w >= 0, sum(w) == 1 where `S` is the covariance matrix and `N` is the number of stocks. """ - return self._solve( - len(S), - self._get_objective_rp(S), - *self._get_constrains(w0) - ) + return self._solve(len(S), self._get_objective_rp(S), *self._get_constrains(w0)) def _get_objective_gmv(self, S: np.ndarray) -> np.ndarray: """global minimum variance optimization objective @@ -213,7 +210,7 @@ class PortfolioOptimizer(object): N = len(x) Sx = S @ x xSx = x @ Sx - return np.sum((x - xSx / Sx / N)**2) + return np.sum((x - xSx / Sx / N) ** 2) return func @@ -230,15 +227,11 @@ class PortfolioOptimizer(object): bounds = so.Bounds(0.0, 1.0) # full investment constraint - cons = [ - {'type': 'eq', 'fun': lambda x: np.sum(x) - 1} # == 0 - ] + cons = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}] # == 0 # turnover constraint if w0 is not None: - cons.append( - {'type': 'ineq', 'fun': lambda x: self.delta - np.sum(np.abs(x - w0))} # >= 0 - ) + cons.append({"type": "ineq", "fun": lambda x: self.delta - np.sum(np.abs(x - w0))}) # >= 0 return bounds, cons @@ -257,9 +250,9 @@ class PortfolioOptimizer(object): wrapped_obj = lambda x: obj(x) + self.alpha * np.sum(np.square(x)) # solve - x0 = np.ones(n) / n # init results + x0 = np.ones(n) / n # init results sol = so.minimize(wrapped_obj, x0, bounds=bounds, constraints=cons, tol=self.tol) if not sol.success: - warnings.warn(f'optimization not success ({sol.status})') + warnings.warn(f"optimization not success ({sol.status})") return sol.x diff --git a/qlib/workflow/__init__.py b/qlib/workflow/__init__.py index 31b9ae2d7..3898dfd35 100644 --- a/qlib/workflow/__init__.py +++ b/qlib/workflow/__init__.py @@ -10,6 +10,7 @@ class QlibRecorder: """ A global system that helps to manage the experiments. """ + def __init__(self, exp_manager, uri): self.exp_manager = exp_manager self.uri = uri @@ -20,7 +21,7 @@ class QlibRecorder: try: yield run except: - self.end_exp() # end the experiment if something went wrong + self.end_exp() # end the experiment if something went wrong self.end_exp() def start_exp(self, experiment_name=None): diff --git a/qlib/workflow/exp.py b/qlib/workflow/exp.py index 335dd338b..672a77d93 100644 --- a/qlib/workflow/exp.py +++ b/qlib/workflow/exp.py @@ -5,6 +5,7 @@ import mlflow from pathlib import Path from .recorder import MLflowRecorder + class Experiment: """ Thie is the `Experiment` class for each experiment being run. The API is designed @@ -21,7 +22,7 @@ class Experiment: Parameters ---------- - + Returns ------- A recorder instance. @@ -84,4 +85,4 @@ class MLflowExperiment(Experiment): def delete_recorder(self, rid): mlflow.delete_run(rid) - self.recorders = [r for r in self.recorders if r.recorder_id == rid] \ No newline at end of file + self.recorders = [r for r in self.recorders if r.recorder_id == rid] diff --git a/qlib/workflow/expm.py b/qlib/workflow/expm.py index 3c633e3bb..34a76e61d 100644 --- a/qlib/workflow/expm.py +++ b/qlib/workflow/expm.py @@ -9,7 +9,8 @@ from .exp import MLflowExperiment from .recorder import MLflowRecorder from ..log import get_module_logger -logger = get_module_logger('workflow', 'Warning') +logger = get_module_logger("workflow", "Warning") + class ExpManager: """ diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index 62ee14405..e45ef47b6 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -50,23 +50,24 @@ class SignalRecord(RecordTemp): def generate(self, **kwargs): # generate prediciton pred = self.model.predict(self.dataset) - self.recorder.save_object(pred, 'pred.pkl') - + self.recorder.save_object(pred, "pred.pkl") + def load(self): # try to load the saved object try: - pred = self.recorder.load_object('pred.pkl') + pred = self.recorder.load_object("pred.pkl") return pred except: - raise Exception('Something went wrong when loading the saved object.') + raise Exception("Something went wrong when loading the saved object.") def check(self, **kwargs): - return self.recorder.check('pred.pkl') + return self.recorder.check("pred.pkl") # TODO class SigAnaRecord(SignalRecord): def __init__(self, recorder, **kwargs): + pass def generate(self): pass @@ -85,7 +86,7 @@ class PortAnaRecord(SignalRecord): self.BACKTEST_CONFIG = BACKTEST_CONFIG module = get_module_by_module_path("qlib.contrib.strategy") self.strategy = init_instance_by_config(STRATEGY_CONFIG, module) - self.artifact_path = Path('portfolio_analysis').resolve() + self.artifact_path = Path("portfolio_analysis").resolve() def generate(self, **kwargs): """ @@ -99,8 +100,8 @@ class PortAnaRecord(SignalRecord): # custom strategy and get backtest pred_score = super().load() report_normal, positions_normal = normal_backtest(pred_score, strategy=self.strategy, **self.BACKTEST_CONFIG) - self.recorder.save_object(report_normal, 'report_normal.pkl', self.artifact_path) - self.recorder.save_object(positions_normal, 'positions_normal.pkl', self.artifact_path) + self.recorder.save_object(report_normal, "report_normal.pkl", self.artifact_path) + self.recorder.save_object(positions_normal, "positions_normal.pkl", self.artifact_path) # analysis analysis = dict() @@ -109,29 +110,15 @@ class PortAnaRecord(SignalRecord): report_normal["return"] - report_normal["bench"] - report_normal["cost"] ) analysis_df = pd.concat(analysis) # type: pd.DataFrame - self.recorder.save_object(pred, 'port_analysis.pkl', self.artifact_path) + self.recorder.save_object(pred, "port_analysis.pkl", self.artifact_path) def load(self): # try to load the saved object try: - pred = self.recorder.load_object(self.artifact_path / 'port_analysis.pkl'') + pred = self.recorder.load_object(self.artifact_path / "port_analysis.pkl") return pred except: - raise Exception('Something went wrong when loading the saved object.') + raise Exception("Something went wrong when loading the saved object.") def check(self): - return self.recorder.check('port_analysis.pkl', self.artifact_path) - - - - - - - - - - - - - - + return self.recorder.check("port_analysis.pkl", self.artifact_path) diff --git a/qlib/workflow/recorder.py b/qlib/workflow/recorder.py index 042b052e0..307a740b6 100644 --- a/qlib/workflow/recorder.py +++ b/qlib/workflow/recorder.py @@ -166,7 +166,7 @@ class MLflowRecorder(Recorder): # save the run id and artifact_uri self.recorder_id = run.info.run_id self.artifact_uri = run.info.artifact_uri - self._uri = mlflow.get_tracking_uri() # Fix!!! : this is not proper to have uri in recorder + self._uri = mlflow.get_tracking_uri() # Fix!!! : this is not proper to have uri in recorder # set up file manager for saving objects self.temp_dir = tempfile.mkdtemp() self.fm = FileManager(Path(self.temp_dir).absolute()) @@ -238,7 +238,7 @@ class MLflowRecorder(Recorder): def check(self, name, path=None): client = mlflow.tracking.MlflowClient(tracking_uri=self._uri) artifacts = client.list_artifacts(self.recorder_id, path) - for artifact in artifacts + for artifact in artifacts: if name in artifact.path: return True - return False \ No newline at end of file + return False