1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00
Files
qlib/examples/portfolio/prepare_riskdata.py
you-n-g be4646b4b7 Adjust rolling api (#1594)
* Intermediate version

* Fix yaml template & Successfully run rolling

* Be compatible with benchmark

* Get same results with previous linear model

* Black formatting

* Update black

* Update the placeholder mechanism

* Update CI

* Update CI

* Upgrade Black

* Fix CI and simplify code

* Fix CI

* Move the data processing caching mechanism into utils.

* Adjusting DDG-DA

* Organize import
2023-07-14 12:16:12 +08:00

53 lines
1.7 KiB
Python

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import os
import numpy as np
import pandas as pd
from qlib.data import D
from qlib.model.riskmodel import StructuredCovEstimator
def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index()
price_all = (
D.features(D.instruments("all"), ["$close"], start_time=start_time).squeeze().unstack(level="instrument")
)
# StructuredCovEstimator is a statistical risk model
riskmodel = StructuredCovEstimator()
for i in range(T - 1, len(price_all)):
date = price_all.index[i]
ref_date = price_all.index[i - T + 1]
print(date)
codes = universe.loc[date].index
price = price_all.loc[ref_date:date, codes]
# calculate return and remove extreme return
ret = price.pct_change()
ret.clip(ret.quantile(0.025), ret.quantile(0.975), axis=1, inplace=True)
# run risk model
F, cov_b, var_u = riskmodel.predict(ret, is_price=False, return_decomposed_components=True)
# save risk data
root = riskdata_root + "/" + date.strftime("%Y%m%d")
os.makedirs(root, exist_ok=True)
pd.DataFrame(F, index=codes).to_pickle(root + "/factor_exp.pkl")
pd.DataFrame(cov_b).to_pickle(root + "/factor_cov.pkl")
# for specific_risk we follow the convention to save volatility
pd.Series(np.sqrt(var_u), index=codes).to_pickle(root + "/specific_risk.pkl")
if __name__ == "__main__":
import qlib
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")
prepare_data()