1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-01 01:51:18 +08:00
Files
qlib/examples/portfolio/prepare_riskdata.py
Dong Zhou 1b8f0b4575 support optimization based strategy (#754)
* support optimization based strategy

* fix riskdata not found & update doc

* refactor signal_strategy

* add portfolio example

* Update examples/portfolio/prepare_riskdata.py

Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>

* fix typo

Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>

* fix typo

Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>

* update doc

* fix riskmodel doc

Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>

Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
2021-12-28 18:44:20 +08:00

56 lines
1.7 KiB
Python

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import os
import numpy as np
import pandas as pd
from qlib.data import D
from qlib.model.riskmodel import StructuredCovEstimator
def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index()
price_all = (
D.features(D.instruments("all"), ["$close"], start_time=start_time).squeeze().unstack(level="instrument")
)
# StructuredCovEstimator is a statistical risk model
riskmodel = StructuredCovEstimator()
for i in range(T - 1, len(price_all)):
date = price_all.index[i]
ref_date = price_all.index[i - T + 1]
print(date)
codes = universe.loc[date].index
price = price_all.loc[ref_date:date, codes]
# calculate return and remove extreme return
ret = price.pct_change()
ret.clip(ret.quantile(0.025), ret.quantile(0.975), axis=1, inplace=True)
# run risk model
F, cov_b, var_u = riskmodel.predict(ret, is_price=False, return_decomposed_components=True)
# save risk data
root = riskdata_root + "/" + date.strftime("%Y%m%d")
os.makedirs(root, exist_ok=True)
pd.DataFrame(F, index=codes).to_pickle(root + "/factor_exp.pkl")
pd.DataFrame(cov_b).to_pickle(root + "/factor_cov.pkl")
# for specific_risk we follow the convention to save volatility
pd.Series(np.sqrt(var_u), index=codes).to_pickle(root + "/specific_risk.pkl")
if __name__ == "__main__":
import qlib
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")
prepare_data()