1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Merge branch 'main' of github.com:you-n-g/qlib into main

This commit is contained in:
Young
2020-11-19 09:18:24 +00:00
29 changed files with 521 additions and 1394 deletions

View File

@@ -0,0 +1,3 @@
pandas==1.1.2
numpy==1.17.4
catboost==0.24.3

View File

@@ -0,0 +1,4 @@
pandas==1.1.2
numpy==1.17.4
scikit_learn==0.23.2
torch==1.7.0

View File

@@ -0,0 +1,62 @@
provider_uri: "~/.qlib/qlib_data/cn_data"
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: DNNModelPytorch
module_path: qlib.contrib.model.pytorch_nn
kwargs:
input_dim: 360
output_dim: 1
layers: [256, 512, 1024, 512, 256, 128, 64]
lr: 0.001
max_steps: 300
batch_size: 2000
early_stop_rounds: 50
eval_steps: 20
lr_decay: 0.96
lr_decay_steps: 100
optimizer: gd
loss: mse
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: ALPHA360_Denoise
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -0,0 +1,4 @@
pandas==1.1.2
numpy==1.17.4
scikit_learn==0.23.2
torch==1.7.0

View File

@@ -0,0 +1,63 @@
provider_uri: "~/.qlib/qlib_data/cn_data"
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: GAT
module_path: qlib.contrib.model.pytorch_gats
kwargs:
d_feat: 6
hidden_size: 64
num_layers: 2
dropout: 0.0
n_epochs: 200
lr: 1e-3
early_stop: 20
batch_size: 800
metric: IC
loss: mse
base_model: GRU
seed: 0
GPU: 0
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: ALPHA360_Denoise
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -0,0 +1,3 @@
pandas==1.1.2
numpy==1.17.4
lightgbm==3.1.0

View File

@@ -0,0 +1,59 @@
provider_uri: "~/.qlib/qlib_data/cn_data"
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LGBModel
module_path: qlib.contrib.model.gbdt
kwargs:
loss: mse
colsample_bytree: 0.8879
learning_rate: 0.0421
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 210
num_threads: 20
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -0,0 +1,4 @@
numpy==1.17.4
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

View File

@@ -0,0 +1,62 @@
provider_uri: "~/.qlib/qlib_data/cn_data"
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: GRU
module_path: qlib.contrib.model.pytorch_gru
kwargs:
d_feat: 6
hidden_size: 64
num_layers: 2
dropout: 0.0
n_epochs: 200
lr: 1e-3
early_stop: 20
batch_size: 800
metric: IC
loss: mse
seed: 0
GPU: 0
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: ALPHA360_Denoise
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -0,0 +1,4 @@
numpy==1.17.4
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

View File

@@ -0,0 +1,62 @@
provider_uri: "~/.qlib/qlib_data/cn_data"
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LSTM
module_path: qlib.contrib.model.pytorch_lstm
kwargs:
d_feat: 6
hidden_size: 64
num_layers: 2
dropout: 0.0
n_epochs: 200
lr: 1e-3
early_stop: 20
batch_size: 800
metric: IC
loss: mse
seed: 0
GPU: 0
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: ALPHA360_Denoise
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -0,0 +1,3 @@
numpy==1.17.4
pandas==1.1.2
xgboost==1.2.1

View File

@@ -0,0 +1,62 @@
provider_uri: "~/.qlib/qlib_data/cn_data"
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: XGBModel
module_path: qlib.contrib.model.xgboost
kwargs:
objective: reg:linear
n_estimators: 5000
colsample_bytree: 0.85
learning_rate: 0.0421
subsample: 0.8789
max_depth: 8
num_leaves: 210
num_threads: 20
missing: -1
min_child_weight: 1
nthread: 4
tree_method: hist
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -0,0 +1,64 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import numpy as np
import pandas as pd
import xgboost as xgb
from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
class XGBModel(Model):
"""XGBModel Model"""
def __init__(self, obj="mse", **kwargs):
if obj not in {"mse", "binary"}:
raise NotImplementedError
self._params = {"obj": obj}
self._params.update(kwargs)
self.model = None
def fit(
self,
dataset: DatasetH,
num_boost_round=1000,
early_stopping_rounds=50,
verbose_eval=20,
evals_result=dict(),
**kwargs
):
df_train, df_valid = dataset.prepare(
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
)
x_train, y_train = df_train["feature"], df_train["label"]
x_valid, y_valid = df_valid["feature"], df_valid["label"]
# Lightgbm need 1D array as its label
if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
y_train_1d, y_valid_1d = np.squeeze(y_train.values), np.squeeze(y_valid.values)
else:
raise ValueError("XGBoost doesn't support multi-label training")
dtrain = xgb.DMatrix(x_train.values, label=y_train_1d)
dvalid = xgb.DMatrix(x_valid.values, label=y_valid_1d)
self.model = xgb.train(
self._params,
dtrain=dtrain,
num_boost_round=num_boost_round,
evals=[(dtrain, "train"), (dvalid, "valid")],
early_stopping_rounds=early_stopping_rounds,
verbose_eval=verbose_eval,
evals_result=evals_result,
**kwargs
)
evals_result["train"] = list(evals_result["train"].values())[0]
evals_result["valid"] = list(evals_result["valid"].values())[0]
def predict(self, dataset):
if self.model is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
return pd.Series(self.model.predict(xgb.DMatrix(np.squeeze(x_test.values))), index=x_test.index)

View File

@@ -1,222 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import json\n",
"import yaml\n",
"import pickle\n",
"from pathlib import Path\n",
"\n",
"import qlib\n",
"import pandas as pd\n",
"from qlib.config import REG_CN\n",
"from qlib.utils import exists_qlib_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"CUR_DIR = Path.cwd()\n",
"MARKET = \"csi300\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# use default data\n",
"# NOTE: need to download data from remote: python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data\n",
"provider_uri = \"~/.qlib/qlib_data/cn_data\" # target_dir\n",
"if not exists_qlib_data(provider_uri):\n",
" print(f\"Qlib data is not found in {provider_uri}\")\n",
" sys.path.append(str(CUR_DIR.parent.parent.joinpath(\"scripts\")))\n",
" from get_data import GetData\n",
" GetData().qlib_data(target_dir=provider_uri)\n",
"qlib.init(provider_uri=provider_uri, region=REG_CN)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with CUR_DIR.joinpath('estimator_config.yaml').open() as fp:\n",
" estimator_name = yaml.load(fp, Loader=yaml.FullLoader)['experiment']['name']\n",
"with CUR_DIR.joinpath(estimator_name, 'exp_info.json').open() as fp:\n",
" latest_id = json.load(fp)['id']\n",
" \n",
"estimator_dir = CUR_DIR.joinpath(estimator_name, 'sacred', latest_id)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# read estimator result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pred_df = pd.read_pickle(estimator_dir.joinpath('pred.pkl'))\n",
"report_normal_df = pd.read_pickle(estimator_dir.joinpath('report_normal.pkl'))\n",
"report_normal_df.index.names = ['index']\n",
"\n",
"analysis_df = pd.read_pickle(estimator_dir.joinpath('analysis.pkl'))\n",
"positions = pickle.load(estimator_dir.joinpath('positions.pkl').open('rb'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# analyze graphs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from qlib.data import D\n",
"from qlib.contrib.report import analysis_model, analysis_position\n",
"pred_df_dates = pred_df.index.get_level_values(level='datetime')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## analysis position"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stock_ret = D.features(D.instruments(MARKET), ['Ref($close, -1)/$close - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
"stock_ret.columns = ['label']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### report"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"analysis_position.report_graph(report_normal_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### risk analysis"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"analysis_position.risk_analysis_graph(analysis_df, report_normal_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## analysis model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"label_df = D.features(D.instruments(MARKET), ['Ref($close, -2)/Ref($close, -1) - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
"label_df.columns = ['label']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### score IC"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)\n",
"analysis_position.score_ic_graph(pred_label)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### model performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"analysis_model.model_performance_graph(pred_label)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -1,53 +0,0 @@
experiment:
name: estimator_example
observer_type: file_storage
mode: train
model:
class: LGBModel
module_path: qlib.gbdt.model.gbdt
args:
loss: mse
colsample_bytree: 0.8879
learning_rate: 0.0421
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 210
num_threads: 20
data:
class: Alpha158
args:
dropna_label: True
filter:
market: csi300
trainer:
class: StaticTrainer
args:
train_start_date: 2008-01-01
train_end_date: 2014-12-31
validate_start_date: 2015-01-01
validate_end_date: 2016-12-31
test_start_date: 2017-01-01
test_end_date: 2020-08-01
strategy:
class: TopkDropoutStrategy
args:
topk: 50
n_drop: 5
backtest:
normal_backtest_args:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: SH000300
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
qlib_data:
# when testing, please modify the following parameters according to the specific environment
provider_uri: "~/.qlib/qlib_data/cn_data"
region: "cn"

View File

@@ -1,55 +0,0 @@
experiment:
name: estimator_example
observer_type: file_storage
mode: train
model:
module_path: qlib.model.pytorch_nn
class: DNNModelPytorch
args:
loss: mse
input_dim: 158
output_dim: 1
lr: 0.002
lr_decay: 0.96
lr_decay_steps: 100
optimizer: 'adam'
max_steps: 8000
batch_size: 4096
GPU: '0'
data:
class: Alpha158
args:
dropna_label: True
dropna_feature: True
filter:
market: csi300
trainer:
class: StaticTrainer
args:
train_start_date: 2007-01-01
train_end_date: 2014-12-31
validate_start_date: 2015-01-01
validate_end_date: 2016-12-31
test_start_date: 2017-01-01
test_end_date: 2020-08-01
strategy:
class: TopkDropoutStrategy
args:
topk: 50
n_drop: 5
backtest:
normal_backtest_args:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: SH000300
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
qlib_data:
# when testing, please modify the following parameters according to the specific environment
provider_uri: "~/.qlib/qlib_data/cn_data"
region: "cn"

View File

@@ -1,121 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import sys
from pathlib import Path
import qlib
import pandas as pd
from qlib.config import REG_CN
from qlib.contrib.model.gbdt import LGBModel
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
backtest as normal_backtest,
risk_analysis,
)
from qlib.utils import exists_qlib_data
if __name__ == "__main__":
# use default data
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
if not exists_qlib_data(provider_uri):
print(f"Qlib data is not found in {provider_uri}")
sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
from get_data import GetData
GetData().qlib_data(target_dir=provider_uri)
qlib.init(provider_uri=provider_uri, region=REG_CN)
MARKET = "CSI300"
BENCHMARK = "SH000300"
###################################
# train model
###################################
DATA_HANDLER_CONFIG = {
"dropna_label": True,
"start_date": "2008-01-01",
"end_date": "2020-08-01",
"market": MARKET,
}
TRAINER_CONFIG = {
"train_start_date": "2008-01-01",
"train_end_date": "2014-12-31",
"validate_start_date": "2015-01-01",
"validate_end_date": "2016-12-31",
"test_start_date": "2017-01-01",
"test_end_date": "2020-08-01",
}
# use default DataHandler
# custom DataHandler, refer to: TODO: DataHandler API url
x_train, y_train, x_validate, y_validate, x_test, y_test = Alpha158(**DATA_HANDLER_CONFIG).get_split_data(
**TRAINER_CONFIG
)
MODEL_CONFIG = {
"loss": "mse",
"colsample_bytree": 0.8879,
"learning_rate": 0.0421,
"subsample": 0.8789,
"lambda_l1": 205.6999,
"lambda_l2": 580.9768,
"max_depth": 8,
"num_leaves": 210,
"num_threads": 20,
}
# use default model
# custom Model, refer to: TODO: Model API url
model = LGBModel(**MODEL_CONFIG)
model.fit(x_train, y_train, x_validate, y_validate)
_pred = model.predict(x_test)
_pred = pd.DataFrame(_pred, index=x_test.index, columns=y_test.columns)
# backtest requires pred_score
pred_score = pd.DataFrame(index=_pred.index)
pred_score["score"] = _pred.iloc(axis=1)[0]
# save pred_score to file
pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
pred_score_path.parent.mkdir(exist_ok=True, parents=True)
pred_score.to_pickle(pred_score_path)
###################################
# backtest
###################################
STRATEGY_CONFIG = {
"topk": 50,
"n_drop": 5,
}
BACKTEST_CONFIG = {
"verbose": False,
"limit_threshold": 0.095,
"account": 100000000,
"benchmark": BENCHMARK,
"deal_price": "close",
"open_cost": 0.0005,
"close_cost": 0.0015,
"min_cost": 5,
}
# use default strategy
# custom Strategy, refer to: TODO: Strategy API url
strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
###################################
# analyze
# If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
###################################
analysis = dict()
analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
analysis["excess_return_with_cost"] = risk_analysis(
report_normal["return"] - report_normal["bench"] - report_normal["cost"]
)
analysis_df = pd.concat(analysis) # type: pd.DataFrame
print(analysis_df)

View File

@@ -1,338 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"from pathlib import Path\n",
"\n",
"import qlib\n",
"import pandas as pd\n",
"from qlib.config import REG_CN\n",
"from qlib.contrib.model.gbdt import LGBModel\n",
"from qlib.contrib.estimator.handler import Alpha158\n",
"from qlib.contrib.strategy.strategy import TopkDropoutStrategy\n",
"from qlib.contrib.evaluate import (\n",
" backtest as normal_backtest,\n",
" risk_analysis,\n",
")\n",
"from qlib.utils import exists_qlib_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# use default data\n",
"# NOTE: need to download data from remote: python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn\n",
"provider_uri = \"~/.qlib/qlib_data/cn_data\" # target_dir\n",
"if not exists_qlib_data(provider_uri):\n",
" print(f\"Qlib data is not found in {provider_uri}\")\n",
" sys.path.append(str(Path.cwd().parent.joinpath(\"scripts\")))\n",
" from get_data import GetData\n",
" GetData().qlib_data(target_dir=provider_uri)\n",
"qlib.init(provider_uri=provider_uri, region=REG_CN)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"MARKET = \"csi300\"\n",
"BENCHMARK = \"SH000300\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# train model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"###################################\n",
"# train model\n",
"###################################\n",
"DATA_HANDLER_CONFIG = {\n",
" \"dropna_label\": True,\n",
" \"start_date\": \"2008-01-01\",\n",
" \"end_date\": \"2020-08-01\",\n",
" \"market\": MARKET,\n",
"}\n",
"\n",
"TRAINER_CONFIG = {\n",
" \"train_start_date\": \"2008-01-01\",\n",
" \"train_end_date\": \"2014-12-31\",\n",
" \"validate_start_date\": \"2015-01-01\",\n",
" \"validate_end_date\": \"2016-12-31\",\n",
" \"test_start_date\": \"2017-01-01\",\n",
" \"test_end_date\": \"2020-08-01\",\n",
"}\n",
"\n",
"# use default DataHandler\n",
"# custom DataHandler, refer to: TODO: DataHandler api url\n",
"x_train, y_train, x_validate, y_validate, x_test, y_test = Alpha158(**DATA_HANDLER_CONFIG).get_split_data(**TRAINER_CONFIG)\n",
"\n",
"\n",
"MODEL_CONFIG = {\n",
" \"loss\": \"mse\",\n",
" \"colsample_bytree\": 0.8879,\n",
" \"learning_rate\": 0.0421,\n",
" \"subsample\": 0.8789,\n",
" \"lambda_l1\": 205.6999,\n",
" \"lambda_l2\": 580.9768,\n",
" \"max_depth\": 8,\n",
" \"num_leaves\": 210,\n",
" \"num_threads\": 20,\n",
"}\n",
"# use default model\n",
"# custom Model, refer to: TODO: Model api url\n",
"model = LGBModel(**MODEL_CONFIG)\n",
"model.fit(x_train, y_train, x_validate, y_validate)\n",
"_pred = model.predict(x_test)\n",
"_pred = pd.DataFrame(_pred, index=x_test.index, columns=y_test.columns)\n",
"\n",
"# backtest requires pred_score\n",
"pred_score = pd.DataFrame(index=_pred.index)\n",
"pred_score[\"score\"] = _pred.iloc(axis=1)[0]\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# backtest"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"###################################\n",
"# backtest\n",
"###################################\n",
"STRATEGY_CONFIG = {\n",
" \"topk\": 50,\n",
" \"n_drop\": 5}\n",
"BACKTEST_CONFIG = {\n",
" \"verbose\": False,\n",
" \"limit_threshold\": 0.095,\n",
" \"account\": 100000000,\n",
" \"benchmark\": BENCHMARK,\n",
" \"deal_price\": \"close\",\n",
" \"open_cost\": 0.0005,\n",
" \"close_cost\": 0.0015,\n",
" \"min_cost\": 5,\n",
" \n",
"}\n",
"\n",
"# use default strategy\n",
"# custom Strategy, refer to: TODO: Strategy api url\n",
"strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)\n",
"report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# analyze"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"###################################\n",
"# analyze\n",
"# If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb\n",
"###################################\n",
"analysis = dict()\n",
"analysis[\"excess_return_without_cost\"] = risk_analysis(report_normal[\"return\"] - report_normal[\"bench\"])\n",
"analysis[\"excess_return_with_cost\"] = risk_analysis(\n",
" report_normal[\"return\"] - report_normal[\"bench\"] - report_normal[\"cost\"]\n",
")\n",
"analysis_df = pd.concat(analysis) # type: pd.DataFrame\n",
"print(analysis_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# analyze graphs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from qlib.contrib.report import analysis_model, analysis_position\n",
"from qlib.data import D\n",
"pred_df_dates = pred_score.index.get_level_values(level='datetime')\n",
"report_normal_df = report_normal\n",
"positions = positions_normal\n",
"pred_df = pred_score"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## analysis position"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stock_ret = D.features(D.instruments(MARKET), ['Ref($close, -1)/$close - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
"stock_ret.columns = ['label']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### report"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"analysis_position.report_graph(report_normal_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### risk analysis"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"analysis_position.risk_analysis_graph(analysis_df, report_normal_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## analysis model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"label_df = D.features(D.instruments(MARKET), ['Ref($close, -2)/Ref($close, -1) - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
"label_df.columns = ['label']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### score IC"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)\n",
"analysis_position.score_ic_graph(pred_label)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### model performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"analysis_model.model_performance_graph(pred_label)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -1,145 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import sys
from pathlib import Path
import qlib
import pandas as pd
from qlib.config import REG_CN
from qlib.contrib.model.pytorch_gats import GAT
from qlib.contrib.data.handler import ALPHA360_Denoise
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
backtest as normal_backtest,
risk_analysis,
)
from qlib.utils import exists_qlib_data
# from qlib.model.learner import train_model
from qlib.utils import init_instance_by_config
import pickle
if __name__ == "__main__":
# use default data
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
if not exists_qlib_data(provider_uri):
print(f"Qlib data is not found in {provider_uri}")
sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
from get_data import GetData
GetData().qlib_data_cn(target_dir=provider_uri)
qlib.init(provider_uri=provider_uri, region=REG_CN)
MARKET = "csi300"
BENCHMARK = "SH000300"
###################################
# train model
###################################
DATA_HANDLER_CONFIG = {
"start_time": "2008-01-01",
"end_time": "2020-08-01",
"fit_start_time": "2008-01-01",
"fit_end_time": "2014-12-31",
"instruments": MARKET,
}
TRAINER_CONFIG = {
"train_start_time": "2008-01-01",
"train_end_time": "2014-12-31",
"validate_start_time": "2015-01-01",
"validate_end_time": "2016-12-31",
"test_start_time": "2017-01-01",
"test_end_time": "2020-08-01",
}
task = {
"model": {
"class": "GAT",
"module_path": "qlib.contrib.model.pytorch_gats",
"kwargs": {
"d_feat": 6,
"hidden_size": 64,
"num_layers": 2,
"dropout": 0.0,
"n_epochs": 200,
"lr": 1e-3,
"early_stop": 20,
"batch_size": 800,
"metric": "IC",
"loss": "mse",
"base_model":"GRU",
"seed": 0,
"GPU": 0,
},
},
"dataset": {
"class": "DatasetH",
"module_path": "qlib.data.dataset",
"kwargs": {
"handler": {
"class": "ALPHA360_Denoise",
"module_path": "qlib.contrib.data.handler",
"kwargs": DATA_HANDLER_CONFIG,
},
"segments": {
"train": ("2008-01-01", "2014-12-31"),
"valid": ("2015-01-01", "2016-12-31"),
"test": ("2017-01-01", "2020-08-01"),
},
},
}
# You shoud record the data in specific sequence
# "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
}
# model = train_model(task)
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
model.fit(dataset)
pred_score = model.predict(dataset)
# save pred_score to file
pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
pred_score_path.parent.mkdir(exist_ok=True, parents=True)
pred_score.to_pickle(pred_score_path)
###################################
# backtest
###################################
STRATEGY_CONFIG = {
"topk": 50,
"n_drop": 5,
}
BACKTEST_CONFIG = {
"verbose": False,
"limit_threshold": 0.095,
"account": 100000000,
"benchmark": BENCHMARK,
"deal_price": "close",
"open_cost": 0.0005,
"close_cost": 0.0015,
"min_cost": 5,
}
# use default strategy
# custom Strategy, refer to: TODO: Strategy API url
strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
###################################
# analyze
# If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
###################################
analysis = dict()
analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
analysis["excess_return_with_cost"] = risk_analysis(
report_normal["return"] - report_normal["bench"] - report_normal["cost"]
)
analysis_df = pd.concat(analysis) # type: pd.DataFrame
print(analysis_df)

View File

@@ -1,144 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import sys
from pathlib import Path
import qlib
import pandas as pd
from qlib.config import REG_CN
from qlib.contrib.model.pytorch_gru import GRU
from qlib.contrib.data.handler import ALPHA360_Denoise
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
backtest as normal_backtest,
risk_analysis,
)
from qlib.utils import exists_qlib_data
# from qlib.model.learner import train_model
from qlib.utils import init_instance_by_config
import pickle
if __name__ == "__main__":
# use default data
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
if not exists_qlib_data(provider_uri):
print(f"Qlib data is not found in {provider_uri}")
sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
from get_data import GetData
GetData().qlib_data_cn(target_dir=provider_uri)
qlib.init(provider_uri=provider_uri, region=REG_CN)
MARKET = "csi300"
BENCHMARK = "SH000300"
###################################
# train model
###################################
DATA_HANDLER_CONFIG = {
"start_time": "2008-01-01",
"end_time": "2020-08-01",
"fit_start_time": "2008-01-01",
"fit_end_time": "2014-12-31",
"instruments": MARKET,
}
TRAINER_CONFIG = {
"train_start_time": "2008-01-01",
"train_end_time": "2014-12-31",
"validate_start_time": "2015-01-01",
"validate_end_time": "2016-12-31",
"test_start_time": "2017-01-01",
"test_end_time": "2020-08-01",
}
task = {
"model": {
"class": "GRU",
"module_path": "qlib.contrib.model.pytorch_gru",
"kwargs": {
"d_feat": 6,
"hidden_size": 64,
"num_layers": 2,
"dropout": 0.0,
"n_epochs": 200,
"lr": 1e-3,
"early_stop": 20,
"batch_size": 800,
"metric": "IC",
"loss": "mse",
"seed": 0,
"GPU": 0,
},
},
"dataset": {
"class": "DatasetH",
"module_path": "qlib.data.dataset",
"kwargs": {
"handler": {
"class": "ALPHA360_Denoise",
"module_path": "qlib.contrib.data.handler",
"kwargs": DATA_HANDLER_CONFIG,
},
"segments": {
"train": ("2008-01-01", "2014-12-31"),
"valid": ("2015-01-01", "2016-12-31"),
"test": ("2017-01-01", "2020-08-01"),
},
},
}
# You shoud record the data in specific sequence
# "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
}
# model = train_model(task)
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
model.fit(dataset)
pred_score = model.predict(dataset)
# save pred_score to file
pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
pred_score_path.parent.mkdir(exist_ok=True, parents=True)
pred_score.to_pickle(pred_score_path)
###################################
# backtest
###################################
STRATEGY_CONFIG = {
"topk": 50,
"n_drop": 5,
}
BACKTEST_CONFIG = {
"verbose": False,
"limit_threshold": 0.095,
"account": 100000000,
"benchmark": BENCHMARK,
"deal_price": "close",
"open_cost": 0.0005,
"close_cost": 0.0015,
"min_cost": 5,
}
# use default strategy
# custom Strategy, refer to: TODO: Strategy API url
strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
###################################
# analyze
# If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
###################################
analysis = dict()
analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
analysis["excess_return_with_cost"] = risk_analysis(
report_normal["return"] - report_normal["bench"] - report_normal["cost"]
)
analysis_df = pd.concat(analysis) # type: pd.DataFrame
print(analysis_df)

View File

@@ -1,144 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import sys
from pathlib import Path
import qlib
import pandas as pd
from qlib.config import REG_CN
from qlib.contrib.model.pytorch_lstm import LSTM
from qlib.contrib.data.handler import ALPHA360_Denoise
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
backtest as normal_backtest,
risk_analysis,
)
from qlib.utils import exists_qlib_data
# from qlib.model.learner import train_model
from qlib.utils import init_instance_by_config
import pickle
if __name__ == "__main__":
# use default data
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
if not exists_qlib_data(provider_uri):
print(f"Qlib data is not found in {provider_uri}")
sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
from get_data import GetData
GetData().qlib_data_cn(target_dir=provider_uri)
qlib.init(provider_uri=provider_uri, region=REG_CN)
MARKET = "csi300"
BENCHMARK = "SH000300"
###################################
# train model
###################################
DATA_HANDLER_CONFIG = {
"start_time": "2008-01-01",
"end_time": "2020-08-01",
"fit_start_time": "2008-01-01",
"fit_end_time": "2014-12-31",
"instruments": MARKET,
}
TRAINER_CONFIG = {
"train_start_time": "2008-01-01",
"train_end_time": "2014-12-31",
"validate_start_time": "2015-01-01",
"validate_end_time": "2016-12-31",
"test_start_time": "2017-01-01",
"test_end_time": "2020-08-01",
}
task = {
"model": {
"class": "LSTM",
"module_path": "qlib.contrib.model.pytorch_lstm",
"kwargs": {
"d_feat": 6,
"hidden_size": 64,
"num_layers": 2,
"dropout": 0.0,
"n_epochs": 200,
"lr": 1e-3,
"early_stop": 20,
"batch_size": 800,
"metric": "IC",
"loss": "mse",
"seed": 0,
"GPU": 0,
},
},
"dataset": {
"class": "DatasetH",
"module_path": "qlib.data.dataset",
"kwargs": {
"handler": {
"class": "ALPHA360_Denoise",
"module_path": "qlib.contrib.data.handler",
"kwargs": DATA_HANDLER_CONFIG,
},
"segments": {
"train": ("2008-01-01", "2014-12-31"),
"valid": ("2015-01-01", "2016-12-31"),
"test": ("2017-01-01", "2020-08-01"),
},
},
}
# You shoud record the data in specific sequence
# "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
}
# model = train_model(task)
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
model.fit(dataset)
pred_score = model.predict(dataset)
# save pred_score to file
pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
pred_score_path.parent.mkdir(exist_ok=True, parents=True)
pred_score.to_pickle(pred_score_path)
###################################
# backtest
###################################
STRATEGY_CONFIG = {
"topk": 50,
"n_drop": 5,
}
BACKTEST_CONFIG = {
"verbose": False,
"limit_threshold": 0.095,
"account": 100000000,
"benchmark": BENCHMARK,
"deal_price": "close",
"open_cost": 0.0005,
"close_cost": 0.0015,
"min_cost": 5,
}
# use default strategy
# custom Strategy, refer to: TODO: Strategy API url
strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
###################################
# analyze
# If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
###################################
analysis = dict()
analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
analysis["excess_return_with_cost"] = risk_analysis(
report_normal["return"] - report_normal["bench"] - report_normal["cost"]
)
analysis_df = pd.concat(analysis) # type: pd.DataFrame
print(analysis_df)

View File

@@ -1,142 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import sys
from pathlib import Path
import qlib
import pandas as pd
from qlib.config import REG_CN
from qlib.contrib.model.xgboost import XGBModel
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
backtest as normal_backtest,
risk_analysis,
)
from qlib.utils import exists_qlib_data
# from qlib.model.learner import train_model
from qlib.utils import init_instance_by_config
if __name__ == "__main__":
# use default data
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
if not exists_qlib_data(provider_uri):
print(f"Qlib data is not found in {provider_uri}")
sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
from get_data import GetData
GetData().qlib_data_cn(target_dir=provider_uri)
qlib.init(provider_uri=provider_uri, region=REG_CN)
MARKET = "csi300"
BENCHMARK = "SH000300"
###################################
# train model
###################################
DATA_HANDLER_CONFIG = {
"start_time": "2008-01-01",
"end_time": "2020-08-01",
"fit_start_time": "2008-01-01",
"fit_end_time": "2014-12-31",
"instruments": MARKET,
}
TRAINER_CONFIG = {
"train_start_time": "2008-01-01",
"train_end_time": "2014-12-31",
"validate_start_time": "2015-01-01",
"validate_end_time": "2016-12-31",
"test_start_time": "2017-01-01",
"test_end_time": "2020-08-01",
}
task = {
"model": {
"class": "XGBModel",
"module_path": "qlib.contrib.model.xgboost",
"kwargs": {
"objective": "reg:linear",
"n_estimators": 5000,
"colsample_bytree": 0.85,
"learning_rate": 0.0421,
"subsample": 0.8789,
"max_depth": 8,
"num_leaves": 210,
"num_threads": 20,
"missing": -1,
"min_child_weight": 1,
"nthread": 4,
"tree_method": "hist",
},
},
"dataset": {
"class": "DatasetH",
"module_path": "qlib.data.dataset",
"kwargs": {
"handler": {
"class": "Alpha158",
"module_path": "qlib.contrib.data.handler",
"kwargs": DATA_HANDLER_CONFIG,
},
"segments": {
"train": ("2008-01-01", "2014-12-31"),
"valid": ("2015-01-01", "2016-12-31"),
"test": ("2017-01-01", "2020-08-01"),
},
},
}
# You shoud record the data in specific sequence
# "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
}
# model = train_model(task)
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
model.fit(dataset)
pred_score = model.predict(dataset)
# save pred_score to file
pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
pred_score_path.parent.mkdir(exist_ok=True, parents=True)
pred_score.to_pickle(pred_score_path)
###################################
# backtest
###################################
STRATEGY_CONFIG = {
"topk": 50,
"n_drop": 5,
}
BACKTEST_CONFIG = {
"verbose": False,
"limit_threshold": 0.095,
"account": 100000000,
"benchmark": BENCHMARK,
"deal_price": "close",
"open_cost": 0.0005,
"close_cost": 0.0015,
"min_cost": 5,
}
# use default strategy
# custom Strategy, refer to: TODO: Strategy API url
strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
###################################
# analyze
# If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
###################################
analysis = dict()
analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
analysis["excess_return_with_cost"] = risk_analysis(
report_normal["return"] - report_normal["bench"] - report_normal["cost"]
)
analysis_df = pd.concat(analysis) # type: pd.DataFrame
print(analysis_df)

View File

@@ -121,7 +121,11 @@ class GAT(Model):
self._scorer = mean_squared_error if loss == "mse" else roc_auc_score
self.GAT_model = GATModel(
d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, base_model=self.base_model
d_feat=self.d_feat,
hidden_size=self.hidden_size,
num_layers=self.num_layers,
dropout=self.dropout,
base_model=self.base_model,
)
if optimizer.lower() == "adam":
self.train_optimizer = optim.Adam(self.GAT_model.parameters(), lr=self.lr)
@@ -321,11 +325,10 @@ class GAT(Model):
class GATModel(nn.Module):
def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_model='GRU'):
def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_model="GRU"):
super().__init__()
if base_model == 'GRU':
if base_model == "GRU":
self.rnn = nn.GRU(
input_size=d_feat,
hidden_size=hidden_size,
@@ -333,7 +336,7 @@ class GATModel(nn.Module):
batch_first=True,
dropout=dropout,
)
elif base_model == 'LSTM':
elif base_model == "LSTM":
self.rnn = nn.LSTM(
input_size=d_feat,
hidden_size=hidden_size,
@@ -342,7 +345,7 @@ class GATModel(nn.Module):
dropout=dropout,
)
else:
raise ValueError('unknown base model name `%s`'%base_model)
raise ValueError("unknown base model name `%s`" % base_model)
self.hidden_size = hidden_size
self.bn1 = nn.BatchNorm1d(num_features=hidden_size, track_running_stats=False)
@@ -354,19 +357,19 @@ class GATModel(nn.Module):
self.d_feat = d_feat
def cal_convariance(self, x, y): # the 2nd dimension of x and y are the same
e_x = torch.mean(x, dim = 1).reshape(-1, 1)
e_y = torch.mean(y, dim = 1).reshape(-1, 1)
def cal_convariance(self, x, y): # the 2nd dimension of x and y are the same
e_x = torch.mean(x, dim=1).reshape(-1, 1)
e_y = torch.mean(y, dim=1).reshape(-1, 1)
e_x_e_y = e_x.mm(torch.t(e_y))
x_extend = x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
y_extend = y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1)
e_xy = torch.mean(x_extend*y_extend, dim = 2)
e_xy = torch.mean(x_extend * y_extend, dim=2)
return e_xy - e_x_e_y
def forward(self, x):
# x: [N, F*T]
x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
x = x.permute(0, 2, 1) # [N, T, F]
x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
x = x.permute(0, 2, 1) # [N, T, F]
out, _ = self.rnn(x)
hidden = out[:, -1, :]
hidden = self.bn1(hidden)
@@ -380,4 +383,4 @@ class GATModel(nn.Module):
output = self.fc(output)
output = self.bn2(output)
output = self.leaky_relu(output)
return self.fc_out(output).squeeze()
return self.fc_out(output).squeeze()

View File

@@ -317,7 +317,6 @@ class LSTM(Model):
class LSTMModel(nn.Module):
def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0):
super().__init__()
@@ -334,7 +333,7 @@ class LSTMModel(nn.Module):
def forward(self, x):
# x: [N, F*T]
x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
x = x.permute(0, 2, 1) # [N, T, F]
x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
x = x.permute(0, 2, 1) # [N, T, F]
out, _ = self.rnn(x)
return self.fc_out(out[:, -1, :]).squeeze()
return self.fc_out(out[:, -1, :]).squeeze()

View File

@@ -75,11 +75,12 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]:
max_start_date, max_end_date = _calculate_maximum(report_df)
ex_max_start_date, ex_max_end_date = _calculate_maximum(report_df, True)
index_name = report_df.index.name
_temp_df = report_df.reset_index()
_temp_df.loc[-1] = 0
_temp_df = _temp_df.shift(1)
_temp_df.loc[0, "index"] = "T0"
_temp_df.set_index("index", inplace=True)
_temp_df.loc[0, index_name] = "T0"
_temp_df.set_index(index_name, inplace=True)
_temp_df.iloc[0] = 0
report_df = _temp_df

View File

@@ -11,7 +11,7 @@ import pandas as pd
import plotly.offline as py
import plotly.graph_objs as go
from plotly.tools import make_subplots
from plotly.subplots import make_subplots
from plotly.figure_factory import create_distplot
from ...utils import get_module_by_module_path
@@ -357,7 +357,7 @@ class SubplotsGraph(object):
# _item.pop('yaxis', None)
for _g_obj in _graph_data:
self._figure.append_trace(_g_obj, row=row, col=col)
self._figure.add_trace(_g_obj, row=row, col=col)
if self._sub_graph_layout is not None:
for k, v in self._sub_graph_layout.items():

View File

@@ -6,8 +6,8 @@ from pathlib import Path
import qlib
import fire
import yaml
import pandas as pd
import ruamel.yaml as yaml
from qlib.config import REG_CN
from qlib.utils import init_instance_by_config
from qlib.workflow import R
@@ -16,7 +16,7 @@ from qlib.workflow.record_temp import SignalRecord
# worflow handler function
def workflow(config_path):
with open(config_path) as fp:
config = yaml.load(fp, Loader=yaml.FullLoader)
config = yaml.load(fp, Loader=yaml.Loader)
provider_uri = config.get("provider_uri")
qlib.init(provider_uri=provider_uri, region=REG_CN)
@@ -26,7 +26,8 @@ def workflow(config_path):
dataset = init_instance_by_config(config.get("task")["dataset"])
# start exp
with R.start("workflow"):
with R.start(experiment_name="workflow"):
R.log_paramters(**flatten_dict(task))
model.fit(dataset)
recorder = R.get_recorder()

View File

@@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import re
import pandas as pd
from pathlib import Path
from pprint import pprint
@@ -37,12 +38,14 @@ class RecordTemp:
"""
raise NotImplementedError(f"Please implement the `generate` method.")
def load(self, **kwargs):
def load(self, name, **kwargs):
"""
Load the stored records.
Parameters
----------
name : str
the name for the file to be load.
kwargs
Return
@@ -51,6 +54,16 @@ class RecordTemp:
"""
raise NotImplementedError(f"Please implement the `load` method.")
def list(self):
"""
List the stored records.
Return
------
A list of all the stored records.
"""
raise NotImplementedError(f"Please implement the `list` method.")
def check(self, **kwargs):
"""
Check if the records is properly generated and saved.
@@ -81,6 +94,8 @@ class SignalRecord(RecordTemp):
def generate(self, **kwargs):
# generate prediciton
pred = self.model.predict(self.dataset)
if isinstance(pred, pd.Series):
pred = pred.to_frame("score")
self.recorder.save_objects(**{"pred.pkl": pred})
logger.info(
f"Signal record 'pred.pkl' has been saved as the artifact of the Experiment {self.recorder.experiment_id}"
@@ -89,11 +104,14 @@ class SignalRecord(RecordTemp):
pprint(f"The following are prediction results of the {type(self.model).__name__} model.")
pprint(pred.head(5))
def load(self):
def load(self, name="pred.pkl"):
# try to load the saved object
pred = self.recorder.load_object("pred.pkl")
pred = self.recorder.load_object(name)
return pred
def list(self):
return ["pred.pkl"]
def check(self, **kwargs):
artifacts = self.recorder.list_artifacts()
for artifact in artifacts:
@@ -165,10 +183,20 @@ class PortAnaRecord(SignalRecord):
pprint("The following are analysis results of the excess return with cost.")
pprint(analysis["excess_return_with_cost"])
def load(self):
def load(self, name):
# try to load the saved object
pred = self.recorder.load_object(self.artifact_path / "port_analysis.pkl")
return pred
if self.artifact_path not in name:
file_name = re.split(r" |/|\\", name)[-1]
name = f"{self.artifact_path}/{file_name}"
result = self.recorder.load_object(name)
return result
def list(self):
return [
f"{self.artifact_path}/report_normal.pkl",
f"{self.artifact_path}/positions_normal.pkl",
f"{self.artifact_path}/port_analysis.pkl",
]
def check(self):
artifacts = self.recorder.list_artifacts(self.artifact_path)