1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-03 02:50:58 +08:00

add linear model

This commit is contained in:
Dong Zhou
2020-11-29 17:22:37 +08:00
parent 0fb0109f9c
commit b3657d1c8f
3 changed files with 165 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
numpy>=1.17.4
pandas>=1.0.1
scikit-learn>=0.23.1

View File

@@ -0,0 +1,71 @@
# Workflow configuration that trains `LinearModel` (OLS) on the Alpha158
# handler and records signal / portfolio analysis.
# NOTE(review): the nesting below was reconstructed from the key structure
# (the flattened form repeats `class`/`kwargs` at top level, which is not a
# valid mapping) -- confirm against the intended layout.

# Data location and region.
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn

# Anchors reused further down via *market / *benchmark.
market: &market csi300
benchmark: &benchmark SH000300

# Shared keyword arguments for the data handler (referenced by the dataset).
data_handler_config: &data_handler_config
    start_time: 2008-01-01
    end_time: 2020-08-01
    fit_start_time: 2008-01-01
    fit_end_time: 2014-12-31
    instruments: *market
    # Processors listed for inference data.
    infer_processors:
        - class: RobustZScoreNorm
          kwargs:
              fields_group: feature
              clip_outlier: true
        - class: Fillna
          kwargs:
              fields_group: feature
    # Processors listed for learning (training) data.
    learn_processors:
        - class: DropnaLabel
        - class: CSRankNorm
          kwargs:
              fields_group: label
    # Label expression; presumably negative Ref offsets look forward in
    # time -- verify against the expression engine's documentation.
    label: ["Ref($close, -2) / Ref($close, -1) - 1"]

# Strategy and backtest settings consumed by PortAnaRecord below.
port_analysis_config: &port_analysis_config
    strategy:
        class: TopkDropoutStrategy
        module_path: qlib.contrib.strategy.strategy
        kwargs:
            topk: 50
            n_drop: 5
    backtest:
        verbose: False
        limit_threshold: 0.095
        account: 100000000
        benchmark: *benchmark
        deal_price: close
        open_cost: 0.0005
        close_cost: 0.0015
        min_cost: 5

# The task: model, dataset and the records to produce.
task:
    model:
        class: LinearModel
        module_path: qlib.contrib.model.linear
        kwargs:
            estimator: ols
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
        kwargs:
            handler:
                class: Alpha158
                module_path: qlib.contrib.data.handler
                kwargs: *data_handler_config
            # Date ranges for each named segment.
            segments:
                train: [2008-01-01, 2014-12-31]
                valid: [2015-01-01, 2016-12-31]
                test: [2017-01-01, 2020-08-01]
    record:
        - class: SignalRecord
          module_path: qlib.workflow.record_temp
          kwargs: {}
        - class: SigAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              ana_long_short: True
              ann_scaler: 252
        - class: PortAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              config: *port_analysis_config

View File

@@ -0,0 +1,91 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import numpy as np
import pandas as pd
from scipy.optimize import nnls
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
class LinearModel(Model):
    r"""Linear Model

    Solve one of the following regression problems:
        - `ols`: min_w |y - Xw|^2_2
        - `nnls`: min_w |y - Xw|^2_2, s.t. w >= 0
        - `ridge`: min_w |y - Xw|^2_2 + \alpha*|w|^2_2
        - `lasso`: min_w |y - Xw|^2_2 + \alpha*|w|_1
    where `w` is the regression coefficient.

    After `fit`, the learned weights are available as `coef_` and the
    intercept as `intercept_` (both are `None` before fitting).
    """

    # Supported values for the `estimator` argument.
    OLS = "ols"
    NNLS = "nnls"
    RIDGE = "ridge"
    LASSO = "lasso"

    def __init__(self, estimator="ols", alpha=0.0, fit_intercept=False):
        """
        Parameters
        ----------
        estimator : str
            which estimator to use for linear regression
        alpha : float
            l1 or l2 regularization parameter
        fit_intercept : bool
            whether fit intercept
        """
        # NOTE: asserts are stripped under `python -O`; `fit` re-checks the
        # estimator name and raises ValueError for unknown values.
        assert estimator in [self.OLS, self.NNLS, self.RIDGE, self.LASSO], f"unsupported estimator `{estimator}`"
        self.estimator = estimator

        assert alpha == 0 or estimator in [self.RIDGE, self.LASSO], "alpha is only supported in `ridge`&`lasso`"
        self.alpha = alpha

        self.fit_intercept = fit_intercept

        # Fitted state; both stay None until `fit` succeeds so that reading
        # either attribute on an unfitted model never raises AttributeError.
        self.coef_ = None
        self.intercept_ = None

    def fit(self, dataset: DatasetH):
        """Fit the model on the "train" segment of `dataset`.

        Parameters
        ----------
        dataset : DatasetH
            dataset whose "train" segment provides the "feature" and "label"
            column groups (prepared with the learn data key)

        Returns
        -------
        LinearModel
            `self`, to allow call chaining

        Raises
        ------
        ValueError
            if the training segment is empty or the estimator is unknown
        """
        df_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
        if df_train.empty:
            raise ValueError("Empty data from dataset, please check your dataset config.")
        # squeeze drops the singleton label axis so `y` is 1-d for one label column
        X, y = df_train["feature"].values, np.squeeze(df_train["label"].values)

        if self.estimator in [self.OLS, self.RIDGE, self.LASSO]:
            self._fit(X, y)
        elif self.estimator == self.NNLS:
            self._fit_nnls(X, y)
        else:
            raise ValueError(f"unknown estimator `{self.estimator}`")

        return self

    def _fit(self, X, y):
        """Fit `ols`/`ridge`/`lasso` with the matching scikit-learn estimator."""
        # copy_X=False avoids duplicating X; scikit-learn documents that X
        # may then be overwritten during fitting.
        if self.estimator == self.OLS:
            model = LinearRegression(fit_intercept=self.fit_intercept, copy_X=False)
        else:
            model = {self.RIDGE: Ridge, self.LASSO: Lasso}[self.estimator](
                alpha=self.alpha, fit_intercept=self.fit_intercept, copy_X=False
            )
        model.fit(X, y)
        self.coef_ = model.coef_
        self.intercept_ = model.intercept_

    def _fit_nnls(self, X, y):
        """Fit non-negative least squares with `scipy.optimize.nnls`."""
        if self.fit_intercept:
            # Append a column of ones so the last solved weight acts as the
            # intercept; it is part of the nnls solution and so is also >= 0.
            X = np.c_[X, np.ones(len(X))]  # NOTE: mem copy
        coef = nnls(X, y)[0]  # nnls returns (solution, residual norm)
        if self.fit_intercept:
            self.coef_ = coef[:-1]
            self.intercept_ = coef[-1]
        else:
            self.coef_ = coef
            self.intercept_ = 0.0

    def predict(self, dataset, segment="test"):
        """Predict scores for one segment of `dataset`.

        Parameters
        ----------
        dataset : DatasetH
            dataset providing the "feature" column group (prepared with the
            infer data key)
        segment : str
            segment passed to `dataset.prepare`; defaults to "test"

        Returns
        -------
        pd.Series
            `X @ coef_ + intercept_`, indexed like the prepared features

        Raises
        ------
        ValueError
            if the model has not been fitted
        """
        if self.coef_ is None:
            raise ValueError("model is not fitted yet!")
        x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
        return pd.Series(x_test.values @ self.coef_ + self.intercept_, index=x_test.index)