mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-03 02:50:58 +08:00
add linear model
This commit is contained in:
3
examples/benchmarks/Linear/requirements.txt
Normal file
3
examples/benchmarks/Linear/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
numpy>=1.17.4
|
||||
pandas>=1.0.1
|
||||
scikit-learn>=0.23.1
|
||||
71
examples/benchmarks/Linear/workflow_config_linear.yaml
Normal file
71
examples/benchmarks/Linear/workflow_config_linear.yaml
Normal file
@@ -0,0 +1,71 @@
|
||||
provider_uri: "~/.qlib/qlib_data/cn_data"
|
||||
region: cn
|
||||
market: &market csi300
|
||||
benchmark: &benchmark SH000300
|
||||
data_handler_config: &data_handler_config
|
||||
start_time: 2008-01-01
|
||||
end_time: 2020-08-01
|
||||
fit_start_time: 2008-01-01
|
||||
fit_end_time: 2014-12-31
|
||||
instruments: *market
|
||||
infer_processors:
|
||||
- class: RobustZScoreNorm
|
||||
kwargs:
|
||||
fields_group: feature
|
||||
clip_outlier: true
|
||||
- class: Fillna
|
||||
kwargs:
|
||||
fields_group: feature
|
||||
learn_processors:
|
||||
- class: DropnaLabel
|
||||
- class: CSRankNorm
|
||||
kwargs:
|
||||
fields_group: label
|
||||
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
|
||||
port_analysis_config: &port_analysis_config
|
||||
strategy:
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy.strategy
|
||||
kwargs:
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
verbose: False
|
||||
limit_threshold: 0.095
|
||||
account: 100000000
|
||||
benchmark: *benchmark
|
||||
deal_price: close
|
||||
open_cost: 0.0005
|
||||
close_cost: 0.0015
|
||||
min_cost: 5
|
||||
task:
|
||||
model:
|
||||
class: LinearModel
|
||||
module_path: qlib.contrib.model.linear
|
||||
kwargs:
|
||||
estimator: ols
|
||||
dataset:
|
||||
class: DatasetH
|
||||
module_path: qlib.data.dataset
|
||||
kwargs:
|
||||
handler:
|
||||
class: Alpha158
|
||||
module_path: qlib.contrib.data.handler
|
||||
kwargs: *data_handler_config
|
||||
segments:
|
||||
train: [2008-01-01, 2014-12-31]
|
||||
valid: [2015-01-01, 2016-12-31]
|
||||
test: [2017-01-01, 2020-08-01]
|
||||
record:
|
||||
- class: SignalRecord
|
||||
module_path: qlib.workflow.record_temp
|
||||
kwargs: {}
|
||||
- class: SigAnaRecord
|
||||
module_path: qlib.workflow.record_temp
|
||||
kwargs:
|
||||
ana_long_short: True
|
||||
ann_scaler: 252
|
||||
- class: PortAnaRecord
|
||||
module_path: qlib.workflow.record_temp
|
||||
kwargs:
|
||||
config: *port_analysis_config
|
||||
91
qlib/contrib/model/linear.py
Normal file
91
qlib/contrib/model/linear.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from scipy.optimize import nnls
|
||||
from sklearn.linear_model import LinearRegression, Ridge, Lasso
|
||||
|
||||
from ...model.base import Model
|
||||
from ...data.dataset import DatasetH
|
||||
from ...data.dataset.handler import DataHandlerLP
|
||||
|
||||
|
||||
class LinearModel(Model):
    r"""Linear Model

    Solve one of the following regression problems:
        - `ols`: min_w |y - Xw|^2_2
        - `nnls`: min_w |y - Xw|^2_2, s.t. w >= 0
        - `ridge`: min_w |y - Xw|^2_2 + \alpha*|w|^2_2
        - `lasso`: min_w |y - Xw|^2_2 + \alpha*|w|_1
    where `w` is the regression coefficient.

    After `fit`, the learned weights are available as `coef_` and the bias
    term as `intercept_` (``0.0`` for `nnls` without an intercept).
    """

    # NOTE: the docstring above is a raw string on purpose: in a regular
    # string literal `\a` is the BEL escape and would corrupt `\alpha`.

    OLS = "ols"
    NNLS = "nnls"
    RIDGE = "ridge"
    LASSO = "lasso"

    def __init__(self, estimator="ols", alpha=0.0, fit_intercept=False):
        """
        Parameters
        ----------
        estimator : str
            which estimator to use for linear regression; one of
            `ols`, `nnls`, `ridge`, `lasso`
        alpha : float
            l1 or l2 regularization parameter; must be 0 unless the
            estimator is `ridge` or `lasso`
        fit_intercept : bool
            whether to fit an intercept term

        Raises
        ------
        AssertionError
            if `estimator` is not supported, or if a non-zero `alpha` is
            given for an estimator that has no regularization term
        """
        # Assertions are kept (rather than raising ValueError) so that the
        # exception type seen by existing callers does not change.
        assert estimator in [self.OLS, self.NNLS, self.RIDGE, self.LASSO], f"unsupported estimator `{estimator}`"
        self.estimator = estimator

        assert alpha == 0 or estimator in [self.RIDGE, self.LASSO], "alpha is only supported in `ridge`&`lasso`"
        self.alpha = alpha

        self.fit_intercept = fit_intercept

        # Both fitted attributes start as None so that an unfitted model is
        # in a well-defined state; `predict` checks `coef_` before use.
        self.coef_ = None
        self.intercept_ = None

    def fit(self, dataset: DatasetH):
        """Fit the configured estimator on the `train` segment of `dataset`.

        Parameters
        ----------
        dataset : DatasetH
            dataset providing the `feature` and `label` column sets

        Returns
        -------
        LinearModel
            `self`, to allow call chaining

        Raises
        ------
        ValueError
            if the training segment is empty, or if `self.estimator` was
            changed to an unknown value after construction
        """
        df_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
        if df_train.empty:
            raise ValueError("Empty data from dataset, please check your dataset config.")

        X = df_train["feature"].values
        y = df_train["label"].values
        # Drop the trailing axis of a single-column label explicitly.
        # `np.squeeze` would also collapse the sample axis when there is only
        # one training row, handing a 0-d target to the estimator.
        if y.ndim > 1 and y.shape[1] == 1:
            y = y[:, 0]

        if self.estimator in [self.OLS, self.RIDGE, self.LASSO]:
            self._fit(X, y)
        elif self.estimator == self.NNLS:
            self._fit_nnls(X, y)
        else:
            raise ValueError(f"unknown estimator `{self.estimator}`")

        return self

    def _fit(self, X, y):
        """Fit `ols`, `ridge` or `lasso` with the matching scikit-learn estimator."""
        # NOTE(review): `copy_X=False` lets scikit-learn overwrite `X`; this
        # presumably saves memory on large feature matrices -- confirm the
        # caller does not reuse `X` afterwards.
        if self.estimator == self.OLS:
            model = LinearRegression(fit_intercept=self.fit_intercept, copy_X=False)
        else:
            model = {self.RIDGE: Ridge, self.LASSO: Lasso}[self.estimator](
                alpha=self.alpha, fit_intercept=self.fit_intercept, copy_X=False
            )
        model.fit(X, y)
        self.coef_ = model.coef_
        self.intercept_ = model.intercept_

    def _fit_nnls(self, X, y):
        """Fit non-negative least squares with `scipy.optimize.nnls`."""
        if self.fit_intercept:
            # The intercept is modelled as the weight of an appended constant
            # column, so it is subject to the same non-negativity constraint.
            X = np.c_[X, np.ones(len(X))]  # NOTE: mem copy
        coef = nnls(X, y)[0]
        if self.fit_intercept:
            self.coef_ = coef[:-1]
            self.intercept_ = coef[-1]
        else:
            self.coef_ = coef
            self.intercept_ = 0.0

    def predict(self, dataset: DatasetH, segment="test"):
        """Predict scores for one segment of `dataset`.

        Parameters
        ----------
        dataset : DatasetH
            dataset providing the `feature` column set
        segment : str
            segment passed to `dataset.prepare`; defaults to `test`, which
            was the only segment supported previously

        Returns
        -------
        pd.Series
            predictions indexed like the prepared feature frame

        Raises
        ------
        ValueError
            if the model has not been fitted yet
        """
        if self.coef_ is None:
            raise ValueError("model is not fitted yet!")
        x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
        return pd.Series(x_test.values @ self.coef_ + self.intercept_, index=x_test.index)
|
||||
Reference in New Issue
Block a user