mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-03 11:00:57 +08:00
Merge branch 'main' of https://github.com/you-n-g/qlib into main
This commit is contained in:
10
examples/benchmarks/ALSTM/README.md
Normal file
10
examples/benchmarks/ALSTM/README.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# ALSTM
|
||||
|
||||
- ALSTM contains a temporal attentive aggregation layer based on normal LSTM.
|
||||
|
||||
- The code used in Qlib is a pyTorch implementation of Code: https://github.com/fulifeng/Adv-ALSTM
|
||||
|
||||
- Paper: A dual-stage attention-based recurrent neural network for time series prediction.
|
||||
|
||||
https://www.ijcai.org/Proceedings/2017/0366.pdf
|
||||
|
||||
@@ -5,8 +5,10 @@
|
||||
**GitHub**: https://github.com/google-research/google-research/tree/master/tft
|
||||
|
||||
## Run the Workflow
|
||||
Users can follow the ``workflow_by_code_tft.py`` to run the benchmark. Please be **aware** that this script can only support Python 3.5 - 3.8.
|
||||
Users can follow the ``workflow_by_code_tft.py`` to run the benchmark.
|
||||
|
||||
### Notes
|
||||
1. The model must run in GPU, or an error will be raised.
|
||||
2. New datasets should be registered in ``data_formatters``, for detail please visit the source.
|
||||
1. Please be **aware** that this script can only support `Python 3.5 - 3.8`.
|
||||
2. If the CUDA version on your machine is not 10.0, please remember to run the following commands `conda install anaconda cudatoolkit=10.0` and `conda install cudnn` on your machine.
|
||||
3. The model must run in GPU, or an error will be raised.
|
||||
4. New datasets should be registered in ``data_formatters``, for detail please visit the source.
|
||||
|
||||
@@ -10,6 +10,7 @@ import shutil
|
||||
import tempfile
|
||||
import statistics
|
||||
from pathlib import Path
|
||||
from operator import xor
|
||||
from subprocess import Popen, PIPE
|
||||
from threading import Thread
|
||||
from pprint import pprint
|
||||
@@ -174,11 +175,22 @@ def cal_mean_std(results) -> dict:
|
||||
|
||||
|
||||
# function to get all the folders benchmark folder
|
||||
def get_all_folders() -> dict:
|
||||
def get_all_folders(models, exclude) -> dict:
|
||||
folders = dict()
|
||||
if isinstance(models, str):
|
||||
model_list = models.split(",")
|
||||
models = [m.lower().strip("[ ]") for m in model_list]
|
||||
elif isinstance(models, list):
|
||||
models = [m.lower() for m in models]
|
||||
elif models is None:
|
||||
models = [f.name.lower() for f in os.scandir("benchmarks")]
|
||||
else:
|
||||
raise ValueError("Input models type is not supported. Please provide str or list without space.")
|
||||
for f in os.scandir("benchmarks"):
|
||||
path = Path("benchmarks") / f.name
|
||||
folders[f.name] = str(path.resolve())
|
||||
add = xor(bool(f.name.lower() in models), bool(exclude))
|
||||
if add:
|
||||
path = Path("benchmarks") / f.name
|
||||
folders[f.name] = str(path.resolve())
|
||||
return folders
|
||||
|
||||
|
||||
@@ -225,13 +237,44 @@ def gen_and_save_md_table(metrics):
|
||||
|
||||
|
||||
# function to run the all the models
|
||||
def run(times=1):
|
||||
def run(times=1, models=None, exclude=False):
|
||||
"""
|
||||
Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future.
|
||||
Any PR to enhance this method is highly welcomed.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
times : int
|
||||
determines how many times the model should be running.
|
||||
models : str or list
|
||||
determines the specific model or list of models to run or exclude.
|
||||
exclude : boolean
|
||||
determines whether the model being used is excluded or included.
|
||||
|
||||
Usage:
|
||||
-------
|
||||
Here are some use cases of the function in the bash:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# Case 1 - run all models multiple times
|
||||
python run_all_model.py 3
|
||||
|
||||
# Case 2 - run specific models multiple times
|
||||
python run_all_model.py 3 dnn
|
||||
|
||||
# Case 3 - run other models except those are given as arguments for multiple times
|
||||
python run_all_model.py 3 [dnn,tft,lstm] True
|
||||
|
||||
# Case 4 - run specific models for one time
|
||||
python run_all_model.py --models=[dnn,lightgbm]
|
||||
|
||||
# Case 5 - run other models except those are given as aruments for one time
|
||||
python run_all_model.py --models=[dnn,tft,sfm] --exclude=True
|
||||
|
||||
"""
|
||||
# get all folders
|
||||
folders = get_all_folders()
|
||||
folders = get_all_folders(models, exclude)
|
||||
# set up
|
||||
compatible = True
|
||||
if sys.version_info < (3, 3):
|
||||
|
||||
@@ -7,19 +7,16 @@ from pathlib import Path
|
||||
import qlib
|
||||
import pandas as pd
|
||||
from qlib.config import REG_CN
|
||||
from qlib.contrib.model.pytorch_gats import GAT
|
||||
from qlib.contrib.data.handler import ALPHA360_Denoise
|
||||
|
||||
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
|
||||
from qlib.contrib.evaluate import (
|
||||
backtest as normal_backtest,
|
||||
risk_analysis,
|
||||
)
|
||||
from qlib.utils import exists_qlib_data
|
||||
|
||||
# from qlib.model.learner import train_model
|
||||
from qlib.utils import init_instance_by_config
|
||||
|
||||
import pickle
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
|
||||
@@ -71,21 +71,22 @@ if __name__ == "__main__":
|
||||
"module_path": "qlib.contrib.model.pytorch_sfm",
|
||||
"kwargs": {
|
||||
"d_feat": 6,
|
||||
"hidden_size": 32,
|
||||
"output_dim": 16,
|
||||
"hidden_size": 64,
|
||||
"output_dim": 32,
|
||||
"freq_dim": 25,
|
||||
"dropout_W": 0.5,
|
||||
"dropout_U": 0.5,
|
||||
"n_epochs": 200,
|
||||
"n_epochs": 15,
|
||||
"lr": 1e-3,
|
||||
"batch_size": 200,
|
||||
"metric": "",
|
||||
"batch_size": 1600,
|
||||
"early_stop": 20,
|
||||
"eval_steps": 5,
|
||||
"loss": "mse",
|
||||
"lr_decay": 0.96,
|
||||
"lr_decay_steps": 100,
|
||||
"optimizer": "adam",
|
||||
"GPU": 1,
|
||||
"GPU": 3,
|
||||
"seed": 710,
|
||||
},
|
||||
},
|
||||
|
||||
@@ -10,6 +10,28 @@ from inspect import getfullargspec
|
||||
import copy
|
||||
|
||||
|
||||
def check_transform_proc(proc_l, fit_start_time, fit_end_time):
|
||||
new_l = []
|
||||
for p in proc_l:
|
||||
if not isinstance(p, Processor):
|
||||
klass, pkwargs = get_cls_kwargs(p, processor_module)
|
||||
args = getfullargspec(klass).args
|
||||
if "fit_start_time" in args and "fit_end_time" in args:
|
||||
assert (
|
||||
fit_start_time is not None and fit_end_time is not None
|
||||
), "Make sure `fit_start_time` and `fit_end_time` are not None."
|
||||
pkwargs.update(
|
||||
{
|
||||
"fit_start_time": fit_start_time,
|
||||
"fit_end_time": fit_end_time,
|
||||
}
|
||||
)
|
||||
new_l.append({"class": klass.__name__, "kwargs": pkwargs})
|
||||
else:
|
||||
new_l.append(p)
|
||||
return new_l
|
||||
|
||||
|
||||
class ALPHA360_Denoise(DataHandlerLP):
|
||||
def __init__(self, instruments="csi500", start_time=None, end_time=None, fit_start_time=None, fit_end_time=None):
|
||||
data_loader = {
|
||||
@@ -83,8 +105,31 @@ class ALPHA360_Denoise(DataHandlerLP):
|
||||
return fields, names
|
||||
|
||||
|
||||
_DEFAULT_LEARN_PROCESSORS = [
|
||||
{"class": "DropnaLabel"},
|
||||
{"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}},
|
||||
]
|
||||
_DEFAULT_INFER_PROCESSORS = [
|
||||
{"class": "ProcessInf", "kwargs": {}},
|
||||
{"class": "ZScoreNorm", "kwargs": {}},
|
||||
{"class": "Fillna", "kwargs": {}},
|
||||
]
|
||||
|
||||
|
||||
class ALPHA360(DataHandlerLP):
|
||||
def __init__(self, instruments="csi500", start_time=None, end_time=None, fit_start_time=None, fit_end_time=None):
|
||||
def __init__(
|
||||
self,
|
||||
instruments="csi500",
|
||||
start_time=None,
|
||||
end_time=None,
|
||||
infer_processors=_DEFAULT_INFER_PROCESSORS,
|
||||
learn_processors=_DEFAULT_LEARN_PROCESSORS,
|
||||
fit_start_time=None,
|
||||
fit_end_time=None,
|
||||
):
|
||||
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
|
||||
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
|
||||
|
||||
data_loader = {
|
||||
"class": "QlibDataLoader",
|
||||
"kwargs": {
|
||||
@@ -95,16 +140,6 @@ class ALPHA360(DataHandlerLP):
|
||||
},
|
||||
}
|
||||
|
||||
learn_processors = [
|
||||
{"class": "DropnaLabel", "kwargs": {"fields_group": "label"}},
|
||||
{"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}},
|
||||
]
|
||||
infer_processors = [
|
||||
{"class": "ProcessInf", "kwargs": {}},
|
||||
{"class": "ZscoreNorm", "kwargs": {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time}},
|
||||
{"class": "Fillna", "kwargs": {}},
|
||||
]
|
||||
|
||||
super().__init__(
|
||||
instruments,
|
||||
start_time,
|
||||
@@ -168,33 +203,12 @@ class Alpha158(DataHandlerLP):
|
||||
start_time=None,
|
||||
end_time=None,
|
||||
infer_processors=[],
|
||||
learn_processors=["DropnaLabel", {"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}}],
|
||||
learn_processors=_DEFAULT_LEARN_PROCESSORS,
|
||||
fit_start_time=None,
|
||||
fit_end_time=None,
|
||||
):
|
||||
def check_transform_proc(proc_l):
|
||||
new_l = []
|
||||
for p in proc_l:
|
||||
if not isinstance(p, Processor):
|
||||
klass, pkwargs = get_cls_kwargs(p, processor_module)
|
||||
args = getfullargspec(klass).args
|
||||
if "fit_start_time" in args and "fit_end_time" in args:
|
||||
assert (
|
||||
fit_start_time is not None and fit_end_time is not None
|
||||
), "Make sure `fit_start_time` and `fit_end_time` are not None."
|
||||
pkwargs.update(
|
||||
{
|
||||
"fit_start_time": fit_start_time,
|
||||
"fit_end_time": fit_end_time,
|
||||
}
|
||||
)
|
||||
new_l.append({"class": klass.__name__, "kwargs": pkwargs})
|
||||
else:
|
||||
new_l.append(p)
|
||||
return new_l
|
||||
|
||||
infer_processors = check_transform_proc(infer_processors)
|
||||
learn_processors = check_transform_proc(learn_processors)
|
||||
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
|
||||
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
|
||||
|
||||
data_loader = {
|
||||
"class": "QlibDataLoader",
|
||||
|
||||
@@ -34,14 +34,14 @@ class CatBoostModel(Model):
|
||||
def fit(
|
||||
self,
|
||||
dataset: DatasetH,
|
||||
num_boost_round=1000,
|
||||
early_stopping_rounds=50,
|
||||
verbose_eval=20,
|
||||
evals_result=dict(),
|
||||
num_boost_round = 1000,
|
||||
early_stopping_rounds = 50,
|
||||
verbose_eval = 20,
|
||||
evals_result = dict(),
|
||||
**kwargs
|
||||
):
|
||||
df_train, df_valid = dataset.prepare(
|
||||
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
|
||||
["train", "valid"], col_set = ["feature", "label"], data_key = DataHandlerLP.DK_L
|
||||
)
|
||||
x_train, y_train = df_train["feature"], df_train["label"]
|
||||
x_valid, y_valid = df_valid["feature"], df_valid["label"]
|
||||
@@ -52,8 +52,8 @@ class CatBoostModel(Model):
|
||||
else:
|
||||
raise ValueError("CatBoost doesn't support multi-label training")
|
||||
|
||||
train_pool = Pool(data=x_train, label=y_train_1d)
|
||||
valid_pool = Pool(data=x_valid, label=y_valid_1d)
|
||||
train_pool = Pool(data = x_train, label = y_train_1d)
|
||||
valid_pool = Pool(data = x_valid, label = y_valid_1d)
|
||||
|
||||
# Initialize the catboost model
|
||||
self._params["iterations"] = num_boost_round
|
||||
@@ -63,7 +63,7 @@ class CatBoostModel(Model):
|
||||
self.model = CatBoost(self._params, **kwargs)
|
||||
|
||||
# train the model
|
||||
self.model.fit(train_pool, eval_set=valid_pool, use_best_model=True, **kwargs)
|
||||
self.model.fit(train_pool, eval_set = valid_pool, use_best_model = True, **kwargs)
|
||||
|
||||
evals_result = self.model.get_evals_result()
|
||||
evals_result["train"] = list(evals_result["learn"].values())[0]
|
||||
@@ -72,8 +72,8 @@ class CatBoostModel(Model):
|
||||
def predict(self, dataset):
|
||||
if self.model is None:
|
||||
raise ValueError("model is not fitted yet!")
|
||||
x_test = dataset.prepare("test", col_set="feature")
|
||||
return pd.Series(self.model.predict(x_test.values), index=x_test.index)
|
||||
x_test = dataset.prepare("test", col_set = "feature")
|
||||
return pd.Series(self.model.predict(x_test.values), index = x_test.index)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -28,14 +28,12 @@ class GAT(Model):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_dim : int
|
||||
input dimension
|
||||
output_dim : int
|
||||
output dimension
|
||||
layers : tuple
|
||||
layer sizes
|
||||
lr : float
|
||||
learning rate
|
||||
d_feat : int
|
||||
input dimensions for each time step
|
||||
metric : str
|
||||
the evaluate metric used in early stop
|
||||
optimizer : str
|
||||
optimizer name
|
||||
GPU : str
|
||||
@@ -119,11 +117,7 @@ class GAT(Model):
|
||||
seed,
|
||||
)
|
||||
)
|
||||
|
||||
if loss not in {"mse", "binary"}:
|
||||
raise NotImplementedError("loss {} is not supported!".format(loss))
|
||||
self._scorer = mean_squared_error if loss == "mse" else roc_auc_score
|
||||
|
||||
|
||||
self.GAT_model = GATModel(
|
||||
d_feat=self.d_feat,
|
||||
hidden_size=self.hidden_size,
|
||||
@@ -213,7 +207,6 @@ class GAT(Model):
|
||||
losses = []
|
||||
|
||||
indices = np.arange(len(x_values))
|
||||
np.random.shuffle(indices)
|
||||
|
||||
for i in range(len(indices))[:: self.batch_size]:
|
||||
|
||||
@@ -377,7 +370,6 @@ class GATModel(nn.Module):
|
||||
self.fc_out = nn.Linear(hidden_size, 1)
|
||||
self.leaky_relu = nn.LeakyReLU()
|
||||
self.softmax = nn.Softmax(dim=1)
|
||||
|
||||
self.d_feat = d_feat
|
||||
|
||||
def cal_convariance(self, x, y): # the 2nd dimension of x and y are the same
|
||||
@@ -396,12 +388,7 @@ class GATModel(nn.Module):
|
||||
out, _ = self.rnn(x)
|
||||
hidden = out[:, -1, :]
|
||||
hidden = self.bn1(hidden)
|
||||
|
||||
gamma = self.cal_convariance(hidden, hidden)
|
||||
# gamma = hidden.mm(torch.t(hidden))
|
||||
# gamma = self.leaky_relu(gamma)
|
||||
# gamma = self.softmax(gamma)
|
||||
# gamma = gamma * (torch.ones(x.shape[0], x.shape[0]).to(device) - torch.diag(torch.ones(x.shape[0])).to(device))
|
||||
output = gamma.mm(hidden)
|
||||
output = self.fc(output)
|
||||
output = self.bn2(output)
|
||||
|
||||
@@ -28,14 +28,10 @@ class GRU(Model):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_dim : int
|
||||
input dimension
|
||||
output_dim : int
|
||||
output dimension
|
||||
layers : tuple
|
||||
layer sizes
|
||||
lr : float
|
||||
learning rate
|
||||
d_feat : int
|
||||
input dimension for each time step
|
||||
metric: str
|
||||
the evaluate metric used in early stop
|
||||
optimizer : str
|
||||
optimizer name
|
||||
GPU : str
|
||||
@@ -112,10 +108,6 @@ class GRU(Model):
|
||||
)
|
||||
)
|
||||
|
||||
if loss not in {"mse", "binary"}:
|
||||
raise NotImplementedError("loss {} is not supported!".format(loss))
|
||||
self._scorer = mean_squared_error if loss == "mse" else roc_auc_score
|
||||
|
||||
self.gru_model = GRUModel(
|
||||
d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout
|
||||
)
|
||||
@@ -251,7 +243,6 @@ class GRU(Model):
|
||||
# train
|
||||
self.logger.info("training...")
|
||||
self._fitted = True
|
||||
# return
|
||||
|
||||
for step in range(self.n_epochs):
|
||||
self.logger.info("Epoch%d:", step)
|
||||
|
||||
@@ -28,14 +28,10 @@ class LSTM(Model):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_dim : int
|
||||
input dimension
|
||||
output_dim : int
|
||||
output dimension
|
||||
layers : tuple
|
||||
layer sizes
|
||||
lr : float
|
||||
learning rate
|
||||
d_feat : int
|
||||
input dimension for each time step
|
||||
metric: str
|
||||
the evaluate metric used in early stop
|
||||
optimizer : str
|
||||
optimizer name
|
||||
GPU : str
|
||||
@@ -112,10 +108,6 @@ class LSTM(Model):
|
||||
)
|
||||
)
|
||||
|
||||
if loss not in {"mse", "binary"}:
|
||||
raise NotImplementedError("loss {} is not supported!".format(loss))
|
||||
self._scorer = mean_squared_error if loss == "mse" else roc_auc_score
|
||||
|
||||
self.lstm_model = LSTMModel(
|
||||
d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout
|
||||
)
|
||||
@@ -251,7 +243,6 @@ class LSTM(Model):
|
||||
# train
|
||||
self.logger.info("training...")
|
||||
self._fitted = True
|
||||
# return
|
||||
|
||||
for step in range(self.n_epochs):
|
||||
self.logger.info("Epoch%d:", step)
|
||||
|
||||
@@ -31,7 +31,6 @@ from ...model.base import Model
|
||||
from ...data.dataset import DatasetH
|
||||
from ...data.dataset.handler import DataHandlerLP
|
||||
|
||||
|
||||
class SFM_Model(nn.Module):
|
||||
def __init__(self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu"):
|
||||
super().__init__()
|
||||
@@ -76,13 +75,13 @@ class SFM_Model(nn.Module):
|
||||
self.states = []
|
||||
|
||||
def forward(self, input):
|
||||
input = input.reshape(len(input), self.input_dim, -1) # [N, F, T]
|
||||
input = input.permute(0, 2, 1) # [N, T, F]
|
||||
input = input.reshape(len(input), self.input_dim, -1) # [N, F, T]
|
||||
input = input.permute(0, 2, 1) # [N, T, F]
|
||||
time_step = input.shape[1]
|
||||
|
||||
|
||||
for ts in range(time_step):
|
||||
x = input[:, ts, :]
|
||||
if len(self.states) == 0: # hasn't initialized yet
|
||||
x = input[:, ts,:]
|
||||
if len(self.states)==0: #hasn't initialized yet
|
||||
self.init_states(x)
|
||||
self.get_constants(x)
|
||||
p_tm1 = self.states[0]
|
||||
@@ -99,65 +98,64 @@ class SFM_Model(nn.Module):
|
||||
x_fre = torch.matmul(x * B_W[0], self.W_fre) + self.b_fre
|
||||
x_c = torch.matmul(x * B_W[0], self.W_c) + self.b_c
|
||||
x_o = torch.matmul(x * B_W[0], self.W_o) + self.b_o
|
||||
|
||||
i = self.inner_activation(
|
||||
x_i + torch.matmul(h_tm1 * B_U[0], self.U_i)
|
||||
) # not sure whether I am doing in the right unsquuze
|
||||
|
||||
i = self.inner_activation(x_i + torch.matmul(h_tm1 * B_U[0], self.U_i)) # not sure whether I am doing in the right unsquuze
|
||||
|
||||
|
||||
ste = self.inner_activation(x_ste + torch.matmul(h_tm1 * B_U[0], self.U_ste))
|
||||
fre = self.inner_activation(x_fre + torch.matmul(h_tm1 * B_U[0], self.U_fre))
|
||||
|
||||
ste = torch.reshape(ste, (-1, self.hidden_dim, 1))
|
||||
fre = torch.reshape(fre, (-1, 1, self.freq_dim))
|
||||
|
||||
|
||||
f = ste * fre
|
||||
|
||||
|
||||
c = i * self.activation(x_c + torch.matmul(h_tm1 * B_U[0], self.U_c))
|
||||
|
||||
time = time_tm1 + 1
|
||||
|
||||
omega = torch.tensor(2 * np.pi) * time * frequency
|
||||
|
||||
re = torch.cos(omega)
|
||||
re = torch.cos(omega)
|
||||
im = torch.sin(omega)
|
||||
|
||||
|
||||
c = torch.reshape(c, (-1, self.hidden_dim, 1))
|
||||
|
||||
S_re = f * S_re_tm1 + c * re
|
||||
S_im = f * S_im_tm1 + c * im
|
||||
|
||||
|
||||
A = torch.square(S_re) + torch.square(S_im)
|
||||
|
||||
A = torch.reshape(A, (-1, self.freq_dim)).float()
|
||||
A_a = torch.matmul(A * B_U[0], self.U_a)
|
||||
A_a = torch.reshape(A_a, (-1, self.hidden_dim))
|
||||
a = self.activation(A_a + self.b_a)
|
||||
|
||||
|
||||
o = self.inner_activation(x_o + torch.matmul(h_tm1 * B_U[0], self.U_o))
|
||||
|
||||
h = o * a
|
||||
p = torch.matmul(h, self.W_p) + self.b_p
|
||||
|
||||
self.states = [p, h, S_re, S_im, time, None, None, None]
|
||||
self.states = []
|
||||
self.states = []
|
||||
return self.fc_out(p).squeeze()
|
||||
|
||||
def init_states(self, x):
|
||||
reducer_f = torch.zeros((self.hidden_dim, self.freq_dim)).to(self.device)
|
||||
reducer_p = torch.zeros((self.hidden_dim, self.output_dim)).to(self.device)
|
||||
|
||||
|
||||
init_state_h = torch.zeros(self.hidden_dim).to(self.device)
|
||||
init_state_p = torch.matmul(init_state_h, reducer_p)
|
||||
|
||||
|
||||
init_state = torch.zeros_like(init_state_h).to(self.device)
|
||||
init_freq = torch.matmul(init_state_h, reducer_f)
|
||||
|
||||
init_state = torch.reshape(init_state, (-1, self.hidden_dim, 1))
|
||||
init_freq = torch.reshape(init_freq, (-1, 1, self.freq_dim))
|
||||
|
||||
|
||||
init_state_S_re = init_state * init_freq
|
||||
init_state_S_im = init_state * init_freq
|
||||
|
||||
|
||||
init_state_time = torch.tensor(0).to(self.device)
|
||||
|
||||
self.states = [init_state_p, init_state_h, init_state_S_re, init_state_S_im, init_state_time, None, None, None]
|
||||
@@ -203,6 +201,7 @@ class SFM(Model):
|
||||
dropout_U=0.0,
|
||||
n_epochs=200,
|
||||
lr=0.001,
|
||||
metric = "",
|
||||
batch_size=2000,
|
||||
early_stop=20,
|
||||
eval_steps=5,
|
||||
@@ -227,14 +226,15 @@ class SFM(Model):
|
||||
self.dropout_U = dropout_U
|
||||
self.n_epochs = n_epochs
|
||||
self.lr = lr
|
||||
self.metric = metric
|
||||
self.batch_size = batch_size
|
||||
self.early_stop = early_stop
|
||||
self.eval_steps = eval_steps
|
||||
self.lr_decay = lr_decay
|
||||
self.lr_decay_steps = lr_decay_steps
|
||||
self.optimizer = optimizer.lower()
|
||||
self.loss_type = loss
|
||||
self.device = "cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu"
|
||||
self.loss = loss
|
||||
self.device = "cuda:%d"%(GPU) if torch.cuda.is_available() else "cpu"
|
||||
self.use_gpu = torch.cuda.is_available()
|
||||
self.seed = seed
|
||||
|
||||
@@ -243,11 +243,12 @@ class SFM(Model):
|
||||
"\nd_feat : {}"
|
||||
"\nhidden_size : {}"
|
||||
"\noutput_size : {}"
|
||||
"\nfrequency_dimension : {}"
|
||||
"\nfrequency_dimension : {}"
|
||||
"\ndropout_W: {}"
|
||||
"\ndropout_U: {}"
|
||||
"\nn_epochs : {}"
|
||||
"\nlr : {}"
|
||||
"\nmetric : {}"
|
||||
"\nbatch_size : {}"
|
||||
"\nearly_stop : {}"
|
||||
"\neval_steps : {}"
|
||||
@@ -266,6 +267,7 @@ class SFM(Model):
|
||||
dropout_U,
|
||||
n_epochs,
|
||||
lr,
|
||||
metric,
|
||||
batch_size,
|
||||
early_stop,
|
||||
eval_steps,
|
||||
@@ -284,14 +286,14 @@ class SFM(Model):
|
||||
self._scorer = mean_squared_error if loss == "mse" else roc_auc_score
|
||||
|
||||
self.sfm_model = SFM_Model(
|
||||
d_feat=self.d_feat,
|
||||
d_feat=self.d_feat,
|
||||
output_dim=self.output_dim,
|
||||
hidden_size=self.hidden_size,
|
||||
freq_dim=self.freq_dim,
|
||||
dropout_W=self.dropout_W,
|
||||
dropout_U=self.dropout_U,
|
||||
device=self.device,
|
||||
)
|
||||
hidden_size=self.hidden_size,
|
||||
freq_dim=self.freq_dim,
|
||||
dropout_W=self.dropout_W,
|
||||
dropout_U=self.dropout_U,
|
||||
device=self.device
|
||||
)
|
||||
if optimizer.lower() == "adam":
|
||||
self.train_optimizer = optim.Adam(self.sfm_model.parameters(), lr=self.lr)
|
||||
elif optimizer.lower() == "gd":
|
||||
@@ -299,24 +301,73 @@ class SFM(Model):
|
||||
else:
|
||||
raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
|
||||
|
||||
# Reduce learning rate when loss has stopped decrease
|
||||
self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
|
||||
self.train_optimizer,
|
||||
mode="min",
|
||||
factor=0.5,
|
||||
patience=10,
|
||||
verbose=True,
|
||||
threshold=0.0001,
|
||||
threshold_mode="rel",
|
||||
cooldown=0,
|
||||
min_lr=0.00001,
|
||||
eps=1e-08,
|
||||
)
|
||||
|
||||
self._fitted = False
|
||||
self.sfm_model.to(self.device)
|
||||
|
||||
def fit(self, dataset: DatasetH, evals_result=dict(), verbose=True, save_path=None, **kwargs):
|
||||
def test_epoch(self, data_x, data_y):
|
||||
|
||||
# prepare training data
|
||||
x_values = data_x.values
|
||||
y_values = np.squeeze(data_y.values)
|
||||
|
||||
self.sfm_model.eval()
|
||||
|
||||
scores = []
|
||||
losses = []
|
||||
|
||||
indices = np.arange(len(x_values))
|
||||
np.random.shuffle(indices)
|
||||
|
||||
for i in range(len(indices))[:: self.batch_size]:
|
||||
|
||||
if len(indices) - i < self.batch_size:
|
||||
break
|
||||
|
||||
feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float().to(self.device)
|
||||
label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float().to(self.device)
|
||||
|
||||
pred = self.sfm_model(feature)
|
||||
loss = self.loss_fn(pred, label)
|
||||
losses.append(loss.item())
|
||||
|
||||
score = self.metric_fn(pred, label)
|
||||
scores.append(score.item())
|
||||
|
||||
return np.mean(losses), np.mean(scores)
|
||||
|
||||
def train_epoch(self, x_train, y_train):
|
||||
|
||||
x_train_values = x_train.values
|
||||
y_train_values = np.squeeze(y_train.values) * 100
|
||||
|
||||
self.sfm_model.train()
|
||||
|
||||
indices = np.arange(len(x_train_values))
|
||||
np.random.shuffle(indices)
|
||||
|
||||
for i in range(len(indices))[:: self.batch_size]:
|
||||
|
||||
if len(indices) - i < self.batch_size:
|
||||
break
|
||||
|
||||
feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float().to(self.device)
|
||||
label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float().to(self.device)
|
||||
|
||||
pred = self.sfm_model(feature)
|
||||
loss = self.loss_fn(pred, label)
|
||||
|
||||
self.train_optimizer.zero_grad()
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_value_(self.sfm_model.parameters(), 3.0)
|
||||
self.train_optimizer.step()
|
||||
|
||||
def fit(
|
||||
self,
|
||||
dataset: DatasetH,
|
||||
evals_result=dict(),
|
||||
verbose=True,
|
||||
save_path=None,
|
||||
):
|
||||
|
||||
df_train, df_valid = dataset.prepare(
|
||||
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
|
||||
@@ -324,10 +375,10 @@ class SFM(Model):
|
||||
x_train, y_train = df_train["feature"], df_train["label"]
|
||||
x_valid, y_valid = df_valid["feature"], df_valid["label"]
|
||||
|
||||
save_path = create_save_path(save_path)
|
||||
stop_steps = 0
|
||||
train_loss = 0
|
||||
best_loss = np.inf
|
||||
best_score = -np.inf
|
||||
best_epoch = 0
|
||||
evals_result["train"] = []
|
||||
evals_result["valid"] = []
|
||||
|
||||
@@ -335,90 +386,56 @@ class SFM(Model):
|
||||
self.logger.info("training...")
|
||||
self._fitted = True
|
||||
|
||||
# prepare training data
|
||||
x_train_values = torch.from_numpy(x_train.values).float()
|
||||
y_train_values = torch.from_numpy(np.squeeze(y_train.values)).float()
|
||||
train_num = y_train_values.shape[0]
|
||||
|
||||
# prepare validation data
|
||||
x_val_auto = torch.from_numpy(x_valid.values).float()
|
||||
y_val_auto = torch.from_numpy(np.squeeze(y_valid.values)).float()
|
||||
|
||||
x_val_auto = x_val_auto.to(self.device)
|
||||
y_val_auto = y_val_auto.to(self.device)
|
||||
|
||||
for step in range(self.n_epochs):
|
||||
if stop_steps >= self.early_stop:
|
||||
if verbose:
|
||||
self.logger.info("\tearly stop")
|
||||
break
|
||||
loss = AverageMeter()
|
||||
self.sfm_model.train()
|
||||
self.train_optimizer.zero_grad()
|
||||
self.logger.info("Epoch%d:", step)
|
||||
self.logger.info("training...")
|
||||
self.train_epoch(x_train, y_train)
|
||||
self.logger.info("evaluating...")
|
||||
train_loss, train_score = self.test_epoch(x_train, y_train)
|
||||
val_loss, val_score = self.test_epoch(x_valid, y_valid)
|
||||
self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
|
||||
evals_result["train"].append(train_score)
|
||||
evals_result["valid"].append(val_score)
|
||||
|
||||
choice = np.random.choice(train_num, self.batch_size)
|
||||
x_batch_auto = x_train_values[choice]
|
||||
y_batch_auto = y_train_values[choice]
|
||||
|
||||
x_batch_auto = x_batch_auto.to(self.device)
|
||||
y_batch_auto = y_batch_auto.to(self.device)
|
||||
|
||||
# forward
|
||||
preds = self.sfm_model(x_batch_auto)
|
||||
cur_loss = self.get_loss(preds, y_batch_auto, self.loss_type)
|
||||
cur_loss.backward()
|
||||
self.train_optimizer.step()
|
||||
loss.update(cur_loss.item())
|
||||
|
||||
# validation
|
||||
train_loss += loss.val
|
||||
if step and step % self.eval_steps == 0:
|
||||
if val_score > best_score:
|
||||
best_score = val_score
|
||||
stop_steps = 0
|
||||
best_epoch = step
|
||||
best_param = copy.deepcopy(self.sfm_model.state_dict())
|
||||
else:
|
||||
stop_steps += 1
|
||||
train_loss /= self.eval_steps
|
||||
|
||||
with torch.no_grad():
|
||||
self.sfm_model.eval()
|
||||
loss_val = AverageMeter()
|
||||
|
||||
# forward
|
||||
preds = self.sfm_model(x_val_auto)
|
||||
cur_loss_val = self.get_loss(preds, y_val_auto, self.loss_type)
|
||||
loss_val.update(cur_loss_val.item())
|
||||
|
||||
if verbose:
|
||||
self.logger.info(
|
||||
"[Epoch {}]: train_loss {:.6f}, valid_loss {:.6f}".format(step, train_loss, loss_val.val)
|
||||
)
|
||||
evals_result["train"].append(train_loss)
|
||||
evals_result["valid"].append(loss_val.val)
|
||||
if loss_val.val < best_loss:
|
||||
if verbose:
|
||||
self.logger.info(
|
||||
"\tvalid loss update from {:.6f} to {:.6f}, save checkpoint.".format(
|
||||
best_loss, loss_val.val
|
||||
)
|
||||
)
|
||||
best_loss = loss_val.val
|
||||
stop_steps = 0
|
||||
torch.save(self.sfm_model.state_dict(), save_path)
|
||||
train_loss = 0
|
||||
# update learning rate
|
||||
self.scheduler.step(cur_loss_val)
|
||||
|
||||
if stop_steps >= self.early_stop:
|
||||
self.logger.info("early stop")
|
||||
break
|
||||
self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
|
||||
if self.device != "cpu":
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
def get_loss(self, pred, target, loss_type):
|
||||
if loss_type == "mse":
|
||||
sqr_loss = (pred - target) ** 2
|
||||
loss = sqr_loss.mean()
|
||||
return loss
|
||||
elif loss_type == "binary":
|
||||
loss = nn.BCELoss()
|
||||
return loss(pred, target)
|
||||
else:
|
||||
raise NotImplementedError("loss {} is not supported!".format(loss_type))
|
||||
def mse(self, pred, label):
|
||||
loss = (pred - label) ** 2
|
||||
return torch.mean(loss)
|
||||
|
||||
def loss_fn(self, pred, label):
|
||||
mask = ~torch.isnan(label)
|
||||
|
||||
if self.loss == "mse":
|
||||
return self.mse(pred[mask], label[mask])
|
||||
|
||||
raise ValueError("unknown loss `%s`" % self.loss)
|
||||
|
||||
def metric_fn(self, pred, label):
|
||||
|
||||
mask = torch.isfinite(label)
|
||||
if self.metric == "IC":
|
||||
return self.cal_ic(pred[mask], label[mask])
|
||||
|
||||
if self.metric == "" or self.metric == "loss": # use loss
|
||||
return -self.loss_fn(pred[mask], label[mask])
|
||||
|
||||
raise ValueError("unknown metric `%s`" % self.metric)
|
||||
|
||||
def cal_ic(self, pred, label):
|
||||
return torch.mean(pred * label)
|
||||
def predict(self, dataset):
|
||||
if not self._fitted:
|
||||
raise ValueError("model is not fitted yet!")
|
||||
@@ -430,7 +447,7 @@ class SFM(Model):
|
||||
sample_num = x_values.shape[0]
|
||||
preds = []
|
||||
|
||||
for begin in range(sample_num)[:: self.batch_size]:
|
||||
for begin in range(sample_num)[::self.batch_size]:
|
||||
if sample_num - begin < self.batch_size:
|
||||
end = sample_num
|
||||
else:
|
||||
@@ -440,37 +457,16 @@ class SFM(Model):
|
||||
|
||||
if self.device != "cpu":
|
||||
x_batch = x_batch.to(self.device)
|
||||
|
||||
|
||||
with torch.no_grad():
|
||||
if self.device != "cpu":
|
||||
pred = self.sfm_model(x_batch).detach().cpu().numpy()
|
||||
else:
|
||||
pred = self.sfm_model(x_batch).detach().cpu().numpy()
|
||||
pred = self.sfm_model(x_batch).detach().cpu().numpy()
|
||||
|
||||
preds.append(pred)
|
||||
|
||||
|
||||
return pd.Series(np.concatenate(preds), index=index)
|
||||
|
||||
def save(self, filename, **kwargs):
|
||||
with save_multiple_parts_file(filename) as model_dir:
|
||||
model_path = os.path.join(model_dir, os.path.split(model_dir)[-1])
|
||||
# Save model
|
||||
torch.save(self.sfm_model.state_dict(), model_path)
|
||||
|
||||
def load(self, buffer, **kwargs):
|
||||
with unpack_archive_with_buffer(buffer) as model_dir:
|
||||
# Get model name
|
||||
_model_name = os.path.splitext(list(filter(lambda x: x.startswith("model.bin"), os.listdir(model_dir)))[0])[
|
||||
0
|
||||
]
|
||||
_model_path = os.path.join(model_dir, _model_name)
|
||||
# Load model
|
||||
self.sfm_model.load_state_dict(torch.load(_model_path))
|
||||
self._fitted = True
|
||||
|
||||
|
||||
class AverageMeter(object):
|
||||
"""Computes and stores the average and current value"""
|
||||
|
||||
def __init__(self):
|
||||
self.reset()
|
||||
|
||||
|
||||
@@ -30,15 +30,15 @@ class XGBModel(Model):
|
||||
def fit(
|
||||
self,
|
||||
dataset: DatasetH,
|
||||
num_boost_round=1000,
|
||||
early_stopping_rounds=50,
|
||||
verbose_eval=20,
|
||||
evals_result=dict(),
|
||||
num_boost_round = 1000,
|
||||
early_stopping_rounds = 50,
|
||||
verbose_eval = 20,
|
||||
evals_result = dict(),
|
||||
**kwargs
|
||||
):
|
||||
|
||||
df_train, df_valid = dataset.prepare(
|
||||
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
|
||||
["train", "valid"], col_set = ["feature", "label"], data_key = DataHandlerLP.DK_L
|
||||
)
|
||||
x_train, y_train = df_train["feature"], df_train["label"]
|
||||
x_valid, y_valid = df_valid["feature"], df_valid["label"]
|
||||
@@ -49,16 +49,16 @@ class XGBModel(Model):
|
||||
else:
|
||||
raise ValueError("XGBoost doesn't support multi-label training")
|
||||
|
||||
dtrain = xgb.DMatrix(x_train.values, label=y_train_1d)
|
||||
dvalid = xgb.DMatrix(x_valid.values, label=y_valid_1d)
|
||||
dtrain = xgb.DMatrix(x_train.values, label = y_train_1d)
|
||||
dvalid = xgb.DMatrix(x_valid.values, label = y_valid_1d)
|
||||
self.model = xgb.train(
|
||||
self._params,
|
||||
dtrain=dtrain,
|
||||
num_boost_round=num_boost_round,
|
||||
evals=[(dtrain, "train"), (dvalid, "valid")],
|
||||
early_stopping_rounds=early_stopping_rounds,
|
||||
verbose_eval=verbose_eval,
|
||||
evals_result=evals_result,
|
||||
dtrain = dtrain,
|
||||
num_boost_round = num_boost_round,
|
||||
evals = [(dtrain, "train"), (dvalid, "valid")],
|
||||
early_stopping_rounds = early_stopping_rounds,
|
||||
verbose_eval = verbose_eval,
|
||||
evals_result = evals_result,
|
||||
**kwargs
|
||||
)
|
||||
evals_result["train"] = list(evals_result["train"].values())[0]
|
||||
@@ -67,5 +67,5 @@ class XGBModel(Model):
|
||||
def predict(self, dataset):
|
||||
if self.model is None:
|
||||
raise ValueError("model is not fitted yet!")
|
||||
x_test = dataset.prepare("test", col_set="feature")
|
||||
return pd.Series(self.model.predict(xgb.DMatrix(x_test.values)), index=x_test.index)
|
||||
x_test = dataset.prepare("test", col_set = "feature")
|
||||
return pd.Series(self.model.predict(xgb.DMatrix(x_test.values)), index = x_test.index)
|
||||
|
||||
@@ -166,7 +166,9 @@ class MinMaxNorm(Processor):
|
||||
return df
|
||||
|
||||
|
||||
class ZscoreNorm(Processor):
|
||||
class ZScoreNorm(Processor):
|
||||
"""ZScore Normalization"""
|
||||
|
||||
def __init__(self, fit_start_time, fit_end_time, fields_group=None):
|
||||
self.fit_start_time = fit_start_time
|
||||
self.fit_end_time = fit_end_time
|
||||
@@ -193,6 +195,42 @@ class ZscoreNorm(Processor):
|
||||
return df
|
||||
|
||||
|
||||
class RobustZScoreNorm(Processor):
|
||||
"""Robust ZScore Normalization
|
||||
|
||||
Use robust statistics for Z-Score normalization:
|
||||
mean(x) = median(x)
|
||||
std(x) = MAD(x) * 1.4826
|
||||
|
||||
Reference:
|
||||
https://en.wikipedia.org/wiki/Median_absolute_deviation.
|
||||
"""
|
||||
|
||||
def __init__(self, fit_start_time, fit_end_time, fields_group=None, clip_outlier=True):
|
||||
self.fit_start_time = fit_start_time
|
||||
self.fit_end_time = fit_end_time
|
||||
self.fields_group = fields_group
|
||||
self.clip_outlier = clip_outlier
|
||||
|
||||
def fit(self, df):
|
||||
df = fetch_df_by_index(df, slice(self.fit_start_time, self.fit_end_time), level="datetime")
|
||||
self.cols = get_group_columns(df, self.fields_group)
|
||||
X = df[self.cols].values
|
||||
self.mean_train = np.nanmedian(X, axis=0)
|
||||
self.std_train = np.nanmedian(np.abs(X - self.mean_train), axis=0)
|
||||
self.std_train += EPS
|
||||
self.std_train *= 1.4826
|
||||
|
||||
def __call__(self, df):
|
||||
X = df[self.cols]
|
||||
X -= self.mean_train
|
||||
X /= self.std_train
|
||||
df[self.cols] = X
|
||||
if self.clip_outlier:
|
||||
df.clip(-3, 3, inplace=True)
|
||||
return df
|
||||
|
||||
|
||||
class CSZScoreNorm(Processor):
|
||||
"""Cross Sectional ZScore Normalization"""
|
||||
|
||||
|
||||
@@ -27,9 +27,9 @@ def sys_config(config, config_path):
|
||||
Parameters
|
||||
----------
|
||||
config : dict
|
||||
configuration of the workflow
|
||||
configuration of the workflow.
|
||||
config_path : str
|
||||
configuration of the path
|
||||
configuration of the path.
|
||||
"""
|
||||
sys_config = config.get("sys", {})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user