1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-04 11:30:57 +08:00

dnn model opz

This commit is contained in:
bxdd
2020-11-26 21:15:21 +08:00
parent d3480602c3
commit 52c7076917
3 changed files with 49 additions and 20 deletions

View File

@@ -1,4 +1,4 @@
provider_uri: "~/.qlib/qlib_data/cn_data"
provider_uri: "~/.qlib/qlib_data/cn_data_new"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
@@ -8,6 +8,18 @@ data_handler_config: &data_handler_config
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors: [
{
"class" : "CSZFillna",
"kwargs":{"fields_group": "feature"}
},
{
"class" : "Fillna",
"kwargs":{"fields_group": "feature"}
}
]
learn_processors: ["DropnaLabel", {"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}}]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy

View File

@@ -49,7 +49,7 @@ class DNNModelPytorch(Model):
self,
input_dim,
output_dim,
layers=(256, 512, 768, 1024, 768, 512, 256, 128, 64),
layers=(256, 512, 768, 512, 256, 128, 64),
lr=0.001,
max_steps=300,
batch_size=2000,
@@ -78,7 +78,7 @@ class DNNModelPytorch(Model):
self.optimizer = optimizer.lower()
self.loss_type = loss
self.visible_GPU = GPU
self.use_gpu = torch.cuda.is_available()
self.use_GPU = torch.cuda.is_available()
self.logger.info(
"DNN parameters setting:"
@@ -107,7 +107,7 @@ class DNNModelPytorch(Model):
loss,
eval_steps,
GPU,
self.use_gpu,
self.use_GPU,
)
)
@@ -138,7 +138,7 @@ class DNNModelPytorch(Model):
)
self._fitted = False
if self.use_gpu:
if self.use_GPU:
self.dnn_model.cuda()
# set the visible GPU
if self.visible_GPU:
@@ -157,7 +157,6 @@ class DNNModelPytorch(Model):
)
x_train, y_train = df_train["feature"], df_train["label"]
x_valid, y_valid = df_valid["feature"], df_valid["label"]
try:
wdf_train, wdf_valid = dataset.prepare(["train", "valid"], col_set=["weight"], data_key=DataHandlerLP.DK_L)
w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
@@ -181,13 +180,14 @@ class DNNModelPytorch(Model):
y_train_values = torch.from_numpy(y_train.values).float()
w_train_values = torch.from_numpy(w_train.values).float()
train_num = y_train_values.shape[0]
# prepare validation data
x_val_auto = torch.from_numpy(x_valid.values).float()
y_val_auto = torch.from_numpy(y_valid.values).float()
w_val_auto = torch.from_numpy(w_valid.values).float()
if self.use_gpu:
#print('valiadationx:', x_val_auto)
#print('valiadationy:', y_val_auto)
#print('valiadationw:', w_val_auto)
if self.use_GPU:
x_val_auto = x_val_auto.cuda()
y_val_auto = y_val_auto.cuda()
w_val_auto = w_val_auto.cuda()
@@ -206,13 +206,15 @@ class DNNModelPytorch(Model):
y_batch_auto = y_train_values[choice]
w_batch_auto = w_train_values[choice]
if self.use_gpu:
x_batch_auto = x_batch_auto.float().cuda()
y_batch_auto = y_batch_auto.float().cuda()
w_batch_auto = w_batch_auto.float().cuda()
if self.use_GPU:
x_batch_auto = x_batch_auto.cuda()
y_batch_auto = y_batch_auto.cuda()
w_batch_auto = w_batch_auto.cuda()
# forward
preds = self.dnn_model(x_batch_auto)
#print('pred_train:', preds.detach().cpu().numpy())
#print('label_train:', y_batch_auto.cpu().numpy())
cur_loss = self.get_loss(preds, w_batch_auto, y_batch_auto, self.loss_type)
cur_loss.backward()
self.train_optimizer.step()
@@ -230,6 +232,7 @@ class DNNModelPytorch(Model):
loss_val = AverageMeter()
# forward
preds = self.dnn_model(x_val_auto)
cur_loss_val = self.get_loss(preds, w_val_auto, y_val_auto, self.loss_type)
loss_val.update(cur_loss_val.item())
@@ -255,7 +258,7 @@ class DNNModelPytorch(Model):
# restore the optimal parameters after training ??
self.dnn_model.load_state_dict(torch.load(save_path))
if self.use_gpu:
if self.use_GPU:
torch.cuda.empty_cache()
def get_loss(self, pred, w, target, loss_type):
@@ -273,16 +276,18 @@ class DNNModelPytorch(Model):
if not self._fitted:
raise ValueError("model is not fitted yet!")
x_test_pd = dataset.prepare("test", col_set="feature")
print(x_test_pd)
x_test = torch.from_numpy(x_test_pd.values).float()
if self.use_gpu:
if self.use_GPU:
x_test = x_test.cuda()
self.dnn_model.eval()
with torch.no_grad():
if self.use_gpu:
if self.use_GPU:
preds = self.dnn_model(x_test).detach().cpu().numpy()
else:
preds = self.dnn_model(x_test).detach().numpy()
print(preds)
return pd.Series(np.squeeze(preds), index=x_test_pd.index)
def save(self, filename, **kwargs):
@@ -331,7 +336,7 @@ class Net(nn.Module):
dnn_layers.append(drop_input)
for i, (input_dim, hidden_units) in enumerate(zip(layers[:-1], layers[1:])):
fc = nn.Linear(input_dim, hidden_units)
activation = nn.ReLU()
activation = nn.LeakyReLU(negative_slope=0.1, inplace=False)
bn = nn.BatchNorm1d(hidden_units)
seq = nn.Sequential(fc, bn, activation)
dnn_layers.append(seq)
@@ -354,7 +359,7 @@ class Net(nn.Module):
def _weight_init(self):
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.xavier_normal_(m.weight, gain=1)
nn.init.kaiming_normal_(m.weight, a=0.1, mode='fan_in', nonlinearity='leaky_relu')
def forward(self, x):
cur_output = x

View File

@@ -90,6 +90,7 @@ class DropnaLabel(DropnaProcessor):
return False
class TanhProcess(Processor):
""" Use tanh to process noise data"""
@@ -122,7 +123,6 @@ class ProcessInf(Processor):
return replace_inf(df)
class Fillna(Processor):
"""Process NaN"""
@@ -202,7 +202,8 @@ class CSZScoreNorm(Processor):
def __call__(self, df):
# try not modify original dataframe
cols = get_group_columns(df, self.fields_group)
df[cols] = df[cols].groupby("datetime").apply(lambda df: (df - df.mean()).div(df.std()))
df[cols] = df[cols].groupby("datetime").apply(lambda x: (x - x.mean()).div(x.std()))
return df
@@ -220,3 +221,14 @@ class CSRankNorm(Processor):
t *= 3.46 # NOTE: towards unit std
df[cols] = t
return df
class CSZFillna(Processor):
"""Cross Sectional Fill Nan"""
def __init__(self, fields_group=None):
self.fields_group = fields_group
def __call__(self, df):
cols = get_group_columns(df, self.fields_group)
df[cols] = df[cols].groupby("datetime").apply(lambda x: x.fillna(x.mean()))
return df