1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

pass the whole workflow

This commit is contained in:
Young
2020-10-28 14:07:33 +00:00
parent 1a9ee6cef8
commit a50c9008b8
10 changed files with 296 additions and 221 deletions

View File

@@ -16,7 +16,8 @@ from qlib.contrib.evaluate import (
)
from qlib.utils import exists_qlib_data
from qlib.model.learner import train_model
# from qlib.model.learner import train_model
from qlib.utils import init_instance_by_config
if __name__ == "__main__":
@@ -57,13 +58,6 @@ if __name__ == "__main__":
"test_end_time": "2020-08-01",
}
# use default DataHandler
# custom DataHandler, refer to: TODO: DataHandler API url
handler = Alpha158(**DATA_HANDLER_CONFIG)
data = handler.fetch(slice('2008-01-01', '2014-12-31'), data_key=handler.DK_I)
print(data)
task = {
"model": {
"class": "LGBModel",
@@ -80,59 +74,33 @@ if __name__ == "__main__":
"num_threads": 20,
}
},
"data": {
"dataset": {
"class": "DatasetH",
"module_path": "qlib.data.dataset",
"kwargs": {
'handler': {
"class": "Alpha158",
"kwargs": DATA_HANDLER_CONFIG
},
"train_start_time": "2008-01-01",
"train_end_time": "2014-12-31",
"validate_start_time": "2015-01-01",
"validate_end_time": "2016-12-31",
"test_start_time": "2017-01-01",
"test_end_time": "2020-08-01",
"dataset": {
"class": "DatasetH",
"module_path": "qlib.data.dataset",
"kwargs": {
'handler': {
"class": "Alpha158",
"module_path": "qlib.contrib.data.handler",
"kwargs": DATA_HANDLER_CONFIG
},
'segments': {
'train': ("2008-01-01", "2014-12-31"),
'valid': ("2015-01-01", "2016-12-31",),
'test': ("2017-01-01", "2020-08-01",),
}
}
},
}
# You should record the data in a specific sequence
# "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
}
model = train_model(task)
# model = train_model(task)
model = init_instance_by_config(task['model'])
dataset = init_instance_by_config(task['dataset'])
model.fit(dataset)
sys.exit(0) # I have tested the code above ---------------------------------------------
x_train, y_train, x_validate, y_validate, x_test, y_test = Alpha158(**DATA_HANDLER_CONFIG).get_split_data(
**TRAINER_CONFIG
)
MODEL_CONFIG = {
"loss": "mse",
"colsample_bytree": 0.8879,
"learning_rate": 0.0421,
"subsample": 0.8789,
"lambda_l1": 205.6999,
"lambda_l2": 580.9768,
"max_depth": 8,
"num_leaves": 210,
"num_threads": 20,
}
# use default model
# custom Model, refer to: TODO: Model API url
model = LGBModel(**MODEL_CONFIG)
model.fit(x_train, y_train, x_validate, y_validate)
_pred = model.predict(x_test)
_pred = pd.DataFrame(_pred, index=x_test.index, columns=y_test.columns)
# backtest requires pred_score
pred_score = pd.DataFrame(index=_pred.index)
pred_score["score"] = _pred.iloc(axis=1)[0]
pred_score = model.predict(dataset)
# save pred_score to file
pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()