mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
pass the whole workflow
This commit is contained in:
@@ -16,7 +16,8 @@ from qlib.contrib.evaluate import (
|
||||
)
|
||||
from qlib.utils import exists_qlib_data
|
||||
|
||||
from qlib.model.learner import train_model
|
||||
# from qlib.model.learner import train_model
|
||||
from qlib.utils import init_instance_by_config
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -57,13 +58,6 @@ if __name__ == "__main__":
|
||||
"test_end_time": "2020-08-01",
|
||||
}
|
||||
|
||||
# use default DataHandler
|
||||
# custom DataHandler, refer to: TODO: DataHandler API url
|
||||
handler = Alpha158(**DATA_HANDLER_CONFIG)
|
||||
|
||||
data = handler.fetch(slice('2008-01-01', '2014-12-31'), data_key=handler.DK_I)
|
||||
print(data)
|
||||
|
||||
task = {
|
||||
"model": {
|
||||
"class": "LGBModel",
|
||||
@@ -80,59 +74,33 @@ if __name__ == "__main__":
|
||||
"num_threads": 20,
|
||||
}
|
||||
},
|
||||
"data": {
|
||||
"dataset": {
|
||||
"class": "DatasetH",
|
||||
"module_path": "qlib.data.dataset",
|
||||
"kwargs": {
|
||||
'handler': {
|
||||
"class": "Alpha158",
|
||||
"kwargs": DATA_HANDLER_CONFIG
|
||||
},
|
||||
"train_start_time": "2008-01-01",
|
||||
"train_end_time": "2014-12-31",
|
||||
"validate_start_time": "2015-01-01",
|
||||
"validate_end_time": "2016-12-31",
|
||||
"test_start_time": "2017-01-01",
|
||||
"test_end_time": "2020-08-01",
|
||||
"dataset": {
|
||||
"class": "DatasetH",
|
||||
"module_path": "qlib.data.dataset",
|
||||
"kwargs": {
|
||||
'handler': {
|
||||
"class": "Alpha158",
|
||||
"module_path": "qlib.contrib.data.handler",
|
||||
"kwargs": DATA_HANDLER_CONFIG
|
||||
},
|
||||
'segments': {
|
||||
'train': ("2008-01-01", "2014-12-31"),
|
||||
'valid': ("2015-01-01", "2016-12-31",),
|
||||
'test': ("2017-01-01", "2020-08-01",),
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
# You should record the data in a specific sequence
|
||||
# "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
|
||||
}
|
||||
|
||||
model = train_model(task)
|
||||
# model = train_model(task)
|
||||
model = init_instance_by_config(task['model'])
|
||||
dataset = init_instance_by_config(task['dataset'])
|
||||
|
||||
model.fit(dataset)
|
||||
|
||||
|
||||
sys.exit(0) # I have tested the code above ---------------------------------------------
|
||||
|
||||
x_train, y_train, x_validate, y_validate, x_test, y_test = Alpha158(**DATA_HANDLER_CONFIG).get_split_data(
|
||||
**TRAINER_CONFIG
|
||||
)
|
||||
|
||||
MODEL_CONFIG = {
|
||||
"loss": "mse",
|
||||
"colsample_bytree": 0.8879,
|
||||
"learning_rate": 0.0421,
|
||||
"subsample": 0.8789,
|
||||
"lambda_l1": 205.6999,
|
||||
"lambda_l2": 580.9768,
|
||||
"max_depth": 8,
|
||||
"num_leaves": 210,
|
||||
"num_threads": 20,
|
||||
}
|
||||
# use default model
|
||||
# custom Model, refer to: TODO: Model API url
|
||||
model = LGBModel(**MODEL_CONFIG)
|
||||
model.fit(x_train, y_train, x_validate, y_validate)
|
||||
_pred = model.predict(x_test)
|
||||
_pred = pd.DataFrame(_pred, index=x_test.index, columns=y_test.columns)
|
||||
|
||||
# backtest requires pred_score
|
||||
pred_score = pd.DataFrame(index=_pred.index)
|
||||
pred_score["score"] = _pred.iloc(axis=1)[0]
|
||||
pred_score = model.predict(dataset)
|
||||
|
||||
# save pred_score to file
|
||||
pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
|
||||
|
||||
Reference in New Issue
Block a user