1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-01 18:11:18 +08:00
Files
qlib/examples/high_freq/workflow.py
2021-01-25 17:58:45 +00:00

138 lines
4.1 KiB
Python

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import sys
from pathlib import Path
import qlib
import pickle
import numpy as np
import pandas as pd
from qlib.config import REG_CN
from qlib.contrib.model.gbdt import LGBModel
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
backtest as normal_backtest,
risk_analysis,
)
from qlib.utils import init_instance_by_config
from qlib.data.dataset.handler import DataHandlerLP
from qlib.data.ops import Operators
from qlib.data.data import Cal
from highfreq_ops import DayFirst, DayLast, FFillNan, Date, Select, IsNull
def save_dataset(dataset, path: [Path, str]):
    """
    Persist a dataset object on disk.

    The work is delegated to the dataset's own ``to_pickle`` method, which is
    called with ``path`` passed as a keyword argument.

    Parameters
    ----------
    dataset : object
        any object exposing ``to_pickle(path=...)``
    path : [Path, str]
        destination the dataset is written to
    """
    target = path
    dataset.to_pickle(path=target)
def load_dataset(path: [Path, str], init_type=DataHandlerLP.IT_LS):
    """
    load dataset from path

    The file at ``path`` is unpickled and the resulting object's ``init``
    method is called with ``init_type`` before the object is returned.

    Parameters
    ----------
    path : [Path, str]
        path to load
    init_type : str
        - if `init_type` == DataHandlerLP.IT_FIT_SEQ:
            the input of `DataHandlerLP.fit` will be the output of the previous processor
        - if `init_type` == DataHandlerLP.IT_FIT_IND:
            the input of `DataHandlerLP.fit` will be the original df
        - if `init_type` == DataHandlerLP.IT_LS:
            The state of the object has been loaded by pickle

    Returns
    -------
    the unpickled dataset object, after ``init(init_type=init_type)`` has been called on it
    """
    # The context manager closes the file even if unpickling raises.
    # NOTE(security): pickle.load can execute arbitrary code; only load files
    # that come from a trusted source.
    with open(path, "rb") as fd:
        dataset = pickle.load(fd)
    dataset.init(init_type=init_type)
    return dataset
if __name__ == "__main__":
    # Script entry point: initialise qlib with the custom high-frequency operators,
    # describe the training and backtest datasets, then instantiate both from config.

    # use default data
    provider_uri = "/mnt/v-xiabi/data/qlib/high_freq"  # target_dir
    qlib.init(
        provider_uri=provider_uri,
        custom_ops=[DayFirst, DayLast, FFillNan, Date, Select, IsNull],
        redis_port=233,
        region=REG_CN,
        auto_mount=False,
    )

    MARKET = "csi300"
    BENCHMARK = "SH000300"

    ###################################
    # train model
    ###################################
    # Handler kwargs for the training dataset: 1min data with a fit window and
    # a HighFreqNorm inference processor.
    DATA_HANDLER_CONFIG0 = {
        "start_time": "2017-01-01 00:00:00",
        "end_time": "2020-11-30 15:00:00",
        "freq": "1min",
        "fit_start_time": "2017-01-01 00:00:00",
        "fit_end_time": "2020-08-31 15:00:00",
        "instruments": "all",
        "infer_processors": [{"class": "HighFreqNorm", "module_path": "highfreq_processor", "kwargs": {}}],
    }
    # Handler kwargs for the backtest dataset: same time range and frequency,
    # without fit window or processors.
    DATA_HANDLER_CONFIG1 = {
        "start_time": "2017-01-01 00:00:00",
        "end_time": "2020-11-30 15:00:00",
        "freq": "1min",
        "instruments": "all",
    }

    task = {
        "dataset": {
            "class": "DatasetH",
            "module_path": "qlib.data.dataset",
            "kwargs": {
                "handler": {
                    "class": "HighFreqHandler",
                    "module_path": "highfreq_handler",
                    "kwargs": DATA_HANDLER_CONFIG0,
                },
                "segments": {
                    "train": ("2017-01-01 00:00:00", "2020-08-31 15:00:00"),
                    "test": (
                        "2020-09-01 00:00:00",
                        "2020-11-30 15:00:00",
                    ),
                },
            },
        },
        # You should record the data in specific sequence
        # "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
        "dataset_backtest": {
            "class": "DatasetH",
            "module_path": "qlib.data.dataset",
            "kwargs": {
                "handler": {
                    "class": "HighFreqBacktestHandler",
                    # Same module name as the HighFreqHandler entry above.
                    "module_path": "highfreq_handler",
                    "kwargs": DATA_HANDLER_CONFIG1,
                },
                "segments": {
                    "train": ("2017-01-01 00:00:00", "2020-08-31 15:00:00"),
                    "test": (
                        "2020-09-01 00:00:00",
                        "2020-11-30 15:00:00",
                    ),
                },
            },
        },
    }

    Cal.get_calender_day(freq="1min")  # TO FIX: load the calendar day for cache

    dataset = init_instance_by_config(task["dataset"])
    dataset_backtest = init_instance_by_config(task["dataset_backtest"])