From 1b569d371d24440d80e20c7cb01563cc1a33d0f5 Mon Sep 17 00:00:00 2001 From: bxdd Date: Tue, 26 Jan 2021 14:32:08 +0000 Subject: [PATCH] simpson vwap --- examples/high_freq/highfreq_handler.py | 54 ++++++++------- examples/high_freq/highfreq_processor.py | 1 + examples/high_freq/workflow.py | 88 +++++++++++++----------- 3 files changed, 78 insertions(+), 65 deletions(-) diff --git a/examples/high_freq/highfreq_handler.py b/examples/high_freq/highfreq_handler.py index d50b95ec7..09d954364 100644 --- a/examples/high_freq/highfreq_handler.py +++ b/examples/high_freq/highfreq_handler.py @@ -7,7 +7,7 @@ from qlib.log import TimeInspector class HighFreqHandler(DataHandlerLP): def __init__( self, - instruments="csi500", + instruments="csi300", start_time=None, end_time=None, freq="1min", @@ -55,8 +55,10 @@ class HighFreqHandler(DataHandlerLP): names = [] template_if = "If(IsNull({1}), {0}, {1})" - template_paused = "Select(Eq($paused, 0.0), {0})" + #template_paused = "Select(Eq($paused, 0.0), {0})" + template_paused="{0}" template_fillnan = "FFillNan({0})" + simpson_vwap = "($open + 2*$high + 2*$low + $close)/6" fields += [ "{0}/Ref(DayLast({1}), 240)".format( template_if.format( @@ -87,11 +89,9 @@ class HighFreqHandler(DataHandlerLP): fields += ["{0}/Ref(DayLast({0}), 240)".format(template_fillnan.format(template_paused.format("$close")))] fields += [ "{0}/Ref(DayLast({1}), 240)".format( - "If(IsNull({1}), {0}, If(Or(Or(Or(Eq({1}, np.inf), Eq({1}, -np.inf)), Eq({1}, 0)), Or(Gt({1}, Mul(1.001, {3})), Lt({1}, Mul(0.999, {2})))), {0}, {1}))".format( + template_if.format( template_fillnan.format(template_paused.format("$close")), - template_paused.format("$vwap"), - template_paused.format("$low"), - template_paused.format("$high"), + template_paused.format(simpson_vwap), ), template_fillnan.format(template_paused.format("$close")), ) @@ -128,13 +128,12 @@ class HighFreqHandler(DataHandlerLP): fields += [ "Ref({0}, 240)/Ref(DayLast({0}), 240)".format(template_fillnan.format(template_paused.format("$close"))) ] + fields += [ "Ref({0}, 240)/Ref(DayLast({1}), 240)".format( - "If(IsNull({1}), {0}, If(Or(Or(Or(Eq({1}, np.inf), Eq({1}, -np.inf)), Eq({1}, 0)), Or(Gt({1}, Mul(1.001, {3})), Lt({1}, Mul(0.999, {2})))), {0}, {1}))".format( + template_if.format( template_fillnan.format(template_paused.format("$close")), - template_paused.format("$vwap"), - template_paused.format("$low"), - template_paused.format("$high"), + template_paused.format(simpson_vwap), ), template_fillnan.format(template_paused.format("$close")), ) @@ -143,10 +142,9 @@ class HighFreqHandler(DataHandlerLP): fields += [ "{0}/Ref(DayLast(Mean({0}, 7200)), 240)".format( - "If(IsNull({1}), 0, If(Or(Gt({2}, Mul(1.001, {4})), Lt({2}, Mul(0.999, {3}))), 0, {1}))".format( - template_fillnan.format(template_paused.format("$close")), + "If(IsNull({0}), 0, If(Or(Gt({1}, Mul(1.001, {3})), Lt({1}, Mul(0.999, {2}))), 0, {0}))".format( template_paused.format("$volume"), - template_paused.format("$vwap"), + template_paused.format(simpson_vwap), template_paused.format("$low"), template_paused.format("$high"), ) @@ -155,10 +153,9 @@ class HighFreqHandler(DataHandlerLP): names += ["$volume"] fields += [ "Ref({0}, 240)/Ref(DayLast(Mean({0}, 7200)), 240)".format( - "If(IsNull({1}), 0, If(Or(Gt({2}, Mul(1.001, {4})), Lt({2}, Mul(0.999, {3}))), 0, {1}))".format( - template_fillnan.format(template_paused.format("$close")), + "If(IsNull({0}), 0, If(Or(Gt({1}, Mul(1.001, {3})), Lt({1}, Mul(0.999, {2}))), 0, {0}))".format( template_paused.format("$volume"), - template_paused.format("$vwap"), + template_paused.format(simpson_vwap), template_paused.format("$low"), template_paused.format("$high"), ) @@ -199,21 +196,26 @@ class HighFreqBacktestHandler(DataHandler): names = [] template_if = "If(Eq({1}, np.nan), {0}, {1})" - template_paused = "Select(Eq($paused, 0.0), {0})" + #template_paused = "Select(Eq($paused, 0.0), {0})" + template_paused="{0}" template_fillnan = "FFillNan({0})" - + simpson_vwap = "($open + 2*$high + 2*$low + $close)/6" + #fields += [ + # template_fillnan.format(template_paused.format("$close")), + #] + fields += [template_if.format( + template_fillnan.format(template_paused.format("$close")), + template_paused.format(simpson_vwap), + )] + names += ["$vwap_0"] fields += [ - template_fillnan.format(template_paused.format("$close")), - ] - names += ["$vwap0"] - fields += [ - "If(Eq({1}, np.nan), 0, If(Or(Gt({2}, Mul(1.001, {4})), Lt({2}, Mul(0.999, {3}))), 0, {1}))".format( - template_fillnan.format(template_paused.format("$close")), + "If(IsNull({0}), 0, If(Or(Gt({1}, Mul(1.001, {3})), Lt({1}, Mul(0.999, {2}))), 0, {0}))".format( template_paused.format("$volume"), - template_paused.format("$vwap"), + template_paused.format(simpson_vwap), template_paused.format("$low"), template_paused.format("$high"), ) ] - names += ["$volume0"] + names += ["$volume_0"] + return fields, names diff --git a/examples/high_freq/highfreq_processor.py b/examples/high_freq/highfreq_processor.py index d71cd2e85..bf4a30a5b 100644 --- a/examples/high_freq/highfreq_processor.py +++ b/examples/high_freq/highfreq_processor.py @@ -58,6 +58,7 @@ class HighFreqNorm(Processor): # print("start_call_feature_reshape") idx = df_features.index.droplevel("datetime").drop_duplicates() idx.set_names(["instrument", "datetime"], inplace=True) + print(df_values.shape) feat = df_values[:, [0, 1, 2, 3, 4, 10]].reshape(-1, 6 * 240) feat_1 = df_values[:, [5, 6, 7, 8, 9, 11]].reshape(-1, 6 * 240) df_new_features = pd.DataFrame( diff --git a/examples/high_freq/workflow.py b/examples/high_freq/workflow.py index eb30fb1b8..e1736394e 100644 --- a/examples/high_freq/workflow.py +++ b/examples/high_freq/workflow.py @@ -27,7 +27,7 @@ from highfreq_ops import DayFirst, DayLast, FFillNan, Date, Select, IsNull if __name__ == "__main__": # use default data - provider_uri = "/mnt/v-xiabi/data/qlib/high_freq" # target_dir + provider_uri = "/nfs_data/qlib_data/yahoo_high_qlib" # target_dir qlib.init( provider_uri=provider_uri, custom_ops=[DayFirst, DayLast, FFillNan, Date, Select, IsNull], @@ -38,12 +38,16 @@ if __name__ == "__main__": MARKET = "all" BENCHMARK = "SH000300" + DROP_LOAD_DATASET = False # flag wether to test [drop and load dataset] - start_time = "2019-01-01 00:00:00" - end_time = "2019-12-31 15:00:00" - train_end_time = "2019-05-31 15:00:00" - test_start_time = "2019-06-01 00:00:00" - + #start_time = "2019-01-01 00:00:00" + #end_time = "2019-12-31 15:00:00" + #train_end_time = "2019-05-31 15:00:00" + #test_start_time = "2019-06-01 00:00:00" + start_time = "2020-09-14 00:00:00" + end_time = "2021-01-18 16:00:00" + train_end_time = "2020-11-30 16:00:00" + test_start_time = "2020-12-01 00:00:00" ################################### # train model ################################### @@ -108,51 +112,57 @@ if __name__ == "__main__": Cal.get_calendar_day(freq="1min") ##=============get data============= + dataset = init_instance_by_config(task["dataset"]) + xtrain, xtest = dataset.prepare(["train", "test"]) + print(xtrain, xtest) + dataset_backtest = init_instance_by_config(task["dataset_backtest"]) - xtrain, xtest = dataset.prepare(["train", "test"]) backtest_train, backtest_test = dataset_backtest.prepare(["train", "test"]) - print(xtrain, xtest) print(backtest_train, backtest_test) + del xtrain, xtest del backtest_train, backtest_test - ##=============dump dataset============= - dataset.to_pickle(path="dataset.pkl") - dataset_backtest.to_pickle(path="dataset_backtest.pkl") - del dataset, dataset_backtest - ##=============reload dataset============= - file_dataset = open("dataset.pkl", "rb") - dataset = pickle.load(file_dataset) - file_dataset.close() + if DROP_LOAD_DATASET: - file_dataset_backtest = open("dataset_backtest.pkl", "rb") - dataset_backtest = pickle.load(file_dataset_backtest) + ##=============dump dataset============= + dataset.to_pickle(path="dataset.pkl") + dataset_backtest.to_pickle(path="dataset_backtest.pkl") - file_dataset_backtest.close() + del dataset, dataset_backtest + ##=============reload dataset============= + file_dataset = open("dataset.pkl", "rb") + dataset = pickle.load(file_dataset) + file_dataset.close() - ##=============reload_dataset============= - dataset.init(init_type=DataHandlerLP.IT_LS) - dataset_backtest.init(init_type=DataHandlerLP.IT_LS) + file_dataset_backtest = open("dataset_backtest.pkl", "rb") + dataset_backtest = pickle.load(file_dataset_backtest) - ##=============reinit qlib============= - qlib.init( - provider_uri=provider_uri, - custom_ops=[DayFirst, DayLast, FFillNan, Date, Select, IsNull], - redis_port=-1, - region=REG_CN, - auto_mount=False, - ) + file_dataset_backtest.close() - Cal.calendar(freq="1min") # load the calendar for cache - Cal.get_calendar_day(freq="1min") # load the calendar for cache + ##=============reload_dataset============= + dataset.init(init_type=DataHandlerLP.IT_LS) + dataset_backtest.init(init_type=DataHandlerLP.IT_LS) - ##=============test dataset - xtrain, xtest = dataset.prepare(["train", "test"]) - backtest_train, backtest_test = dataset_backtest.prepare(["train", "test"]) + ##=============reinit qlib============= + qlib.init( + provider_uri=provider_uri, + custom_ops=[DayFirst, DayLast, FFillNan, Date, Select, IsNull], + redis_port=-1, + region=REG_CN, + auto_mount=False, + ) - print(xtrain, xtest) - print(backtest_train, backtest_test) - del xtrain, xtest - del backtest_train, backtest_test + Cal.calendar(freq="1min") # load the calendar for cache + Cal.get_calendar_day(freq="1min") # load the calendar for cache + + ##=============test dataset + xtrain, xtest = dataset.prepare(["train", "test"]) + backtest_train, backtest_test = dataset_backtest.prepare(["train", "test"]) + + print(xtrain, xtest) + print(backtest_train, backtest_test) + del xtrain, xtest + del backtest_train, backtest_test