From 577923a9f00716a9d1703c11a4665c3bde8af43f Mon Sep 17 00:00:00 2001
From: Huoran Li
Date: Tue, 6 Dec 2022 20:49:56 +0800
Subject: [PATCH] Fix RL example bug (#1384)

* Fix data pipeline

* Add TODO
---
 examples/rl/scripts/gen_backtest_orders.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/examples/rl/scripts/gen_backtest_orders.py b/examples/rl/scripts/gen_backtest_orders.py
index cdf6f9cb8..1857f6447 100644
--- a/examples/rl/scripts/gen_backtest_orders.py
+++ b/examples/rl/scripts/gen_backtest_orders.py
@@ -14,11 +14,22 @@ args = parser.parse_args()
 
 np.random.seed(args.seed)
 
-path = os.path.join("data", "pickle", "backtesttest.pkl")  # TODO: rename file
+path = os.path.join("data", "pickle", "backtesttest.pkl")
 df = pickle.load(open(path, "rb")).reset_index()
 df["date"] = df["datetime"].dt.date.astype("datetime64")
 
 instruments = sorted(set(df["instrument"]))
+
+# TODO: The example is expected to be able to handle data containing missing values.
+# TODO: Currently, we just simply skip dates that contain missing data. We will add
+# TODO: this feature in the future.
+skip_dates = {}
+for instrument in instruments:
+    csv_df = pd.read_csv(os.path.join("data", "csv", f"{instrument}.csv"))
+    csv_df = csv_df[csv_df["close"].isna()]
+    dates = set([str(d).split(" ")[0] for d in csv_df["date"]])
+    skip_dates[instrument] = dates
+
 df_list = []
 for instrument in instruments:
     print(instrument)
@@ -26,6 +37,7 @@ for instrument in instruments:
 
     cur_df = df[df["instrument"] == instrument]
     dates = sorted(set([str(d).split(" ")[0] for d in cur_df["date"]]))
+    dates = [date for date in dates if date not in skip_dates[instrument]]
 
     n = args.num_order
     df_list.append(