mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-03 02:50:58 +08:00
Order execution open source (#1447)
* Waiting for bin data * Complete readme * CI * Add inst filter by time * Update qlib/data/dataset/processor.py * typo * Fix time filter bug * Add Filter and set Universe * Complete data pipeline * Fix Provider Logger Info Args * Add DQN; a minor bugfix in ppo reward. * update readme. modify assertion logic in strategy check. * Fix Doc issues and fix black * Fix pylint Error --------- Co-authored-by: Young <afe.young@gmail.com> Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
This commit is contained in:
26
examples/rl_order_execution/scripts/collect_pickle_dataframe.py
Executable file
26
examples/rl_order_execution/scripts/collect_pickle_dataframe.py
Executable file
@@ -0,0 +1,26 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import os
|
||||
import pickle
|
||||
import pandas as pd
|
||||
from joblib import Parallel, delayed
|
||||
|
||||
# Create the root output directory up front; `exist_ok=True` makes re-running the script harmless.
os.makedirs(os.path.join("data", "pickle_dataframe"), exist_ok=True)
|
||||
|
||||
|
||||
def _collect(df: pd.DataFrame, instrument: str, tag: str) -> None:
    """Dump the rows of ``df`` that belong to one instrument into its own pickle file.

    The rows whose ``instrument`` column equals ``instrument`` are selected,
    sorted by the ``datetime`` column, re-indexed by
    ``(instrument, datetime, date)`` and written to
    ``data/pickle_dataframe/<tag>/<instrument>.pkl``.

    Parameters
    ----------
    df
        Frame with at least the columns ``instrument``, ``datetime`` and ``date``.
    instrument
        Value of the ``instrument`` column to extract; also used as the file stem.
    tag
        Name of the sub-directory under ``data/pickle_dataframe``. The directory
        must already exist; this function does not create it.
    """
    cur = df[df["instrument"] == instrument].sort_values(by=["datetime"])
    cur = cur.set_index(["instrument", "datetime", "date"])
    out_path = os.path.join("data", "pickle_dataframe", tag, f"{instrument}.pkl")
    # Close the handle deterministically instead of relying on garbage collection:
    # this function is called once per instrument, so leaked handles add up.
    with open(out_path, "wb") as fout:
        pickle.dump(cur, fout)
|
||||
|
||||
|
||||
# For each dataset, load the combined pickle, flatten it into one frame and
# write one pickle per instrument under data/pickle_dataframe/<tag>/.
for tag in ("backtest", "feature"):
    # NOTE(review): pickle executes arbitrary code on load; only run this on
    # locally generated data/pickle/<tag>.pkl files.
    with open(os.path.join("data", "pickle", f"{tag}.pkl"), "rb") as fin:
        df = pickle.load(fin)
    # The loaded object is used as a mapping whose values are DataFrames
    # (presumably one per segment -- confirm against the producer).
    df = pd.concat(list(df.values())).reset_index()
    # Calendar day of each bar, stored as a timestamp. The explicit "[ns]" unit
    # is required: pandas 2.x rejects the unit-less "datetime64" dtype.
    df["date"] = df["datetime"].dt.date.astype("datetime64[ns]")
    instruments = sorted(set(df["instrument"]))

    os.makedirs(os.path.join("data", "pickle_dataframe", tag), exist_ok=True)

    # One job per instrument, using all available cores (n_jobs=-1).
    Parallel(n_jobs=-1, verbose=10)(delayed(_collect)(df, instrument, tag) for instrument in instruments)
|
||||
46
examples/rl_order_execution/scripts/gen_pickle_data.py
Executable file
46
examples/rl_order_execution/scripts/gen_pickle_data.py
Executable file
@@ -0,0 +1,46 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import yaml
|
||||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
from copy import deepcopy
|
||||
|
||||
from qlib.contrib.data.highfreq_provider import HighFreqProvider
|
||||
|
||||
# NOTE(review): FullLoader can build Python objects from tags such as
# `!!python/tuple`, which `yaml.safe_load` rejects. Only load trusted configs.
loader = yaml.FullLoader

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", type=str, default="config.yml")
    parser.add_argument("-d", "--dest", type=str, default=".")
    parser.add_argument("-s", "--split", type=str, choices=["none", "date", "stock", "both"], default="stock")
    args = parser.parse_args()

    # Read the config inside a context manager so the file handle is closed.
    with open(args.config) as config_file:
        conf = yaml.load(config_file, Loader=loader)

    # Re-root every top-level section that declares a "path" under --dest.
    for section in conf.values():
        if isinstance(section, dict) and "path" in section:
            section["path"] = os.path.join(args.dest, section["path"])
    provider = HighFreqProvider(**conf)

    # Gen dataframe
    # The return values are not needed here; the calls are kept for their effect
    # on the provider (presumably writing the pickle at conf["path"] -- confirm).
    if "feature_conf" in conf:
        provider._gen_dataframe(deepcopy(provider.feature_conf))
    if "backtest_conf" in conf:
        provider._gen_dataframe(deepcopy(provider.backtest_conf))

    # Turn "<dir>/<name>.<ext>" into "<dir>/<name>/" so the split datasets below
    # are written into a directory named after the pickle file.
    provider.feature_conf["path"] = os.path.splitext(provider.feature_conf["path"])[0] + "/"
    provider.backtest_conf["path"] = os.path.splitext(provider.backtest_conf["path"])[0] + "/"

    # Split by date
    if args.split in ("date", "both"):
        provider._gen_day_dataset(deepcopy(provider.feature_conf), "feature")
        provider._gen_day_dataset(deepcopy(provider.backtest_conf), "backtest")

    # Split by stock
    if args.split in ("stock", "both"):
        provider._gen_stock_dataset(deepcopy(provider.feature_conf), "feature")
        provider._gen_stock_dataset(deepcopy(provider.backtest_conf), "backtest")

    # Remove the "stat/" directory in the current working directory, if any.
    shutil.rmtree("stat/", ignore_errors=True)
|
||||
42
examples/rl_order_execution/scripts/gen_training_orders.py
Executable file
42
examples/rl_order_execution/scripts/gen_training_orders.py
Executable file
@@ -0,0 +1,42 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from pathlib import Path
|
||||
|
||||
# Directory holding one "<stock>.pkl" DataFrame per instrument (input).
DATA_PATH = Path("data") / "pickle_dataframe" / "backtest"
# Root directory under which the generated order files are written (output).
OUTPUT_PATH = Path("data") / "orders"
|
||||
|
||||
|
||||
def generate_order(stock: str, start_idx: int, end_idx: int) -> None:
    """Generate order files for one stock and write them split into train/valid/test/all.

    Reads ``DATA_PATH/<stock>.pkl``, keeps rows ``start_idx`` .. ``end_idx - 1``
    of every ``date`` group, averages the remaining rows per index levels
    ``(2, 0)`` and derives an ``amount`` column from the averaged ``$volume0``.
    Non-empty splits are written to ``OUTPUT_PATH/<split>/<stock>.pkl.target``.

    Parameters
    ----------
    stock
        File stem of the instrument pickle under ``DATA_PATH``.
    start_idx, end_idx
        Positional half-open range of rows taken from each day.

    Notes
    -----
    * Assumes the frame is indexed by (instrument, datetime, date), so levels
      ``(2, 0)`` are (date, instrument) -- TODO confirm against the producer.
    * ``np.random.lognormal`` is called without ``size``, so a single random
      factor scales every row of this stock. The result depends on the global
      NumPy random state.
    """
    df = pd.read_pickle(DATA_PATH / f"{stock}.pkl")
    # `take` prepends the group key as a new outer index level; drop it again.
    df = df.groupby("date").take(range(start_idx, end_idx)).droplevel(level=0)

    order_all = pd.DataFrame(df.groupby(level=(2, 0)).mean().dropna())
    order_all["amount"] = np.random.lognormal(-3.28, 1.14) * order_all["$volume0"]
    order_all = order_all[order_all["amount"] > 0.0]
    order_all["order_type"] = 0
    order_all = order_all.drop(columns=["$volume0"])

    # Split boundaries (inclusive upper bounds of the train and valid periods).
    train_end = pd.Timestamp("2021-06-30")
    valid_end = pd.Timestamp("2021-09-30")
    dates = order_all.index.get_level_values(0)
    order_train = order_all[dates <= train_end]
    order_valid = order_all[(dates > train_end) & (dates <= valid_end)]
    order_test = order_all[dates > valid_end]

    for order, tag in zip((order_train, order_valid, order_test, order_all), ("train", "valid", "test", "all")):
        path = OUTPUT_PATH / tag
        os.makedirs(path, exist_ok=True)
        # Empty splits are skipped, so no file is written for them.
        if len(order) > 0:
            order.to_pickle(path / f"{stock}.pkl.target")
|
||||
|
||||
|
||||
# Fixed seed so both the sampled stock set and the per-stock random factors
# drawn inside generate_order are reproducible.
np.random.seed(1234)
# Only "<stock>.pkl" entries are instruments; ignore anything else in the directory.
file_list = sorted(f for f in os.listdir(DATA_PATH) if f.endswith(".pkl"))
stocks = [f[: -len(".pkl")] for f in file_list]
# Sample at most 100 stocks. Without the cap, `replace=False` raises ValueError
# when fewer than 100 instrument files are available.
stocks = sorted(np.random.choice(stocks, size=min(100, len(stocks)), replace=False))
for stock in tqdm(stocks):
    # 240 // 5 - 1 == 47 rows per day (presumably 240 trading minutes in
    # 5-minute bars, minus the last bar -- confirm).
    generate_order(stock, 0, 240 // 5 - 1)
|
||||
15
examples/rl_order_execution/scripts/merge_orders.py
Executable file
15
examples/rl_order_execution/scripts/merge_orders.py
Executable file
@@ -0,0 +1,15 @@
|
||||
import pickle
|
||||
import os
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
|
||||
# Merge the per-stock order files of each split into a single pickle
# data/orders/<tag>_orders.pkl.
for tag in ["test", "valid"]:
    order_dir = os.path.join("data/orders/", tag)
    # Sort the listing: os.listdir order is arbitrary, and the concatenation
    # order determines the row order of the merged frame.
    files = sorted(os.listdir(order_dir))
    dfs = []
    for f in tqdm(files):
        # NOTE(review): pickle is only safe on trusted, locally generated files.
        with open(os.path.join(order_dir, f), "rb") as fin:
            df = pickle.load(fin)
        # The merged order file does not carry the "$close0" column.
        df = df.drop(["$close0"], axis=1)
        dfs.append(df)

    total_df = pd.concat(dfs)
    with open(os.path.join("data", "orders", f"{tag}_orders.pkl"), "wb") as fout:
        pickle.dump(total_df, fout)
|
||||
77
examples/rl_order_execution/scripts/pickle_data_config.yml
Executable file
77
examples/rl_order_execution/scripts/pickle_data_config.yml
Executable file
@@ -0,0 +1,77 @@
|
||||
# start & end time for training/validation/test datasets
|
||||
start_time: !!str &start 2020-01-01
|
||||
end_time: !!str &end 2021-12-31
|
||||
train_end_time: !!str &tend 2021-06-30
|
||||
valid_start_time: !!str &vstart 2021-07-01
|
||||
valid_end_time: !!str &vend 2021-09-30
|
||||
test_start_time: !!str &tstart 2021-10-01
|
||||
# the instrument set
|
||||
instruments: &ins csi300s19_22
|
||||
# qlib related configuration
|
||||
qlib_conf:
|
||||
provider_uri:
|
||||
5min: ./data/bin # path to generated qlib bin
|
||||
redis_port: 233
|
||||
feature_conf:
|
||||
path: ./data/pickle/feature.pkl # output path of feature
|
||||
class: DatasetH
|
||||
module_path: qlib.data.dataset
|
||||
kwargs:
|
||||
handler:
|
||||
class: HighFreqGeneralHandler
|
||||
module_path: qlib.contrib.data.highfreq_handler
|
||||
kwargs:
|
||||
start_time: *start
|
||||
end_time: *end
|
||||
fit_start_time: *start
|
||||
fit_end_time: *tend
|
||||
instruments: *ins
|
||||
day_length: 240 # how many minutes in one trading day
|
||||
freq: 5min
|
||||
columns: ["$open", "$high", "$low", "$close"]
|
||||
infer_processors:
|
||||
- class: HighFreqNorm
|
||||
module_path: qlib.contrib.data.highfreq_processor
|
||||
kwargs:
|
||||
feature_save_dir: ./stat/ # output path of statistics of features (for feature normalization)
|
||||
norm_groups:
|
||||
price: 8
|
||||
volume: 2
|
||||
inst_processors:
|
||||
- class: TimeRangeFlt
|
||||
module_path: qlib.data.dataset.processor
|
||||
kwargs:
|
||||
start_time: "2020-01-01"
|
||||
end_time: "2021-12-31"
|
||||
freq: 5min
|
||||
segments:
|
||||
train: !!python/tuple [*start, *tend]
|
||||
valid: !!python/tuple [*vstart, *vend]
|
||||
test: !!python/tuple [*tstart, *end]
|
||||
backtest_conf:
|
||||
path: ./data/pickle/backtest.pkl # output path of backtest
|
||||
class: DatasetH
|
||||
module_path: qlib.data.dataset
|
||||
kwargs:
|
||||
handler:
|
||||
class: HighFreqGeneralBacktestHandler
|
||||
module_path: qlib.contrib.data.highfreq_handler
|
||||
kwargs:
|
||||
start_time: *start
|
||||
end_time: *end
|
||||
instruments: *ins
|
||||
day_length: 240
|
||||
freq: 5min
|
||||
columns: ["$close", "$volume"]
|
||||
inst_processors:
|
||||
- class: TimeRangeFlt
|
||||
module_path: qlib.data.dataset.processor
|
||||
kwargs:
|
||||
start_time: "2020-01-01"
|
||||
end_time: "2021-12-31"
|
||||
freq: 5min
|
||||
segments:
|
||||
train: !!python/tuple [*start, *tend]
|
||||
valid: !!python/tuple [*vstart, *vend]
|
||||
test: !!python/tuple [*tstart, *end]
|
||||
freq: 5min
|
||||
Reference in New Issue
Block a user