mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
optimize_CI (#1314)
This commit is contained in:
@@ -13,7 +13,7 @@ for tag in ("backtest", "feature"):
|
||||
df = pd.concat(list(df.values())).reset_index()
|
||||
df["date"] = df["datetime"].dt.date.astype("datetime64")
|
||||
instruments = sorted(set(df["instrument"]))
|
||||
|
||||
|
||||
os.makedirs(os.path.join("data", "pickle_dataframe", tag), exist_ok=True)
|
||||
for instrument in tqdm(instruments):
|
||||
cur = df[df["instrument"] == instrument].sort_values(by=["datetime"])
|
||||
|
||||
@@ -22,19 +22,21 @@ instruments = sorted(set(df["instrument"]))
|
||||
df_list = []
|
||||
for instrument in instruments:
|
||||
print(instrument)
|
||||
|
||||
|
||||
cur_df = df[df["instrument"] == instrument]
|
||||
|
||||
|
||||
dates = sorted(set([str(d).split(" ")[0] for d in cur_df["date"]]))
|
||||
|
||||
|
||||
n = args.num_order
|
||||
df_list.append(
|
||||
pd.DataFrame({
|
||||
"date": sorted(np.random.choice(dates, size=n, replace=False)),
|
||||
"instrument": [instrument] * n,
|
||||
"amount": np.random.randint(low=3, high=11, size=n) * 100.0,
|
||||
"order_type": np.random.randint(low=0, high=2, size=n),
|
||||
}).set_index(["date", "instrument"]),
|
||||
pd.DataFrame(
|
||||
{
|
||||
"date": sorted(np.random.choice(dates, size=n, replace=False)),
|
||||
"instrument": [instrument] * n,
|
||||
"amount": np.random.randint(low=3, high=11, size=n) * 100.0,
|
||||
"order_type": np.random.randint(low=0, high=2, size=n),
|
||||
}
|
||||
).set_index(["date", "instrument"]),
|
||||
)
|
||||
|
||||
total_df = pd.concat(df_list)
|
||||
|
||||
@@ -30,8 +30,8 @@ if __name__ == "__main__":
|
||||
if "backtest_conf" in conf:
|
||||
backtest = provider._gen_dataframe(deepcopy(provider.backtest_conf))
|
||||
|
||||
provider.feature_conf['path'] = os.path.splitext(provider.feature_conf['path'])[0] + '/'
|
||||
provider.backtest_conf['path'] = os.path.splitext(provider.backtest_conf['path'])[0] + '/'
|
||||
provider.feature_conf["path"] = os.path.splitext(provider.feature_conf["path"])[0] + "/"
|
||||
provider.backtest_conf["path"] = os.path.splitext(provider.backtest_conf["path"])[0] + "/"
|
||||
# Split by date
|
||||
if args.split == "date" or args.split == "both":
|
||||
provider._gen_day_dataset(deepcopy(provider.feature_conf), "feature")
|
||||
|
||||
@@ -23,15 +23,17 @@ for group, n in zip(("train", "valid", "test"), (args.train_size, args.valid_siz
|
||||
path = os.path.join("data", "pickle", f"backtest{group}.pkl")
|
||||
df = pickle.load(open(path, "rb")).reset_index()
|
||||
df["date"] = df["datetime"].dt.date.astype("datetime64")
|
||||
|
||||
|
||||
dates = sorted(set([str(d).split(" ")[0] for d in df["date"]]))
|
||||
|
||||
data_df = pd.DataFrame({
|
||||
"date": sorted(np.random.choice(dates, size=n, replace=False)),
|
||||
"instrument": [args.stock] * n,
|
||||
"amount": np.random.randint(low=3, high=11, size=n) * 100.0,
|
||||
"order_type": [0] * n,
|
||||
}).set_index(["date", "instrument"])
|
||||
data_df = pd.DataFrame(
|
||||
{
|
||||
"date": sorted(np.random.choice(dates, size=n, replace=False)),
|
||||
"instrument": [args.stock] * n,
|
||||
"amount": np.random.randint(low=3, high=11, size=n) * 100.0,
|
||||
"order_type": [0] * n,
|
||||
}
|
||||
).set_index(["date", "instrument"])
|
||||
|
||||
os.makedirs(os.path.join("data", "training_order_split", group), exist_ok=True)
|
||||
pickle.dump(data_df, open(os.path.join("data", "training_order_split", group, f"{args.stock}.pkl"), "wb"))
|
||||
|
||||
Reference in New Issue
Block a user