1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-01 01:51:18 +08:00
Files
qlib/examples/rl_order_execution/scripts/collect_pickle_dataframe.py
Huoran Li 653c082e7a Order execution open source (#1447)
* Waiting for bin data

* Complete readme

* CI

* Add inst filter by time

* Update qlib/data/dataset/processor.py

* typo

* Fix time filter bug

* Add Filter and set Universe

* Complete data pipeline

* Fix Provider Logger Info Args

* Add DQN; a minor bugfix in ppo reward.

* update readme. modify assertion logic in strategy check.

* Fix Doc issues and fix black

* Fix pylint Error

---------

Co-authored-by: Young <afe.young@gmail.com>
Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
2023-03-13 12:06:28 +08:00

27 lines
993 B
Python
Executable File

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import os
import pickle
import pandas as pd
from joblib import Parallel, delayed
os.makedirs(os.path.join("data", "pickle_dataframe"), exist_ok=True)
def _collect(df: pd.DataFrame, instrument: str, tag: str) -> None:
cur = df[df["instrument"] == instrument].sort_values(by=["datetime"])
cur = cur.set_index(["instrument", "datetime", "date"])
pickle.dump(cur, open(os.path.join("data", "pickle_dataframe", tag, f"{instrument}.pkl"), "wb"))
for tag in ("backtest", "feature"):
df = pickle.load(open(os.path.join("data", "pickle", f"{tag}.pkl"), "rb"))
df = pd.concat(list(df.values())).reset_index()
df["date"] = df["datetime"].dt.date.astype("datetime64")
instruments = sorted(set(df["instrument"]))
os.makedirs(os.path.join("data", "pickle_dataframe", tag), exist_ok=True)
Parallel(n_jobs=-1, verbose=10)(delayed(_collect)(df, instrument, tag) for instrument in instruments)