mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-01 01:51:18 +08:00
* Waiting for bin data * Complete readme * CI * Add inst filter by time * Update qlib/data/dataset/processor.py * typo * Fix time filter bug * Add Filter and set Universe * Complete data pipeline * Fix Provider Logger Info Args * Add DQN; a minor bugfix in ppo reward. * update readme. modify assertion logic in strategy check. * Fix Doc issues and fix black * Fix pylint Error --------- Co-authored-by: Young <afe.young@gmail.com> Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
27 lines
993 B
Python
Executable File
27 lines
993 B
Python
Executable File
# Copyright (c) Microsoft Corporation.
|
|
# Licensed under the MIT License.
|
|
|
|
import os
|
|
import pickle
|
|
import pandas as pd
|
|
from joblib import Parallel, delayed
|
|
|
|
# Create the output root up front; with exist_ok=True this is a no-op when
# the directory is already there.
os.makedirs(
    os.path.join("data", "pickle_dataframe"),
    exist_ok=True,
)
|
|
|
|
|
|
def _collect(df: pd.DataFrame, instrument: str, tag: str) -> None:
    """Pickle the rows of a single instrument into its own file.

    The rows of ``df`` whose ``instrument`` column equals ``instrument`` are
    sorted by ``datetime``, indexed by ``(instrument, datetime, date)`` and
    written to ``data/pickle_dataframe/<tag>/<instrument>.pkl``.

    Parameters
    ----------
    df:
        Frame holding ``instrument``, ``datetime`` and ``date`` columns.
    instrument:
        Value of the ``instrument`` column to select; also the output file stem.
    tag:
        Name of the sub-directory under ``data/pickle_dataframe``. The
        directory is not created here and must already exist.
    """
    cur = df[df["instrument"] == instrument].sort_values(by=["datetime"])
    cur = cur.set_index(["instrument", "datetime", "date"])
    target = os.path.join("data", "pickle_dataframe", tag, f"{instrument}.pkl")
    # A context manager closes (and therefore flushes) the handle
    # deterministically instead of leaving it to garbage collection.
    with open(target, "wb") as sink:
        pickle.dump(cur, sink)
|
|
|
|
|
|
# For each source pickle, flatten its frames into one table and write one
# pickle per instrument under data/pickle_dataframe/<tag>/.
for tag in ("backtest", "feature"):
    # NOTE(review): pickle.load can execute arbitrary code; only run this on
    # files produced by the trusted data pipeline.
    with open(os.path.join("data", "pickle", f"{tag}.pkl"), "rb") as source:
        frames = pickle.load(source)
    # Presumably a dict of DataFrames indexed by (instrument, datetime);
    # reset_index exposes those levels as the columns used below - TODO confirm.
    df = pd.concat(list(frames.values())).reset_index()
    # Calendar day of each timestamp, stored as a datetime column. The unit is
    # spelled out because pandas 2.x rejects the unit-less "datetime64" dtype.
    df["date"] = df["datetime"].dt.date.astype("datetime64[ns]")
    instruments = sorted(set(df["instrument"]))

    os.makedirs(os.path.join("data", "pickle_dataframe", tag), exist_ok=True)

    # Fan out one task per instrument across all available cores.
    Parallel(n_jobs=-1, verbose=10)(delayed(_collect)(df, instrument, tag) for instrument in instruments)
|