mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-30 09:31:18 +08:00
185 lines
6.0 KiB
Python
185 lines
6.0 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
from multiprocessing.context import Process
|
|
from multiprocessing import Queue
|
|
|
|
import os
|
|
import sys
|
|
|
|
sys.path.append("..")
|
|
|
|
|
|
def toArray(data):
|
|
if type(data) == np.ndarray:
|
|
return data
|
|
|
|
elif type(data) == list:
|
|
data = np.array(data)
|
|
return data
|
|
|
|
elif type(data) == pd.DataFrame:
|
|
share_index = toArray(data.index)
|
|
share_value = toArray(data.values)
|
|
share_colmns = toArray(data.columns)
|
|
return share_index, share_value, share_colmns
|
|
|
|
else:
|
|
try:
|
|
share_array = np.array(data)
|
|
return share_array
|
|
except:
|
|
raise NotImplementedError
|
|
|
|
|
|
class Sampler:
|
|
"""The sampler for training of single-assert RL."""
|
|
|
|
def __init__(self, config):
|
|
self.raw_dir = config["raw_dir"] + "/"
|
|
self.order_dir = config["order_dir"] + "/"
|
|
self.ins_list = [f[:-11] for f in os.listdir(self.order_dir) if f.endswith("target")]
|
|
self.features = config["features"]
|
|
self.queue = Queue(1000)
|
|
self.child = None
|
|
self.ins = None
|
|
self.raw_df = None
|
|
self.df_list = None
|
|
self.order_df = None
|
|
|
|
@staticmethod
|
|
def _worker(order_dir, raw_dir, features, ins_list, queue):
|
|
ins = None
|
|
index = 0
|
|
date_list = []
|
|
while True:
|
|
if ins is None or index == len(date_list):
|
|
ins = np.random.choice(ins_list, 1)[0]
|
|
# print(ins)
|
|
order_df = pd.read_pickle(order_dir + ins + ".pkl.target")
|
|
feature_df_list = []
|
|
for feature in features:
|
|
feature_df_list.append(pd.read_pickle(f"{feature['loc']}/{ins}.pkl"))
|
|
raw_df = pd.read_pickle(raw_dir + ins + ".pkl.backtest")
|
|
date_list = order_df.index.get_level_values(0).tolist()
|
|
index = 0
|
|
date = date_list[index]
|
|
day_order_df = order_df.iloc[index]
|
|
target = day_order_df["amount"]
|
|
index += 1
|
|
if target == 0:
|
|
continue
|
|
day_feature_dfs = []
|
|
day_raw_df = raw_df.loc[pd.IndexSlice[ins, :, date]]
|
|
is_buy = bool(day_order_df["order_type"])
|
|
for df in feature_df_list:
|
|
day_feature_dfs.append(df.loc[ins, date].values)
|
|
day_feature_dfs = np.array(day_feature_dfs)
|
|
day_raw_df_index, day_raw_df_value, day_raw_df_column = toArray(day_raw_df)
|
|
day_feature_dfs_ = toArray(day_feature_dfs)
|
|
queue.put(
|
|
(ins, date, day_raw_df_value, day_raw_df_column, day_raw_df_index, day_feature_dfs_, target, is_buy,),
|
|
block=True,
|
|
)
|
|
|
|
def _sample_ins(self):
|
|
""" """
|
|
return np.random.choice(self.ins_list, 1)[0]
|
|
|
|
def reset(self):
|
|
""" """
|
|
if self.child is None:
|
|
self.child = Process(
|
|
target=self._worker,
|
|
args=(self.order_dir, self.raw_dir, self.features, self.ins_list, self.queue,),
|
|
daemon=True,
|
|
)
|
|
self.child.start()
|
|
|
|
def sample(self):
|
|
""" """
|
|
sample = self.queue.get(block=True)
|
|
return sample
|
|
|
|
def stop(self):
|
|
""" """
|
|
try:
|
|
self.child.terminate()
|
|
except:
|
|
for p in self.child:
|
|
p.terminate()
|
|
|
|
|
|
class TestSampler(Sampler):
|
|
"""The sampler for backtest of single-assert strategies."""
|
|
|
|
def __init__(self, config):
|
|
super().__init__(config)
|
|
self.ins_index = -1
|
|
|
|
def _sample_ins(self):
|
|
""" """
|
|
self.ins_index += 1
|
|
if self.ins_index >= len(self.ins_list):
|
|
return None
|
|
else:
|
|
return self.ins_list[self.ins_index]
|
|
|
|
@staticmethod
|
|
def _worker(order_dir, raw_dir, features, ins_list, queue):
|
|
for ins in ins_list:
|
|
order_df = pd.read_pickle(order_dir + ins + ".pkl.target")
|
|
df_list = []
|
|
for feature in features:
|
|
df_list.append(pd.read_pickle(f"{feature['loc']}/{ins}.pkl"))
|
|
raw_df = pd.read_pickle(raw_dir + ins + ".pkl.backtest")
|
|
date_list = order_df.index.get_level_values(0).tolist()
|
|
for index in range(len(date_list)):
|
|
date = date_list[index]
|
|
day_df_list = []
|
|
day_raw_df = raw_df.loc[pd.IndexSlice[ins, :, date]]
|
|
day_order_df = order_df.iloc[index]
|
|
target = day_order_df["amount"]
|
|
if target == 0:
|
|
continue
|
|
is_buy = bool(day_order_df["order_type"])
|
|
for df in df_list:
|
|
day_df_list.append(df.loc[ins, date].values)
|
|
day_feature_dfs = np.array(day_df_list)
|
|
day_raw_df_index, day_raw_df_value, day_raw_df_column = toArray(day_raw_df)
|
|
day_feature_dfs_ = toArray(day_feature_dfs)
|
|
queue.put(
|
|
(
|
|
ins,
|
|
date,
|
|
day_raw_df_value,
|
|
day_raw_df_column,
|
|
day_raw_df_index,
|
|
day_feature_dfs_,
|
|
target,
|
|
is_buy,
|
|
),
|
|
block=True,
|
|
)
|
|
for _ in range(100):
|
|
queue.put(None)
|
|
|
|
def reset(self, order_dir=None):
|
|
"""
|
|
|
|
reset the sampler and change self.order_dir if order_dir is not None.
|
|
|
|
"""
|
|
if order_dir:
|
|
self.order_dir = order_dir
|
|
self.ins_list = [f[:-11] for f in os.listdir(self.order_dir) if f.endswith("target")]
|
|
if not self.child is None:
|
|
self.child.terminate()
|
|
while not self.queue.empty():
|
|
self.queue.get()
|
|
self.child = Process(
|
|
target=self._worker,
|
|
args=(self.order_dir, self.raw_dir, self.features, self.ins_list, self.queue,),
|
|
daemon=True,
|
|
)
|
|
self.child.start()
|