mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-01 10:01:19 +08:00
232 lines
7.5 KiB
Python
232 lines
7.5 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
import os
|
|
from multiprocessing import Queue, Process
|
|
import time
|
|
|
|
|
|
def GLR(values):
|
|
"""
|
|
|
|
Calculate -P(value | value > 0) / P(value | value < 0)
|
|
|
|
"""
|
|
pos = []
|
|
neg = []
|
|
for i in values:
|
|
if i > 0:
|
|
pos.append(i)
|
|
elif i < 0:
|
|
neg.append(i)
|
|
return -np.mean(pos) / np.mean(neg)
|
|
|
|
|
|
class DFLogger(object):
|
|
"""The logger for single-assert backtest.
|
|
Would save .pkl and .log in log_dir
|
|
|
|
|
|
"""
|
|
|
|
def __init__(self, log_dir, order_dir, writer=None):
|
|
self.order_dir = order_dir + "/"
|
|
self.log_dir = log_dir + "/"
|
|
if not os.path.exists(log_dir):
|
|
os.mkdir(log_dir)
|
|
self.queue = Queue(100000)
|
|
self.raw_log_dir = self.log_dir
|
|
|
|
@staticmethod
|
|
def _worker(log_dir, order_dir, queue):
|
|
df_cache = {}
|
|
stat_cache = {}
|
|
if not os.path.exists(log_dir):
|
|
os.mkdir(log_dir)
|
|
while True:
|
|
info = queue.get(block=True)
|
|
if info == "stop":
|
|
summary = {}
|
|
for k, v in stat_cache.items():
|
|
if not k.startswith("money"):
|
|
summary[k + "_std"] = np.nanstd(v)
|
|
summary[k + "_mean"] = np.nanmean(v)
|
|
try:
|
|
for k in ["PR_sell", "ffr_sell", "PA_sell"]:
|
|
summary["weighted_" + k] = np.average(stat_cache[k], weights=stat_cache["money_sell"])
|
|
except:
|
|
# summary["weighted_" + k] = np.average(stat_cache[k], weights=stat_cache['money_sell'])
|
|
pass
|
|
try:
|
|
for k in ["PR_buy", "ffr_buy", "PA_buy"]:
|
|
summary["weighted_" + k] = np.average(stat_cache[k], weights=stat_cache["money_buy"])
|
|
except:
|
|
pass
|
|
try:
|
|
for k in ["obs0_PR", "ffr", "PA"]:
|
|
summary["weighted_" + k] = np.average(stat_cache[k], weights=stat_cache["money"])
|
|
except:
|
|
pass
|
|
summary["GLR"] = GLR(stat_cache["PA"])
|
|
try:
|
|
summary["GLR_sell"] = GLR(stat_cache["PA_sell"])
|
|
except:
|
|
pass
|
|
try:
|
|
summary["GLR_buy"] = GLR(stat_cache["PA_buy"])
|
|
except:
|
|
pass
|
|
queue.put(summary)
|
|
break
|
|
elif len(info) == 0:
|
|
continue
|
|
else:
|
|
df = info.pop("df")
|
|
res = info.pop("res")
|
|
ins = df.index[0][0]
|
|
if ins not in df_cache:
|
|
df_cache[ins] = (
|
|
[],
|
|
[],
|
|
len(pd.read_pickle(order_dir + ins + ".pkl.target")),
|
|
)
|
|
df_cache[ins][0].append(df)
|
|
df_cache[ins][1].append(res)
|
|
if len(df_cache[ins][0]) == df_cache[ins][2]:
|
|
pd.concat(df_cache[ins][0]).to_pickle(log_dir + ins + ".log")
|
|
pd.concat(df_cache[ins][1]).to_pickle(log_dir + ins + ".pkl")
|
|
del df_cache[ins]
|
|
for k, v in info.items():
|
|
if k not in stat_cache:
|
|
stat_cache[k] = []
|
|
if hasattr(v, "__len__"):
|
|
stat_cache[k] += list(v)
|
|
else:
|
|
stat_cache[k].append(v)
|
|
|
|
def reset(self):
|
|
""" """
|
|
while not self.queue.empty():
|
|
self.queue.get()
|
|
assert self.queue.empty()
|
|
self.child = Process(target=self._worker, args=(self.log_dir, self.order_dir, self.queue), daemon=True,)
|
|
self.child.start()
|
|
|
|
def set_step(self, step):
|
|
|
|
self.log_dir = f"{self.raw_log_dir}{step}/"
|
|
self.reset()
|
|
|
|
def __call__(self, infos):
|
|
for info in infos:
|
|
if "env_id" in info:
|
|
info.pop("env_id")
|
|
self.update(infos)
|
|
|
|
def update(self, infos):
|
|
"""store values in info into the logger"""
|
|
for info in infos:
|
|
self.queue.put(info, block=True)
|
|
|
|
def summary(self):
|
|
""":return: The mean and std of values in infos stored in logger"""
|
|
summary = {}
|
|
self.queue.put("stop", block=True)
|
|
self.child.join()
|
|
self.child.close()
|
|
assert self.queue.qsize() == 1
|
|
summary = self.queue.get()
|
|
|
|
return summary
|
|
|
|
|
|
class InfoLogger(DFLogger):
|
|
""" """
|
|
|
|
def __init__(self, *args):
|
|
self.stat_cache = {}
|
|
self.queue = Queue(10000)
|
|
self.child = Process(target=self._worker, args=(self.queue,), daemon=True)
|
|
self.child.start()
|
|
|
|
def _worker(logdir, queue):
|
|
stat_cache = {}
|
|
while True:
|
|
info = queue.get(block=True)
|
|
if info == "stop":
|
|
summary = {}
|
|
for k, v in stat_cache.items():
|
|
if not k.startswith("money"):
|
|
summary[k + "_std"] = np.nanstd(v)
|
|
summary[k + "_mean"] = np.nanmean(v)
|
|
try:
|
|
for k in ["PR_sell", "ffr_sell", "PA_sell"]:
|
|
summary["weighted_" + k] = np.average(stat_cache[k], weights=stat_cache["money_sell"])
|
|
except:
|
|
pass
|
|
try:
|
|
for k in ["PR_buy", "ffr_buy", "PA_buy"]:
|
|
summary["weighted_" + k] = np.average(stat_cache[k], weights=stat_cache["money_buy"])
|
|
except:
|
|
pass
|
|
try:
|
|
for k in ["obs0_PR", "ffr", "PA"]:
|
|
summary["weighted_" + k] = np.average(stat_cache[k], weights=stat_cache["money"])
|
|
except:
|
|
pass
|
|
summary["GLR"] = GLR(stat_cache["PA"])
|
|
try:
|
|
summary["GLR_sell"] = GLR(stat_cache["PA_sell"])
|
|
except:
|
|
pass
|
|
try:
|
|
summary["GLR_buy"] = GLR(stat_cache["PA_buy"])
|
|
except:
|
|
pass
|
|
queue.put(summary)
|
|
stat_cache = {}
|
|
time.sleep(5)
|
|
continue
|
|
if len(info) == 0:
|
|
continue
|
|
for k, v in info.items():
|
|
if k == "res" or k == "df":
|
|
continue
|
|
if k not in stat_cache:
|
|
stat_cache[k] = []
|
|
if hasattr(v, "__len__"):
|
|
stat_cache[k] += list(v)
|
|
else:
|
|
stat_cache[k].append(v)
|
|
|
|
def _update(self, info):
|
|
if len(info) == 0:
|
|
return
|
|
ins = df.index[0][0]
|
|
for k, v in info.items():
|
|
if k not in self.stat_cache:
|
|
self.stat_cache[k] = []
|
|
if hasattr(v, "__len__"):
|
|
self.stat_cache[k] += list(v)
|
|
else:
|
|
self.stat_cache[k].append(v)
|
|
|
|
def summary(self):
|
|
""" """
|
|
while not self.queue.empty():
|
|
# print('not empty')
|
|
# print(self.queue.qsize())
|
|
time.sleep(1)
|
|
self.queue.put("stop")
|
|
# self.child.join()
|
|
time.sleep(1)
|
|
while not self.queue.qsize() == 1:
|
|
# print(self.queue.qsize())
|
|
time.sleep(1)
|
|
assert self.queue.qsize() == 1
|
|
summary = self.queue.get()
|
|
|
|
return summary
|
|
|
|
def set_step(self, step):
|
|
return
|