1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-01 10:01:19 +08:00
Files
qlib/examples/trade/logger/single_logger.py
Yuchen Fang a03b08bb4c format
2021-01-28 00:41:02 +08:00

232 lines
7.5 KiB
Python

import pandas as pd
import numpy as np
import os
from multiprocessing import Queue, Process
import time
def GLR(values):
    """Return the gain-loss ratio of ``values``.

    The result is ``-mean(positive values) / mean(negative values)``.
    Entries that are neither ``> 0`` nor ``< 0`` (zeros, NaNs) are ignored.
    ``values`` is traversed exactly once, so one-shot iterables are fine.
    When either side has no entries, numpy's mean of an empty list is NaN
    (with a RuntimeWarning) and so is the result.
    """
    gains, losses = [], []
    for value in values:
        if value > 0:
            bucket = gains
        elif value < 0:
            bucket = losses
        else:
            continue
        bucket.append(value)
    mean_gain = np.mean(gains)
    mean_loss = np.mean(losses)
    return -mean_gain / mean_loss
class DFLogger(object):
    """Logger for single-asset backtests that aggregates in a child process.

    Info dicts pushed through :meth:`update` / ``__call__`` travel over a
    ``multiprocessing.Queue`` to a worker process started by :meth:`reset`.
    The worker writes one ``<instrument>.log`` and one ``<instrument>.pkl``
    pickle per instrument into the current log directory and accumulates the
    remaining entries of every info dict; :meth:`summary` stops the worker and
    returns the aggregated statistics.
    """

    # Failures that make a single summary statistic unavailable: missing key,
    # mismatched/invalid data, or weights summing to zero in ``np.average``.
    _STAT_ERRORS = (KeyError, TypeError, ValueError, ZeroDivisionError)

    def __init__(self, log_dir, order_dir, writer=None):
        """Create the logger and make sure ``log_dir`` exists.

        :param log_dir: directory that receives the ``.log`` / ``.pkl`` files.
        :param order_dir: directory holding ``<instrument>.pkl.target`` files.
        :param writer: accepted for interface compatibility; not used here.
        """
        self.order_dir = order_dir + "/"
        self.log_dir = log_dir + "/"
        # makedirs(exist_ok=True) also creates missing parents and has no
        # check-then-create race, unlike os.path.exists + os.mkdir.
        os.makedirs(log_dir, exist_ok=True)
        self.queue = Queue(100000)
        self.raw_log_dir = self.log_dir

    @staticmethod
    def _summarize(stat_cache):
        """Build the summary dict from the accumulated ``stat_cache``.

        Every statistic is best-effort: one that cannot be computed is left
        out of the result instead of aborting the whole summary.
        """
        summary = {}
        for key, values in stat_cache.items():
            if not key.startswith("money"):
                summary[key + "_std"] = np.nanstd(values)
                summary[key + "_mean"] = np.nanmean(values)

        weighted_groups = (
            ("money_sell", ("PR_sell", "ffr_sell", "PA_sell")),
            ("money_buy", ("PR_buy", "ffr_buy", "PA_buy")),
            ("money", ("obs0_PR", "ffr", "PA")),
        )
        for weight_key, stat_keys in weighted_groups:
            for key in stat_keys:
                try:
                    summary["weighted_" + key] = np.average(stat_cache[key], weights=stat_cache[weight_key])
                except DFLogger._STAT_ERRORS:
                    continue

        for name, key in (("GLR", "PA"), ("GLR_sell", "PA_sell"), ("GLR_buy", "PA_buy")):
            try:
                summary[name] = GLR(stat_cache[key])
            except DFLogger._STAT_ERRORS:
                continue
        return summary

    @staticmethod
    def _accumulate(stat_cache, info):
        """Append every value of ``info`` to the per-key lists in ``stat_cache``."""
        for key, value in info.items():
            bucket = stat_cache.setdefault(key, [])
            if hasattr(value, "__len__"):
                # Sized values are flattened element by element.
                bucket.extend(value)
            else:
                bucket.append(value)

    @staticmethod
    def _worker(log_dir, order_dir, queue):
        """Worker loop: consume infos from ``queue`` until ``"stop"`` arrives.

        Each non-empty info must carry ``"df"`` and ``"res"`` entries. They are
        buffered per instrument (``df.index[0][0]``; presumably the first
        level of a MultiIndex - confirm against the producer) and pickled to
        ``log_dir`` once as many pieces arrived as the instrument's
        ``.pkl.target`` file has rows. On ``"stop"`` the summary dict is put
        back on ``queue`` and the loop ends.
        """
        df_cache = {}
        stat_cache = {}
        os.makedirs(log_dir, exist_ok=True)
        while True:
            info = queue.get(block=True)
            if info == "stop":
                queue.put(DFLogger._summarize(stat_cache))
                break
            if len(info) == 0:
                continue

            df = info.pop("df")
            res = info.pop("res")
            ins = df.index[0][0]
            if ins not in df_cache:
                # NOTE(security): read_pickle can execute arbitrary code;
                # order_dir must only contain trusted files.
                expected = len(pd.read_pickle(order_dir + ins + ".pkl.target"))
                df_cache[ins] = ([], [], expected)
            frames, results, expected = df_cache[ins]
            frames.append(df)
            results.append(res)
            if len(frames) == expected:
                pd.concat(frames).to_pickle(log_dir + ins + ".log")
                pd.concat(results).to_pickle(log_dir + ins + ".pkl")
                del df_cache[ins]
            DFLogger._accumulate(stat_cache, info)

    def reset(self):
        """Drain the queue and start a fresh daemon worker for ``self.log_dir``."""
        while not self.queue.empty():
            self.queue.get()
        assert self.queue.empty()
        self.child = Process(
            target=self._worker,
            args=(self.log_dir, self.order_dir, self.queue),
            daemon=True,
        )
        self.child.start()

    def set_step(self, step):
        """Point the logger at ``<raw_log_dir><step>/`` and restart the worker."""
        self.log_dir = f"{self.raw_log_dir}{step}/"
        self.reset()

    def __call__(self, infos):
        """Strip ``"env_id"`` from every info (in place) and enqueue them all."""
        for info in infos:
            if "env_id" in info:
                info.pop("env_id")
        self.update(infos)

    def update(self, infos):
        """Store the values of every info in the logger (blocks while the queue is full)."""
        for info in infos:
            self.queue.put(info, block=True)

    def summary(self):
        """Stop the worker and return its summary.

        :return: dict with the mean and std of the logged values plus the
            weighted averages and gain-loss ratios that could be computed.
        """
        self.queue.put("stop", block=True)
        # The worker answers on the same queue, so it has to finish (and thus
        # have consumed everything, including "stop") before we read back.
        self.child.join()
        self.child.close()
        assert self.queue.qsize() == 1
        return self.queue.get()
class InfoLogger(DFLogger):
    """Statistics-only logger backed by one long-lived worker process.

    Unlike its base class it writes no files: the worker ignores the ``"df"``
    and ``"res"`` entries of each info and only accumulates the remaining
    values. The worker is started in ``__init__`` and keeps running across
    :meth:`summary` calls, starting from empty statistics after each one.
    """

    # Failures that make a single summary statistic unavailable: missing key,
    # mismatched/invalid data, or weights summing to zero in ``np.average``.
    _STAT_ERRORS = (KeyError, TypeError, ValueError, ZeroDivisionError)

    def __init__(self, *args):
        """Start the worker; positional ``args`` are accepted and ignored."""
        self.stat_cache = {}
        self.queue = Queue(10000)
        # Summaries come back on their own queue so that summary() can never
        # read back its own "stop" marker or an info meant for the worker.
        self._results = Queue()
        self.child = Process(target=self._worker, args=(self.queue, self._results), daemon=True)
        self.child.start()

    @staticmethod
    def _summarize(stat_cache):
        """Build the summary dict from the accumulated ``stat_cache``.

        Every statistic is best-effort: one that cannot be computed is left
        out of the result instead of aborting the whole summary.
        """
        summary = {}
        for key, values in stat_cache.items():
            if not key.startswith("money"):
                summary[key + "_std"] = np.nanstd(values)
                summary[key + "_mean"] = np.nanmean(values)

        weighted_groups = (
            ("money_sell", ("PR_sell", "ffr_sell", "PA_sell")),
            ("money_buy", ("PR_buy", "ffr_buy", "PA_buy")),
            ("money", ("obs0_PR", "ffr", "PA")),
        )
        for weight_key, stat_keys in weighted_groups:
            for key in stat_keys:
                try:
                    summary["weighted_" + key] = np.average(stat_cache[key], weights=stat_cache[weight_key])
                except InfoLogger._STAT_ERRORS:
                    continue

        for name, key in (("GLR", "PA"), ("GLR_sell", "PA_sell"), ("GLR_buy", "PA_buy")):
            try:
                summary[name] = GLR(stat_cache[key])
            except InfoLogger._STAT_ERRORS:
                continue
        return summary

    @staticmethod
    def _accumulate(stat_cache, info, skip=()):
        """Append the values of ``info`` to ``stat_cache``, ignoring keys in ``skip``."""
        for key, value in info.items():
            if key in skip:
                continue
            bucket = stat_cache.setdefault(key, [])
            if hasattr(value, "__len__"):
                # Sized values are flattened element by element.
                bucket.extend(value)
            else:
                bucket.append(value)

    @staticmethod
    def _worker(queue, results):
        """Worker loop: accumulate infos from ``queue`` forever.

        On ``"stop"`` the summary of everything seen so far is put on
        ``results`` and the statistics are reset; the loop then continues.
        """
        stat_cache = {}
        while True:
            info = queue.get(block=True)
            if info == "stop":
                results.put(InfoLogger._summarize(stat_cache))
                stat_cache = {}
                continue
            if len(info) == 0:
                continue
            InfoLogger._accumulate(stat_cache, info, skip=("res", "df"))

    def _update(self, info):
        """Accumulate ``info`` into the in-process ``self.stat_cache``."""
        if len(info) == 0:
            return
        self._accumulate(self.stat_cache, info)

    def summary(self):
        """Return the summary of all infos logged since the previous summary.

        The queue is FIFO, so the worker handles ``"stop"`` only after every
        info enqueued before it by this process.

        :raises RuntimeError: if the worker process died before answering.
        """
        from queue import Empty  # raised by multiprocessing.Queue.get on timeout

        self.queue.put("stop")
        while True:
            try:
                return self._results.get(timeout=1)
            except Empty:
                # Without this check a crashed worker would block us forever.
                if not self.child.is_alive():
                    raise RuntimeError("InfoLogger worker exited before producing a summary")

    def set_step(self, step):
        """No-op: the single worker is not tied to a step directory."""
        return