mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-30 01:21:18 +08:00
338 lines
13 KiB
Python
338 lines
13 KiB
Python
import env
|
|
from vecenv import *
|
|
import sampler
|
|
import logger
|
|
import json
|
|
import os
|
|
import agent
|
|
import model
|
|
import policy
|
|
import random
|
|
import tianshou as ts
|
|
import tqdm
|
|
from tianshou.utils import tqdm_config, MovAvg
|
|
from torch.utils.tensorboard import SummaryWriter
|
|
from collector import *
|
|
import numpy as np
|
|
|
|
|
|
from util import merge_dicts
|
|
|
|
|
|
def get_best_gpu(force=None):
    """Pick the index of the GPU that currently reports the most free memory.

    The free memory of every GPU is queried through ``nvidia-smi``; a GPU's
    index is the position of its row in that output (header excluded).

    :param force: If not None, it is returned unchanged and ``nvidia-smi`` is
        not invoked. (Default value = None)
    :returns: ``force`` when it is given, otherwise the index of the GPU with
        the most free memory.
    :raises ValueError: If ``nvidia-smi`` did not report any GPU.
    """
    if force is not None:
        return force
    # The context manager closes the pipe even if reading from it fails.
    with os.popen("nvidia-smi --query-gpu=memory.free --format=csv") as pipe:
        output = pipe.read()
    rows = output.replace("MiB", "").replace("memory.free", "").split("\n")
    # rows[0] is the CSV header. Blank rows (such as the one produced by the
    # final newline) are dropped rather than assuming exactly one of them.
    readings = [row for row in rows[1:] if row.strip()]
    free_memory = []
    for reading in readings:
        try:
            free_memory.append(int(reading))
        except ValueError:
            # Keep the slot so the following GPUs keep their index, but make
            # sure an unreadable entry is never preferred over a real value.
            free_memory.append(-1)
    if not free_memory:
        raise ValueError("nvidia-smi reported no GPU; cannot choose a device")
    best = int(np.argmax(free_memory))
    print("the best GPU is ", best, " with free memories of ", readings[best])
    return best
|
|
|
|
|
|
def setup_seed(seed):
    """Seed torch (CPU and all CUDA devices), numpy and ``random`` with one value.

    Also switches cuDNN to deterministic mode.

    :param seed: The seed handed to every random number generator.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True
|
|
|
|
|
|
class BaseExecutor(object):
    """Base class for executors.

    The constructor prepares the log directory, selects the torch device,
    optionally seeds the random number generators and builds (or loads) the
    policy. ``train``, ``train_round`` and ``eval`` must be implemented by
    subclasses.
    """

    def __init__(
        self, log_dir, resources, env_conf, optim=None, policy_conf=None, network=None, policy_path=None, seed=None,
    ):
        """A base class for executor

        :param log_dir: The directory to write all the logs.
        :type log_dir: string
        :param resources: A dict which describes available computational resources.
            If ``resources["device"]`` is ``"cuda"`` it is rewritten in place to
            ``"cuda:<index>"`` using :func:`get_best_gpu`.
        :type resources: dict
        :param env_conf: Configurations for the environments (not used by this base class).
        :type env_conf: dict
        :param optim: Optimization configuration (``lr`` and optionally ``weight_decay``), defaults to None
        :type optim: dict, optional
        :param policy_conf: Configurations for the RL algorithm (``name`` and ``config``), defaults to None
        :type policy_conf: dict, optional
        :param network: Configurations for policy network, defaults to None
        :type network: dict, optional
        :param policy_path: If is not None, would load the policy from this path, defaults to None
        :type policy_path: string, optional
        :param seed: Random seed, defaults to None
        :type seed: int, optional
        """
        self.log_dir = log_dir
        print(self.log_dir)
        # exist_ok avoids the race between a separate existence check and the creation.
        os.makedirs(self.log_dir, exist_ok=True)
        if resources["device"] == "cuda":
            # NOTE: this updates the caller's ``resources`` dict in place; the
            # resolved device is also what gets merged into the agent config below.
            resources["device"] = "cuda:" + str(get_best_gpu())
        self.device = torch.device(resources["device"])
        # Compare against None so that 0 is honoured as a seed.
        if seed is not None:
            setup_seed(seed)

        assert policy_path is not None or policy_conf is not None, "Policy must be defined"
        if policy_path:
            # NOTE(review): torch.load unpickles a whole policy object here, so
            # ``policy_path`` must point to a trusted file.
            self.policy = torch.load(policy_path, map_location=self.device)
            self.policy.actor.extractor.device = self.device
        elif hasattr(agent, policy_conf["name"]):
            # Rule-based agents receive their config merged with the resources.
            policy_conf["config"] = merge_dicts(policy_conf["config"], resources)
            self.policy = getattr(agent, policy_conf["name"])(policy_conf["config"])
        else:
            assert network is not None
            # The extractor class may be named separately from the actor/critic.
            if "extractor" in network:
                net = getattr(model, network["extractor"]["name"] + "_Extractor")(
                    device=self.device, **network["config"]
                )
            else:
                net = getattr(model, network["name"] + "_Extractor")(device=self.device, **network["config"])
            net.to(self.device)
            actor = getattr(model, network["name"] + "_Actor")(extractor=net, device=self.device, **network["config"])
            actor.to(self.device)
            critic = getattr(model, network["name"] + "_Critic")(extractor=net, device=self.device, **network["config"])
            critic.to(self.device)
            self.optim = torch.optim.Adam(
                list(actor.parameters()) + list(critic.parameters()),
                lr=optim["lr"],
                weight_decay=optim["weight_decay"] if "weight_decay" in optim else 0.0,
            )
            self.dist = torch.distributions.Categorical
            try:
                self.policy = getattr(ts.policy, policy_conf["name"])(
                    actor, critic, self.optim, self.dist, **policy_conf["config"]
                )
            except Exception:
                # Fall back to the project's own ``policy`` module when tianshou
                # cannot provide/build the policy. ``Exception`` (rather than a
                # bare except) lets KeyboardInterrupt/SystemExit propagate.
                self.policy = getattr(policy, policy_conf["name"])(
                    actor, critic, self.optim, self.dist, **policy_conf["config"]
                )
        self.writer = SummaryWriter(self.log_dir)

    def train(
        self,
        max_epoch,
        step_per_epoch,
        repeat_per_collect,
        collect_per_step,
        batch_size,
        iteration=0,
        global_step=0,
        early_stopping=5,
        *args,
        **kargs,
    ):
        """Run the whole training process.

        :param max_epoch: The total number of epoch.
        :param step_per_epoch: The times of bp in one epoch.
        :param collect_per_step: Number of episodes to collect before one bp.
        :param repeat_per_collect: Times of bps after every round of experience collecting.
        :param batch_size: Batch size when bp.
        :param iteration: The iteration when starting the training, used when fine tuning. (Default value = 0)
        :param global_step: The number of steps when starting the training, used when fine tuning. (Default value = 0)
        :param early_stopping: If the test reward does not reach a new high in `early_stopping` iterations, the training would stop. (Default value = 5)
        :returns: The result on test set.
        :raises NotImplementedError: Always; subclasses must override this method.

        """
        raise NotImplementedError

    def train_round(self, repeat_per_collect, collect_per_step, batch_size, *args, **kargs):
        """Do a round of training

        :param collect_per_step: Number of episodes to collect before one bp.
        :param repeat_per_collect: Times of bps after every round of experience collecting.
        :param batch_size: Batch size when bp.
        :raises NotImplementedError: Always; subclasses must override this method.

        """
        raise NotImplementedError

    def eval(self, order_dir, save_res=False, logdir=None, *args, **kargs):
        """Evaluate the policy on orders in order_dir

        :param order_dir: the orders to be evaluated on.
        :param save_res: whether the result of evaluation be saved to self.log_dir/res.json (Default value = False)
        :param logdir: the place to save the .log and .pkl log files to. If None, don't save logfiles. (Default value = None)
        :returns: The result of evaluation.
        :raises NotImplementedError: Always; subclasses must override this method.

        """
        raise NotImplementedError
|
|
|
|
|
|
class Executor(BaseExecutor):
    """Executor that trains and evaluates a policy on a vectorized environment."""

    def __init__(
        self,
        log_dir,
        resources,
        env_conf,
        train_paths,
        valid_paths,
        test_paths,
        io_conf,
        optim=None,
        policy_conf=None,
        network=None,
        policy_path=None,
        seed=None,
        share_memory=False,
        buffer_size=200000,
        q_learning=False,
        *args,
        **kargs,
    ):
        """Build the vectorized environment, the collectors, the samplers and the loggers.

        :param log_dir: The directory to write all the logs.
        :type log_dir: string
        :param resources: A dict which describes available computational resources.
            ``resources["num_cpus"]`` sets the number of parallel environments.
        :type resources: dict
        :param env_conf: Configurations for the environments.
        :type env_conf: dict
        :param train_paths: The paths of training datasets including orders, backtest files and features.
            A ``"features"`` entry copied from the environment config is written into this dict.
        :type train_paths: dict
        :param valid_paths: The paths of validation datasets including orders, backtest files and features.
        :type valid_paths: dict
        :param test_paths: The paths of test datasets including orders, backtest files and features.
            A ``"features"`` entry copied from the environment config is written into this dict.
        :type test_paths: dict
        :param io_conf: Configuration for sampler and loggers
            (keys ``train_sampler``, ``test_sampler`` and ``test_logger``).
        :type io_conf: dict
        :param share_memory: Whether to use shared memory vecenv, defaults to False
        :type share_memory: bool, optional
        :param buffer_size: The size of replay buffer, defaults to 200000
        :type buffer_size: int, optional
        :param q_learning: If True, a training round keeps the replay buffer between rounds
            and calls ``policy.update(batch_size, buffer)``, defaults to False
        :type q_learning: bool, optional
        """
        super().__init__(log_dir, resources, env_conf, optim, policy_conf, network, policy_path, seed)
        single_env = getattr(env, env_conf["name"])
        env_conf = merge_dicts(env_conf, train_paths)
        env_conf["log"] = True
        print("CPU_COUNT:", resources["num_cpus"])
        if share_memory:
            self.env = ShmemVectorEnv([lambda: single_env(env_conf) for _ in range(resources["num_cpus"])])
        else:
            self.env = SubprocVectorEnv([lambda: single_env(env_conf) for _ in range(resources["num_cpus"])])
        # Both collectors share the same policy and the same vectorized environment.
        self.test_collector = Collector(policy=self.policy, env=self.env, testing=True, reward_metric=np.sum)
        self.train_collector = Collector(
            self.policy, self.env, buffer=ts.data.ReplayBuffer(buffer_size), reward_metric=np.sum,
        )
        self.train_paths = train_paths
        self.test_paths = test_paths
        self.valid_paths = valid_paths
        # NOTE: the sampler configs alias the caller's dicts, so "features" is
        # added to ``train_paths`` / ``test_paths`` themselves.
        train_sampler_conf = train_paths
        train_sampler_conf["features"] = env_conf["features"]
        test_sampler_conf = test_paths
        test_sampler_conf["features"] = env_conf["features"]
        self.train_sampler = getattr(sampler, io_conf["train_sampler"])(train_sampler_conf)
        self.test_sampler = getattr(sampler, io_conf["test_sampler"])(test_sampler_conf)
        self.train_logger = logger.InfoLogger()
        # Stored as a class (not an instance); ``eval`` instantiates it per run.
        self.test_logger = getattr(logger, io_conf["test_logger"])

        self.q_learning = q_learning

    def train(
        self,
        max_epoch,
        step_per_epoch,
        repeat_per_collect,
        collect_per_step,
        batch_size,
        iteration=0,
        global_step=0,
        early_stopping=5,
        train_step_min=0,
        log_valid=True,
        *args,
        **kargs,
    ):
        """Run the whole training process, then evaluate the best policy on the test set.

        After every epoch the policy is evaluated on the validation orders and
        saved to ``<log_dir>/policy_<epoch>``; the best one so far is also saved
        to ``<log_dir>/policy_best``.

        :param max_epoch: The total number of epoch.
        :param step_per_epoch: The times of bp in one epoch.
        :param repeat_per_collect: Times of bps after every round of experience collecting.
        :param collect_per_step: Number of episodes to collect before one bp.
        :param batch_size: Batch size when bp.
        :param iteration: The iteration when starting the training, used when fine tuning. (Default value = 0)
        :param global_step: The number of steps when starting the training, used when fine tuning. (Default value = 0)
        :param early_stopping: If the validation reward does not reach a new high in `early_stopping` epochs, the training would stop. (Default value = 5)
        :param train_step_min: Epochs without improvement only count towards early stopping once `global_step` has reached this value. (Default value = 0)
        :param log_valid: Whether validation runs write their log files to ``<log_dir>/valid/<iteration>/``. (Default value = True)
        :returns: The result on test set.
        """
        # Local import: ``copy`` is only needed for snapshotting the best weights.
        import copy

        best_epoch, best_reward = -1, -1
        # Initialised up front so that ``max_epoch == 0`` does not hit unbound names.
        best_state = None
        early_stop_round = 0
        stat = {}
        for epoch in range(1, 1 + max_epoch):
            with tqdm.tqdm(total=step_per_epoch, desc=f"Epoch #{epoch}", **tqdm_config) as t:
                while t.n < t.total:
                    result, losses = self.train_round(repeat_per_collect, collect_per_step, batch_size, iteration)
                    global_step += result["n/st"]
                    iteration += 1
                    for k in result.keys():
                        self.writer.add_scalar("Train/" + k, result[k], global_step=global_step)
                    for k in losses.keys():
                        # Losses are reported as a moving average per loss name.
                        if stat.get(k) is None:
                            stat[k] = MovAvg()
                        stat[k].add(losses[k])
                        self.writer.add_scalar("Train/" + k, stat[k].get(), global_step=global_step)
                    t.update(1)
                if t.n <= t.total:
                    t.update()
            result = self.eval(
                self.valid_paths["order_dir"], logdir=f"{self.log_dir}/valid/{iteration}/" if log_valid else None,
            )
            for k in result.keys():
                self.writer.add_scalar("Valid/" + k, result[k], global_step=global_step)
            if best_epoch == -1 or best_reward < result["rew"]:
                best_reward = result["rew"]
                best_epoch = epoch
                # state_dict() returns references to the live tensors; without a
                # deep copy later updates would silently overwrite the snapshot.
                best_state = copy.deepcopy(self.policy.state_dict())
                early_stop_round = 0
                torch.save(self.policy, f"{self.log_dir}/policy_best")
            elif global_step >= train_step_min:
                early_stop_round += 1
            torch.save(self.policy, f"{self.log_dir}/policy_{epoch}")
            print(
                f'Epoch #{epoch}: test_reward: {result["rew"]:.4f}, '
                f"best_reward: {best_reward:.4f} in #{best_epoch}"
            )
            if early_stop_round >= early_stopping:
                print("Early stopped")
                break
        print("Testing...")
        # Nothing to restore when no epoch was run.
        if best_state is not None:
            self.policy.load_state_dict(best_state)
        result = self.eval(self.test_paths["order_dir"], logdir=f"{self.log_dir}/test/", save_res=True)
        for k in result.keys():
            self.writer.add_scalar("Test/" + k, result[k], global_step=global_step)
        return result

    def train_round(self, repeat_per_collect, collect_per_step, batch_size, *args, **kargs):
        """Collect experience with the training sampler and update the policy once.

        :param repeat_per_collect: Times of bps after every round of experience collecting
            (only used when ``q_learning`` is False).
        :param collect_per_step: Number of episodes to collect before one bp.
        :param batch_size: Batch size when bp.
        :returns: A tuple ``(result, losses)``: the collector result merged with the
            training logger summary, and the value returned by ``policy.update``.
        """
        self.policy.train()
        self.env.toggle_log(False)
        self.env.sampler = self.train_sampler
        if not self.q_learning:
            # The collector (and its buffer) is reset before every round unless q_learning is set.
            self.train_collector.reset()
        result = self.train_collector.collect(n_episode=collect_per_step, log_fn=self.train_logger)
        result = merge_dicts(result, self.train_logger.summary())
        if not self.q_learning:
            losses = self.policy.update(
                0, self.train_collector.buffer, batch_size=batch_size, repeat=repeat_per_collect,
            )
        else:
            losses = self.policy.update(batch_size, self.train_collector.buffer,)
        return result, losses

    def eval(self, order_dir, save_res=False, logdir=None, *args, **kargs):
        """Evaluate the policy on orders in order_dir

        :param order_dir: the orders to be evaluated on.
        :param save_res: whether the result of evaluation be saved to self.log_dir/res.json (Default value = False)
        :param logdir: the directory handed to the test logger; created if missing.
            If None, the training logger is used instead. (Default value = None)
        :returns: The collector result merged with the logger summary.
        """
        print(f"start evaluating on {order_dir}")
        self.policy.eval()
        self.env.toggle_log(True)
        self.test_sampler.reset(order_dir)
        self.env.sampler = self.test_sampler
        self.test_collector.reset()
        if logdir is not None:
            # exist_ok avoids the race between a separate existence check and the creation.
            os.makedirs(logdir, exist_ok=True)
            eval_logger = self.test_logger(logdir, order_dir)
            eval_logger.reset()
        else:
            eval_logger = self.train_logger
        result = self.test_collector.collect(log_fn=eval_logger)
        result = merge_dicts(result, eval_logger.summary())
        if save_res:
            with open(self.log_dir + "/res.json", "w") as f:
                json.dump(result, f, sort_keys=True, indent=4)
        print(f"finish evaluating on {order_dir}")
        return result
|