from joblib import Parallel, delayed
from numba import njit, prange
from tianshou.policy import BasePolicy
from tianshou.data import Batch
import numpy as np
import torch
from env import nan_weighted_avg


class TWAP(BasePolicy):
    """TWAP baseline policy: emits the same constant action for every sample."""

    def __init__(self, config):
        """Read ``max_step_num`` and ``num_cpus`` from ``config`` and store them."""
        super().__init__()
        self.max_step_num = config["max_step_num"]
        self.num_cpus = config["num_cpus"]

    def forward(self, batch: Batch, state=None, **kwargs) -> Batch:
        """Return an action of ``1`` for each entry of ``batch.obs.private``.

        ``state`` is passed through to the returned ``Batch`` unchanged.
        """
        sample_count = len(batch.obs.private)
        constant_actions = [1 for _ in range(sample_count)]
        return Batch(act=constant_actions, state=state)

    def learn(self, batch, batch_size, repeat):
        """No-op: this policy has nothing to train."""
        return None

    def process_fn(self, batch, buffer, indice):
        """No-op: no batch post-processing is performed."""
        return None


class VWAP(BasePolicy):
    """VWAP baseline policy: uses the predictions found in the observation as actions."""

    def __init__(self, config):
        """Initialise the base policy; ``config`` is accepted but not read here."""
        super().__init__()

    def forward(self, batch, state, **kwargs):
        """Stack ``batch.obs.prediction``, flatten it to 1-D and return it as ``act``.

        ``state`` is passed through to the returned ``Batch`` unchanged.
        """
        stacked = np.stack(batch.obs.prediction)
        flat_actions = np.reshape(stacked, -1)
        return Batch(act=flat_actions, state=state)

    def learn(self, batch, batch_size, repeat):
        """No-op: this policy has nothing to train."""
        return None

    def process_fn(self, batch, buffer, indice):
        """No-op: no batch post-processing is performed."""
        return None


class AC(VWAP):
    """Almgren-Chriss strategy.

    ``forward`` turns the per-sample prediction ``sig`` into an action using

        k_tild = lamb / eta * sig ** 2
        k      = arccosh(k_tild / 2 + 1)
        act    = (sinh(k * (T - t)) - sinh(k * (T - t - 1))) / sinh(k * T)

    where ``t`` is column 2 of the stacked private observation plus one.
    """

    def __init__(self, config):
        """Store the model parameters.

        Reads ``config["max_step_num"]`` (stored as ``T``) and
        ``config["lambda"]`` (stored as ``lamb``); the remaining attributes
        are fixed constants.
        """
        super().__init__(config)
        self.T = config["max_step_num"]
        # gamma, tau, eps and alpha are stored but not read by forward().
        self.gamma = 0
        self.tau = 1
        self.lamb = config["lambda"]
        self.eps = 0.0625
        self.alpha = 0.02
        self.eta = 2.5e-6

    def forward(self, batch, state, **kwargs):
        """Compute the Almgren-Chriss action for every sample in ``batch``.

        Reads ``batch.obs.prediction`` (flattened to 1-D) and column 2 of
        ``batch.obs.private``.  Returns a ``Batch`` whose ``act`` holds the
        computed values and whose ``state`` is the ``state`` argument
        unchanged.
        """
        obs = batch.obs
        sig = np.stack(obs.prediction).reshape(-1)
        # NOTE(review): column 2 is presumably the current step index — confirm
        # against the environment's private-observation layout.
        t = np.stack(obs.private)[:, 2] + 1
        k_tild = self.lamb / self.eta * sig * sig
        k = np.arccosh(k_tild / 2 + 1)

        remaining = self.T - t
        numer = np.sinh(k * remaining) - np.sinh(k * (remaining - 1))
        denom = np.sinh(k * self.T)

        # k == 0 (lamb == 0 or a zero prediction) makes the ratio 0/0.  Its
        # limit as k -> 0 is ((T - t) - (T - t - 1)) / T = 1 / T, so feed
        # 1 and T into the division for those entries instead of yielding NaN.
        degenerate = k == 0
        act = np.where(degenerate, 1.0, numer) / np.where(degenerate, self.T, denom)
        return Batch(act=act, state=state)