"""Cost of Information (COI) computation for thesis pricing simulation.

Implements the corrected COI formulation:

    COI = E[p] - p

where:
- E[p] = expected price BEFORE information revelation (window start price)
- p = actual transaction price (price at which sales occur)

The fundamental insight is that COI should measure PRICE EROSION over time,
not instantaneous margin leakage. When agents explore across sessions:
1. They reveal demand signals that drive platform price adjustments
2. Coordinated agents can find the minimum price across their session pool
3. The price path from window start to transaction captures information leakage

Key components:
- COIWindow: Windowed price erosion measurement over K steps
- compute_coi_window: Per-episode COI from session-level transactions
- coi_erosion: Erosion rate, i.e. the fraction (policy - agent) / policy that was lost
- order_statistic_erosion: Order statistic erosion (Theorem 1: N agents -> min price)

This fixes the fundamental error of treating COI as instantaneous margin × alpha.
"""
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Dict, List, Optional, TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
    from .simplified import Session

# Small positive constant used throughout this module in two ways: added to
# denominators to avoid division by zero, and as the threshold below which a
# COI value or price spread is treated as zero.
EPS = 1e-10


@dataclass
class COIWindow:
    """Container for one windowed COI (price erosion) measurement.

    The class only stores values; it performs no computation of its own.

    Attributes:
        policy: Intended COI of the platform (window-start price minus cost).
        agent: Realized COI (transaction price minus cost).
        leak: COI leakage, policy minus agent (erosion caused by exploration).
        survival_ratio: Share of the intended COI that survived (agent / policy).
        policy_by_product: Policy COI broken down per product.
        agent_by_product: Agent COI broken down per product.
        demand_weights: Demand weights that were used for aggregation.
    """
    # Scalar summary values; the defaults describe "nothing measured, nothing lost".
    policy: float = 0.0
    agent: float = 0.0
    leak: float = 0.0
    survival_ratio: float = 1.0
    # Per-product arrays; each instance gets its own length-1 zero array by default.
    policy_by_product: np.ndarray = field(default_factory=lambda: np.zeros(1))
    agent_by_product: np.ndarray = field(default_factory=lambda: np.zeros(1))
    demand_weights: np.ndarray = field(default_factory=lambda: np.zeros(1))

    def to_dict(self) -> Dict[str, float]:
        """Return the four scalar metrics keyed by their ``coi_*`` report names.

        The per-product arrays are not included in the result.
        """
        exported = (
            ('coi_policy', 'policy'),
            ('coi_agent', 'agent'),
            ('coi_leak', 'leak'),
            ('coi_survival', 'survival_ratio'),
        )
        return {key: getattr(self, attr) for key, attr in exported}


def compute_coi_window(
    sessions: List["Session"],
    costs: np.ndarray,
    demand_mapping: Optional[Dict[str, float]] = None,
    window_prices: Optional[np.ndarray] = None,
) -> COIWindow:
    """Compute COI from session data using the corrected formulation.

    COI = E[p_start] - p_transaction

    For each product the start price is ``window_prices[i]`` when
    ``window_prices`` is given, otherwise the first ``price_seen`` encountered
    while walking the sessions in order. The transaction price is the mean
    ``price_seen`` over the product's "purchase" events. Both are converted to
    margins over ``costs`` and floored at zero.

    Aggregation uses the demand accumulated on purchase events as weights and
    only covers products that had at least one purchase. When there are no
    purchases at all, the policy COI is view-count weighted and the agent COI
    is set equal to it (no erosion).

    Args:
        sessions: Sessions exposing ``sid`` and ``events``; every event exposes
            ``product_idx``, ``price_seen`` and ``action``. Events whose
            ``product_idx`` lies outside ``[0, len(costs))`` are ignored.
        costs: Per-product costs; its length defines the number of products.
        demand_mapping: Optional session id -> demand proxy mapping. Sessions
            missing from the mapping get a demand of 1.0.
        window_prices: Optional explicit window start prices, indexed by
            product. Only read for products that were viewed at least once.

    Returns:
        COIWindow with the aggregated policy/agent COI, the non-negative leak,
        the survival ratio clipped to [0, 1], and the per-product arrays.
    """
    n = len(costs)

    if not sessions:
        # Allocate a separate array per field: sharing one array would make an
        # in-place change to one field silently show up in the other two.
        return COIWindow(
            policy=0.0,
            agent=0.0,
            leak=0.0,
            survival_ratio=1.0,
            policy_by_product=np.zeros(n),
            agent_by_product=np.zeros(n),
            demand_weights=np.zeros(n),
        )

    demand_mapping = demand_mapping or {}

    # Per-product accumulators filled while walking the events.
    first_prices = np.zeros(n)        # first price seen (window start proxy, E[p])
    transaction_prices = np.zeros(n)  # sum of purchase prices (p)
    transaction_counts = np.zeros(n)
    view_counts = np.zeros(n)
    demand_weights = np.zeros(n)

    for sess in sessions:
        sess_demand = demand_mapping.get(sess.sid, 1.0)

        for e in sess.events:
            pidx = e.product_idx
            if pidx < 0 or pidx >= n:
                continue

            price_seen = float(e.price_seen)

            # The very first view of a product fixes its window-start price.
            if view_counts[pidx] == 0:
                first_prices[pidx] = price_seen
            view_counts[pidx] += 1

            if e.action == "purchase":
                transaction_prices[pidx] += price_seen
                transaction_counts[pidx] += 1
                demand_weights[pidx] += sess_demand

    # Per-product margins, floored at zero.
    policy_by_product = np.zeros(n)
    agent_by_product = np.zeros(n)

    for i in range(n):
        if view_counts[i] > 0:
            # Explicit window prices take precedence over the first seen price.
            start_price = window_prices[i] if window_prices is not None else first_prices[i]
            policy_by_product[i] = max(0, start_price - costs[i])

        if transaction_counts[i] > 0:
            avg_transaction = transaction_prices[i] / transaction_counts[i]
            agent_by_product[i] = max(0, avg_transaction - costs[i])

    # Normalize the demand weights; EPS keeps the division defined at zero demand.
    total_demand = np.sum(demand_weights) + EPS
    weights = demand_weights / total_demand

    # Compare policy and agent COI on the same set of products: those sold.
    active_mask = transaction_counts > 0
    if np.any(active_mask):
        active_weights = weights[active_mask]
        active_total = np.sum(active_weights) + EPS
        policy = float(np.sum(policy_by_product[active_mask] * active_weights) / active_total)
        agent = float(np.sum(agent_by_product[active_mask] * active_weights) / active_total)
    else:
        # No transactions: view-weighted policy COI, and no erosion by definition.
        view_weights = view_counts / (np.sum(view_counts) + EPS)
        policy = float(np.sum(policy_by_product * view_weights))
        agent = policy

    # Leak = price erosion due to information revelation (never negative).
    leak = max(0.0, policy - agent)
    survival = agent / (policy + EPS) if policy > EPS else 1.0

    return COIWindow(
        policy=policy,
        agent=agent,
        leak=leak,
        survival_ratio=float(np.clip(survival, 0, 1)),
        policy_by_product=policy_by_product,
        agent_by_product=agent_by_product,
        demand_weights=demand_weights,
    )


def coi_erosion(policy_coi: float, agent_coi: float) -> float:
    """Return the share of the policy COI that was lost, clipped to [0, 1].

    The rate is (policy_coi - agent_coi) / policy_coi: 0 means nothing was
    eroded, 1 means the intended COI vanished entirely. A policy COI below
    EPS is treated as "nothing to erode" and yields 0.0.
    """
    # Guard first: a (near-)zero or negative policy COI cannot be a denominator.
    if policy_coi < EPS:
        return 0.0

    lost = policy_coi - agent_coi
    rate = lost / policy_coi
    return float(np.clip(rate, 0, 1))


def order_statistic_erosion(n_agents: int, price_std: float, base_margin: float = 1.0) -> float:
    """Estimate the COI erosion caused by the order statistic effect (Theorem 1).

    Model: each of N agent sessions independently observes a price
    p_i ~ N(μ, σ²) and the agents buy at min(p_1, ..., p_N). The expected
    minimum lies below μ by σ times an order-statistic factor that grows
    with N, so more sessions mean more erosion.

    The factor is the extreme-value approximation

        sqrt(2 ln N) - (ln ln N + ln 4π) / (2 sqrt(2 ln N))

    and the erosion is the resulting price shift relative to ``base_margin``.

    Args:
        n_agents: Number of independent agent sessions.
        price_std: Standard deviation of the price distribution (σ).
        base_margin: Expected margin (μ - cost) the shift is compared against.

    Returns:
        Erosion rate clipped to [0, 1]; 0.0 when there is at most one agent,
        when the price spread is below EPS, or when ln(N) is below 0.1.
    """
    # A single session or a flat price distribution leaves nothing to discover.
    if n_agents <= 1 or price_std < EPS:
        return 0.0

    log_n = np.log(n_agents)
    if log_n < 0.1:
        return 0.0

    # Leading term and correction term of the extreme-value approximation.
    leading = np.sqrt(2 * log_n)
    correction = (np.log(log_n) + np.log(4 * np.pi)) / (2 * np.sqrt(2 * log_n) + EPS)
    shift = price_std * (leading - correction)

    # Express the shift as a fraction of the margin; EPS guards a zero margin.
    relative_shift = shift / (base_margin + EPS)
    return float(np.clip(relative_shift, 0, 1))


@dataclass
class COITracker:
    """Record price snapshots step by step and measure COI over sliding windows.

    The idea: if the price at the start of a window is A and at the end it is
    B, then A - B is the COI leaked through exploratory sessions during that
    window. Snapshots are appended with ``add_step`` and evaluated with the
    ``compute_*`` / ``get_*`` methods.
    """
    window_size: int = 10  # K: number of most recent snapshots forming a window
    _price_history: List[np.ndarray] = field(default_factory=list)
    _transaction_history: List[np.ndarray] = field(default_factory=list)
    _coi_history: List[float] = field(default_factory=list)

    @staticmethod
    def _mean_margin(prices, costs):
        """Average of (price - cost) over all products of one snapshot."""
        return np.mean(prices - costs)

    def add_step(self, prices: np.ndarray, transactions: np.ndarray = None):
        """Store a copy of the current prices and, if given, of the transactions."""
        self._price_history.append(prices.copy())
        if transactions is None:
            return
        self._transaction_history.append(transactions.copy())

    def compute_window_coi(self, costs: np.ndarray) -> float:
        """Return the COI of the current window and append it to the COI history.

        COI = E[p_start] - E[p_end], floored at zero, where the window starts
        ``window_size`` snapshots before the end (or at the first snapshot if
        fewer exist). With fewer than two snapshots the result is 0.0 and
        nothing is recorded.
        """
        n_snapshots = len(self._price_history)
        if n_snapshots < 2:
            return 0.0

        first_idx = max(0, n_snapshots - self.window_size)
        opening_margin = self._mean_margin(self._price_history[first_idx], costs)
        closing_margin = self._mean_margin(self._price_history[-1], costs)

        # Costs cancel out, so this is the mean price drop across the window.
        window_coi = max(0, opening_margin - closing_margin)
        self._coi_history.append(window_coi)
        return window_coi

    def get_cumulative_erosion(self, costs: np.ndarray) -> float:
        """Return the margin drop from the first snapshot to the latest, floored at zero."""
        if len(self._price_history) < 2:
            return 0.0

        first_margin = self._mean_margin(self._price_history[0], costs)
        latest_margin = self._mean_margin(self._price_history[-1], costs)
        return max(0, first_margin - latest_margin)

    def get_erosion_trend(self) -> float:
        """Return the mean of all recorded window COIs, or 0.0 if none exist."""
        return float(np.mean(self._coi_history)) if self._coi_history else 0.0

    def reset(self):
        """Drop all recorded prices, transactions and window COIs."""
        for history in (self._price_history, self._transaction_history, self._coi_history):
            history.clear()


def compute_multi_session_coi(
    sessions: List["Session"],
    costs: np.ndarray,
    alpha: float,
    initial_prices: np.ndarray,
) -> Dict[str, float]:
    """Compute COI when agents explore prices across several sessions.

    Agents spread their exploration over multiple sessions, each of which
    reveals price information, and then buy at the lowest price found in
    the pool. The metrics are built from three margins:

    1. Policy COI: mean of ``initial_prices - costs``.
    2. Agent COI: per product, the minimum price seen in any agent session
       minus cost (floored at zero); the initial margin if never seen.
    3. Human COI: per product, the mean purchase price in human sessions
       minus cost (floored at zero); the initial margin if never bought.

    The realized COI blends human and agent COI with weights (1 - alpha) and
    alpha, and the leak is the policy COI minus that blend.

    Args:
        sessions: All sessions of the episode. Sessions with ``actor == "A"``
            count as agents, ``actor == "H"`` as humans; others are ignored.
        costs: Product costs; its length defines the number of products.
        alpha: Contamination level (fraction of agent sessions).
        initial_prices: Prices at episode start (E[p]).

    Returns:
        Dictionary with the COI metrics, the order statistic erosion estimate,
        the agent/human session counts and the survival ratio.
    """
    n = len(costs)

    def _prices_by_product(pool, purchases_only):
        """Group the in-range event prices of ``pool`` by product index."""
        grouped = {idx: [] for idx in range(n)}
        for session in pool:
            for event in session.events:
                if not (0 <= event.product_idx < n):
                    continue
                if purchases_only and event.action != "purchase":
                    continue
                grouped[event.product_idx].append(event.price_seen)
        return grouped

    def _margins(observed, summarize):
        """Per-product margin from observed prices, initial margin as fallback."""
        margins = np.zeros(n)
        for idx in range(n):
            prices = observed[idx]
            if prices:
                margins[idx] = max(0, summarize(prices) - costs[idx])
            else:
                margins[idx] = initial_prices[idx] - costs[idx]
        return margins

    # Split the sessions by their ground truth actor label.
    agent_sessions = [s for s in sessions if s.actor == "A"]
    human_sessions = [s for s in sessions if s.actor == "H"]

    # Agents learn from every price they see; humans only count when they buy.
    agent_prices_seen = _prices_by_product(agent_sessions, purchases_only=False)
    human_prices_paid = _prices_by_product(human_sessions, purchases_only=True)

    # E[p] - c: what the platform intended to earn.
    policy_coi = float(np.mean(initial_prices - costs))

    # Agents pay the minimum they discovered; humans pay the average offered price.
    agent_coi = float(np.mean(_margins(agent_prices_seen, min)))
    human_coi = float(np.mean(_margins(human_prices_paid, np.mean)))

    # Blend by contamination level: agents erode COI, humans pay full price.
    realized_coi = (1 - alpha) * human_coi + alpha * agent_coi
    leak = policy_coi - realized_coi

    # Order statistic effect: more agent sessions = more erosion.
    n_agents = len(agent_sessions)
    price_std = float(np.std(initial_prices))
    order_erosion = order_statistic_erosion(n_agents, price_std, policy_coi)

    if policy_coi > EPS:
        survival_ratio = realized_coi / (policy_coi + EPS)
    else:
        survival_ratio = 1.0

    return {
        'policy_coi': policy_coi,
        'agent_coi': agent_coi,
        'human_coi': human_coi,
        'realized_coi': realized_coi,
        'leak': leak,
        'order_stat_erosion': order_erosion,
        'n_agent_sessions': n_agents,
        'n_human_sessions': len(human_sessions),
        'survival_ratio': survival_ratio,
    }