fix: correct COI formulation to measure price erosion over time

The fundamental error was treating COI as instantaneous margin × alpha.
The corrected formulation is:

    COI = E[p_start] - p_transaction

This measures price erosion over time, capturing how agents using
multiple sessions gather information and drive prices down.

Key changes:
- Add coi.py with COIWindow, COITracker, and compute_multi_session_coi
- Add separability.py with KL-divergence behavioral classification
- Update simplified_env.py to track initial prices and compute windowed COI
- Add corrected COI metrics (coi_*_corrected) alongside legacy metrics

The new approach:
1. Tracks prices at episode start as E[p] (expected price)
2. Computes transaction prices as p (actual sale price)
3. Measures leak as the difference (price erosion)
4. Includes order statistic erosion (Theorem 1: N agents -> min price)
This commit is contained in:
Claude
2026-01-26 15:23:32 +00:00
parent 98a9a3738c
commit 3e0f3d007c
4 changed files with 545 additions and 2 deletions

378
lab/case/thesis/coi.py Normal file
View File

@@ -0,0 +1,378 @@
"""Cost of Information (COI) computation for thesis pricing simulation.
Implements the corrected COI formulation:
COI = E[p] - p
where:
- E[p] = expected price BEFORE information revelation (window start price)
- p = actual transaction price (price at which sales occur)
The fundamental insight is that COI should measure PRICE EROSION over time,
not instantaneous margin leakage. When agents explore across sessions:
1. They reveal demand signals that drive platform price adjustments
2. Coordinated agents can find the minimum price across their session pool
3. The price path from window start to transaction captures information leakage
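Key components:
- COIWindow: Windowed price erosion measurement over K steps
- compute_coi_window: Per-episode COI from session-level transactions
- coi_erosion: Erosion rate, i.e. the fraction (policy - agent) / policy of intended COI lost
- order_statistic_erosion: Order statistic erosion (Theorem 1: N agents -> min price)
- COITracker: Price history tracker for windowed and cumulative erosion
- compute_multi_session_coi: COI split into agent and human sessions, weighted by alpha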
This fixes the fundamental error of treating COI as instantaneous margin × alpha.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Dict, List, TYPE_CHECKING
import numpy as np
if TYPE_CHECKING:
from .simplified import Session
# Small positive constant: added to denominators to avoid division by zero and
# used as the "effectively zero" threshold in comparisons within this module.
EPS = 1e-10
@dataclass
class COIWindow:
    """Result record for one windowed COI (price erosion) measurement.

    Scalar fields summarise the window; the array fields keep the
    per-product values the scalars were aggregated from.
    """

    # Intended COI: E[p] - c at window start.
    policy: float = 0.0
    # Realized COI: p_transaction - c.
    agent: float = 0.0
    # Price erosion: policy - agent.
    leak: float = 0.0
    # Share of intended COI that survives: agent / policy.
    survival_ratio: float = 1.0
    # Per-product policy COI.
    policy_by_product: np.ndarray = field(default_factory=lambda: np.zeros(1))
    # Per-product agent COI.
    agent_by_product: np.ndarray = field(default_factory=lambda: np.zeros(1))
    # Demand weights used for aggregation.
    demand_weights: np.ndarray = field(default_factory=lambda: np.zeros(1))

    def to_dict(self) -> Dict[str, float]:
        """Return the four scalar metrics under their ``coi_*`` keys."""
        exported = (
            ('coi_policy', self.policy),
            ('coi_agent', self.agent),
            ('coi_leak', self.leak),
            ('coi_survival', self.survival_ratio),
        )
        return dict(exported)
def compute_coi_window(
    sessions: List["Session"],
    costs: np.ndarray,
    demand_mapping: Dict[str, float] = None,
    window_prices: np.ndarray = None,
) -> COIWindow:
    """Compute windowed COI from session events: COI = E[p_start] - p_transaction.

    The start price of a product is ``window_prices[i]`` when given, otherwise
    the first ``price_seen`` encountered for that product while iterating the
    sessions in order. The transaction price is the mean ``price_seen`` over
    that product's ``"purchase"`` events. Both are turned into margins over
    ``costs`` and floored at zero.

    Args:
        sessions: Sessions exposing ``sid`` and ``events``; each event exposes
            ``product_idx``, ``price_seen`` and ``action``.
        costs: Per-product costs; its length defines the number of products.
        demand_mapping: Optional session id -> demand proxy. Sessions missing
            from the mapping count with weight 1.0.
        window_prices: Optional explicit window-start prices per product.

    Returns:
        COIWindow with aggregate and per-product metrics. With no sessions,
        all metrics are zero and ``survival_ratio`` is 1.0.
    """
    n = len(costs)
    if not sessions:
        # One fresh array per field: sharing a single zeros array would make
        # an in-place change to one field silently change the other two.
        return COIWindow(
            policy=0.0,
            agent=0.0,
            leak=0.0,
            survival_ratio=1.0,
            policy_by_product=np.zeros(n),
            agent_by_product=np.zeros(n),
            demand_weights=np.zeros(n),
        )

    demand_mapping = demand_mapping or {}

    first_prices = np.zeros(n)        # first price seen per product (E[p] proxy)
    transaction_prices = np.zeros(n)  # sum of purchase prices per product
    transaction_counts = np.zeros(n)
    view_counts = np.zeros(n)         # number of events of any kind per product
    demand_weights = np.zeros(n)

    for sess in sessions:
        sess_demand = demand_mapping.get(sess.sid, 1.0)
        for e in sess.events:
            pidx = e.product_idx
            if pidx < 0 or pidx >= n:
                continue  # ignore events that point outside the catalogue
            price_seen = float(e.price_seen)
            if view_counts[pidx] == 0:
                first_prices[pidx] = price_seen
            view_counts[pidx] += 1
            if e.action == "purchase":
                transaction_prices[pidx] += price_seen
                transaction_counts[pidx] += 1
                demand_weights[pidx] += sess_demand

    # Per-product margins; products never seen / never bought stay at zero.
    policy_by_product = np.zeros(n)
    agent_by_product = np.zeros(n)
    for i in range(n):
        if view_counts[i] > 0:
            start_price = window_prices[i] if window_prices is not None else first_prices[i]
            policy_by_product[i] = max(0, start_price - costs[i])
        if transaction_counts[i] > 0:
            avg_transaction = transaction_prices[i] / transaction_counts[i]
            agent_by_product[i] = max(0, avg_transaction - costs[i])

    total_demand = np.sum(demand_weights) + EPS
    weights = demand_weights / total_demand

    # Compare policy and agent COI on the same set of products: those sold.
    active_mask = transaction_counts > 0
    if np.any(active_mask):
        active_weight = np.sum(weights[active_mask]) + EPS
        policy = float(np.sum(policy_by_product[active_mask] * weights[active_mask]) / active_weight)
        agent = float(np.sum(agent_by_product[active_mask] * weights[active_mask]) / active_weight)
    else:
        # Nothing was sold: weight policy COI by views and report no erosion.
        view_weights = view_counts / (np.sum(view_counts) + EPS)
        policy = float(np.sum(policy_by_product * view_weights))
        agent = policy

    # Use a float literal so leak is always a float, never the int 0.
    leak = max(0.0, policy - agent)
    survival = agent / (policy + EPS) if policy > EPS else 1.0

    return COIWindow(
        policy=policy,
        agent=agent,
        leak=leak,
        survival_ratio=float(np.clip(survival, 0, 1)),
        policy_by_product=policy_by_product,
        agent_by_product=agent_by_product,
        demand_weights=demand_weights,
    )
def coi_erosion(policy_coi: float, agent_coi: float) -> float:
    """Return the share of policy COI lost to leakage, clipped to [0, 1].

    Computed as (policy_coi - agent_coi) / policy_coi, where 0 means no
    erosion and 1 means complete erosion. A policy COI below EPS yields 0.0.
    """
    nothing_to_erode = policy_coi < EPS
    if nothing_to_erode:
        return 0.0
    lost_fraction = (policy_coi - agent_coi) / policy_coi
    return float(np.clip(lost_fraction, 0, 1))
def order_statistic_erosion(n_agents: int, price_std: float, base_margin: float = 1.0) -> float:
    """Estimate the COI erosion rate from the order statistic effect (Theorem 1).

    Model: N agents each query a price p_i ~ N(μ, σ²) and coordinate to buy at
    min(p_1, ..., p_N), so the expected paid price sits below μ by a shift
    that grows with N. The shift is approximated with the extreme-value
    expansion

        σ * (sqrt(2 ln N) - (ln ln N + ln 4π) / (2 sqrt(2 ln N)))

    and reported relative to ``base_margin``.

    Args:
        n_agents: Number of independent agent sessions.
        price_std: Standard deviation σ of the price distribution.
        base_margin: Expected margin (μ - cost) used as the denominator.

    Returns:
        Erosion rate clipped to [0, 1]; 0.0 for a single agent, for a price
        spread below EPS, or when ln N < 0.1.
    """
    if n_agents <= 1:
        return 0.0
    if price_std < EPS:
        return 0.0

    log_n = np.log(n_agents)
    if log_n < 0.1:
        return 0.0

    # Leading term and first-order correction of the expected-minimum shift.
    root = np.sqrt(2 * log_n)
    correction = (np.log(log_n) + np.log(4 * np.pi)) / (2 * root + EPS)
    shift = price_std * (root - correction)

    ratio = shift / (base_margin + EPS)
    return float(np.clip(ratio, 0, 1))
@dataclass
class COITracker:
    """Track prices over successive steps to measure COI as price erosion.

    If the mean margin at the start of a window is A and at the end is B,
    the windowed COI is max(0, A - B).
    """

    window_size: int = 10  # K most recent observations that form the window
    _price_history: List[np.ndarray] = field(default_factory=list)
    _transaction_history: List[np.ndarray] = field(default_factory=list)
    _coi_history: List[float] = field(default_factory=list)

    def add_step(self, prices: np.ndarray, transactions: np.ndarray = None):
        """Record the price observation (and optional transactions) for one step.

        Inputs are copied, so later changes by the caller do not alter the
        recorded history. Any array-like is accepted.
        """
        # np.array copies by default and keeps the input dtype.
        self._price_history.append(np.array(prices))
        if transactions is not None:
            self._transaction_history.append(np.array(transactions))

    def compute_window_coi(self, costs: np.ndarray) -> float:
        """Compute COI over the current window and append it to the COI history.

        The window spans the last ``window_size`` recorded observations (or
        all of them when fewer exist). COI is the drop in mean margin between
        the first and last observation of the window, floored at zero.

        Returns:
            The windowed COI as a Python float; 0.0 (without recording
            anything) when fewer than two observations exist.
        """
        if len(self._price_history) < 2:
            return 0.0
        window_start = max(0, len(self._price_history) - self.window_size)
        start_prices = self._price_history[window_start]
        end_prices = self._price_history[-1]
        # (start - cost) - (end - cost): the price drop expressed via margins.
        start_margin = np.mean(start_prices - costs)
        end_margin = np.mean(end_prices - costs)
        # Cast so callers always get a plain float (not int 0 or np.float64).
        coi = float(max(0.0, start_margin - end_margin))
        self._coi_history.append(coi)
        return coi

    def get_cumulative_erosion(self, costs: np.ndarray) -> float:
        """Return the drop in mean margin from the first observation to the latest.

        Floored at zero; 0.0 when fewer than two observations exist.
        """
        if len(self._price_history) < 2:
            return 0.0
        initial = np.mean(self._price_history[0] - costs)
        current = np.mean(self._price_history[-1] - costs)
        return float(max(0.0, initial - current))

    def get_erosion_trend(self) -> float:
        """Return the mean of all windowed COI values computed so far (0.0 if none)."""
        if not self._coi_history:
            return 0.0
        return float(np.mean(self._coi_history))

    def reset(self):
        """Clear all recorded prices, transactions and COI values."""
        self._price_history.clear()
        self._transaction_history.clear()
        self._coi_history.clear()
def compute_multi_session_coi(
    sessions: List["Session"],
    costs: np.ndarray,
    alpha: float,
    initial_prices: np.ndarray,
) -> Dict[str, float]:
    """Compute COI metrics that separate agent sessions from human sessions.

    Sessions are split by their ground-truth ``actor`` label ("A" or "H").
    Per product:
    - agent margin  = max(0, min price seen in any agent event - cost)
    - human margin  = max(0, mean price of human purchases - cost)
    - with no observation, the margin falls back to initial_price - cost.
    Margins are averaged over products, then blended as
    ``(1 - alpha) * human + alpha * agent`` to obtain the realized COI.
    The leak is policy COI minus realized COI (not floored at zero).

    Args:
        sessions: All sessions in the episode.
        costs: Product costs.
        alpha: Contamination level (fraction of agent sessions).
        initial_prices: Prices at episode start (E[p]).

    Returns:
        Dictionary with policy/agent/human/realized COI, leak, order-statistic
        erosion, session counts and survival ratio.
    """
    n = len(costs)

    agent_pool = [s for s in sessions if s.actor == "A"]
    human_pool = [s for s in sessions if s.actor == "H"]

    # product index -> observed prices
    agent_prices_seen: Dict[int, List[float]] = {i: [] for i in range(n)}
    human_prices_paid: Dict[int, List[float]] = {i: [] for i in range(n)}

    # Agents learn from every price they see, purchase or not.
    for sess in agent_pool:
        for ev in sess.events:
            if 0 <= ev.product_idx < n:
                agent_prices_seen[ev.product_idx].append(ev.price_seen)

    # Humans only contribute the prices they actually paid.
    for sess in human_pool:
        for ev in sess.events:
            if 0 <= ev.product_idx < n and ev.action == "purchase":
                human_prices_paid[ev.product_idx].append(ev.price_seen)

    def margin_for(observed, i, summarise):
        # Floored margin from observations, else the intended margin.
        if observed:
            return max(0, summarise(observed) - costs[i])
        return initial_prices[i] - costs[i]

    policy_coi = float(np.mean(initial_prices - costs))  # E[p] - c

    agent_coi_by_product = np.array(
        [margin_for(agent_prices_seen[i], i, min) for i in range(n)], dtype=float
    )
    agent_coi = float(np.mean(agent_coi_by_product))

    human_coi_by_product = np.array(
        [margin_for(human_prices_paid[i], i, np.mean) for i in range(n)], dtype=float
    )
    human_coi = float(np.mean(human_coi_by_product))

    realized_coi = (1 - alpha) * human_coi + alpha * agent_coi
    leak = policy_coi - realized_coi

    # NOTE(review): the spread is taken across products' initial prices, not
    # across sessions of one product - confirm this matches Theorem 1's σ.
    n_agents = len(agent_pool)
    price_std = float(np.std(initial_prices))
    order_erosion = order_statistic_erosion(n_agents, price_std, policy_coi)

    survival_ratio = realized_coi / (policy_coi + EPS) if policy_coi > EPS else 1.0

    return {
        'policy_coi': policy_coi,
        'agent_coi': agent_coi,
        'human_coi': human_coi,
        'realized_coi': realized_coi,
        'leak': leak,
        'order_stat_erosion': order_erosion,
        'n_agent_sessions': n_agents,
        'n_human_sessions': len(human_pool),
        'survival_ratio': survival_ratio,
    }

View File

@@ -0,0 +1,104 @@
"""Behavioral separability for thesis human/agent classification.
Implements KL-divergence based separability scoring (Eq 20-21):
- Δ_H = D_KL(T̂' || T̄_H): divergence from human reference kernel
- Δ_A = D_KL(T̂' || T̄_A): divergence from agent reference kernel
- α̂(τ') = σ(β(Δ_H - Δ_A)): per-session contamination estimate
"""
from __future__ import annotations
from typing import Dict, List, TYPE_CHECKING
import numpy as np
if TYPE_CHECKING:
from .simplified import Session
# Reference transition kernels T̄_H, T̄_A estimated from real data (Eq 19)
# Layout: source state -> {next state: transition probability}.
# Every row below sums to 1.0. "start" is the state before the first event
# and "end" is the terminal state.
# TRANS_H: human reference kernel.
TRANS_H = {
"start": {"view": 0.85, "end": 0.15},
"view": {"detail": 0.4, "add_to_cart": 0.3, "view": 0.2, "end": 0.1},
"detail": {"add_to_cart": 0.5, "view": 0.3, "end": 0.2},
"add_to_cart": {"purchase": 0.6, "view": 0.25, "end": 0.15},
"purchase": {"end": 1.0},
"checkout": {"purchase": 0.8, "end": 0.2},
"hover": {"view": 0.5, "detail": 0.3, "end": 0.2},
}
# TRANS_A: agent reference kernel, same layout and same source states as TRANS_H.
TRANS_A = {
"start": {"view": 0.95, "end": 0.05},
"view": {"detail": 0.6, "view": 0.25, "add_to_cart": 0.1, "end": 0.05},
"detail": {"view": 0.5, "add_to_cart": 0.15, "detail": 0.3, "end": 0.05},
"add_to_cart": {"view": 0.4, "purchase": 0.2, "end": 0.4},
"purchase": {"end": 1.0},
"checkout": {"purchase": 0.3, "end": 0.7},
"hover": {"view": 0.6, "detail": 0.35, "end": 0.05},
}
def kl_div(p: Dict[str, float], q: Dict[str, float], eps: float = 1e-10) -> float:
    """Compute KL(p || q) over the union of keys, flooring missing mass at eps.

    Keys absent from a distribution are treated as having probability ``eps``.
    Terms whose p-mass does not exceed ``eps`` are skipped. The result is
    floored at 0.0, and 0.0 is returned when either distribution is empty.
    """
    if not (p and q):
        return 0.0

    divergence = 0.0
    for key in set(p.keys()) | set(q.keys()):
        p_mass = p.get(key, eps)
        if not p_mass > eps:
            continue
        q_mass = q.get(key, eps)
        divergence += p_mass * np.log(p_mass / max(q_mass, eps))
    return max(0.0, divergence)
def build_kernel(events: List) -> Dict[str, Dict[str, float]]:
    """Build the empirical transition kernel of an event sequence.

    The sequence is read as a chain start -> a_1 -> ... -> a_k -> end, where
    a_i is the action of the i-th event. Transition counts are normalised per
    source state.

    Args:
        events: Events given either as dicts with an ``'action'`` key or as
            objects with an ``action`` attribute. A missing action is treated
            as ``"end"``.

    Returns:
        Mapping source state -> {next state: probability}. An empty sequence
        yields ``{"start": {"end": 1.0}}``.
    """
    counts: Dict[str, Dict[str, int]] = {}
    prev = "start"
    for e in events:
        # Branch explicitly: a one-line `a or b if cond else c` parses as
        # `(a or b) if cond else c`, which mapped every non-dict event to "end".
        if isinstance(e, dict):
            curr = e.get('action', 'end')
        else:
            curr = getattr(e, 'action', None) or 'end'
        row = counts.setdefault(prev, {})
        row[curr] = row.get(curr, 0) + 1
        prev = curr

    # Close the chain with the transition into the terminal state.
    last_row = counts.setdefault(prev, {})
    last_row["end"] = last_row.get("end", 0) + 1

    # Every recorded row has at least one count, so totals are never zero.
    kernel = {}
    for state, dests in counts.items():
        total = sum(dests.values())
        kernel[state] = {dest: c / total for dest, c in dests.items()}
    return kernel
def compute_divergence(kernel: Dict[str, Dict[str, float]], ref_h: Dict = None, ref_a: Dict = None) -> tuple[float, float]:
    """Return (Δ_H, Δ_A): mean per-state KL divergence from each reference kernel (Eq 20-21).

    For every source state in ``kernel`` the row is compared with the same
    state's row in the reference; the per-state divergences are averaged over
    the number of states in ``kernel``. Missing or empty references fall back
    to TRANS_H / TRANS_A.
    """
    ref_h = ref_h or TRANS_H
    ref_a = ref_a or TRANS_A
    n_states = max(len(kernel), 1)  # avoid dividing by zero for an empty kernel

    human_terms = [kl_div(row, ref_h.get(state, {})) for state, row in kernel.items()]
    agent_terms = [kl_div(row, ref_a.get(state, {})) for state, row in kernel.items()]

    delta_h = sum(human_terms) / n_states
    delta_a = sum(agent_terms) / n_states
    return delta_h, delta_a
def estimate_alpha(session: "Session", beta: float = 2.0) -> float:
    """Estimate per-session contamination α̂(τ') = σ(β(Δ_H - Δ_A)).

    A trajectory far from the human kernel (high Δ_H) and close to the agent
    kernel (low Δ_A) scores near 1 (likely agent). Returns 0.5 when the
    session has no events or when Δ_H + Δ_A is below 1e-6.
    """
    events = session.events
    if not events:
        return 0.5

    delta_h, delta_a = compute_divergence(build_kernel(events))
    if delta_h + delta_a < 1e-6:
        return 0.5

    # Logistic squashing of the scaled divergence gap.
    logit = beta * (delta_h - delta_a)
    return 1.0 / (1.0 + np.exp(-logit))
def batch_estimate_alpha(sessions: List["Session"]) -> tuple[float, List[float]]:
    """Return (mean contamination, per-session contamination estimates).

    Each session is scored with estimate_alpha at its default beta. An empty
    input yields ``(0.0, [])``.
    """
    if not sessions:
        return 0.0, []
    per_session = list(map(estimate_alpha, sessions))
    return float(np.mean(per_session)), per_session

View File

@@ -8,6 +8,14 @@ Objects:
- Demand proxy q_hat via weighted action aggregation
- COI leakage penalty for agent reconnaissance
- Limbo: alternating price/demand history for trajectory analysis
COI Correction (Jan 2026):
The fundamental COI formulation is:
COI = E[p_start] - p_transaction
This measures price erosion over time, not instantaneous margin × alpha.
Agents use multiple sessions to gather information and find minimum prices.
The price path from episode start to transaction captures information leakage.
"""
from __future__ import annotations
from dataclasses import dataclass, field

View File

@@ -6,6 +6,14 @@ Supports multiple reward modes and contamination scenarios.
Action: price multipliers [0.5, 1.5] applied to reference prices
Observation: [prices, demand_agg, alpha_est, margins, position_proxy]
Reward: configurable objective (revenue, profit, robust, coi-aware)
COI Correction (Jan 2026):
The fundamental COI formulation is now:
COI = E[p_start] - p_transaction
This measures price erosion over time, not instantaneous margin × alpha.
Agents using different sessions gather information and drive prices down.
The COITracker now tracks prices over windows to capture this effect.
"""
from __future__ import annotations
from dataclasses import dataclass
@@ -20,7 +28,7 @@ except ImportError:
HAS_GYM = False
from .simplified import System, Session, Event, Limbo, put_prices_to_market, compute_demand, estimate_alpha
from .coi import COIWindow, compute_coi_window, coi_erosion
from .coi import COIWindow, compute_coi_window, coi_erosion, COITracker, compute_multi_session_coi
@dataclass
@@ -73,6 +81,12 @@ class PricingEnv(gym.Env if HAS_GYM else object):
self._episode_rewards: list[float] = []
self._demand_agg = np.zeros(self.n)
# COI tracking: store initial prices for E[p] calculation
self._initial_prices: np.ndarray | None = None
self._coi_tracker = COITracker(window_size=10)
self._last_coi_metrics: Dict[str, float] = {}
self._last_window_coi: float = 0.0
self.action_space = spaces.Box(low=0.5, high=1.5, shape=(self.n,), dtype=np.float32)
obs_dim = self.n + self.n + 1 + 1 + self.n + 1 # prices + demand + alpha_hat + alpha + margins + t
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)
@@ -109,8 +123,29 @@ class PricingEnv(gym.Env if HAS_GYM else object):
if self._last_prices is not None:
vol_penalty = cfg.lambda_vol * float(np.mean(np.abs(prices - self._last_prices) / (sys.refs + 1e-6)))
# Track prices for windowed COI calculation
self._coi_tracker.add_step(prices)
# CORRECTED COI CALCULATION:
# COI = E[p_start] - p_transaction (price erosion over time)
# Use initial prices as E[p] and compute multi-session COI
coi_metrics = compute_multi_session_coi(
sessions=sys._last_sessions,
costs=sys.costs,
alpha=self._alpha,
initial_prices=self._initial_prices,
)
leak = float(coi_metrics['leak'])
# Also compute window-based COI for trend analysis
window_coi = self._coi_tracker.compute_window_coi(sys.costs)
# Store both for info dict
self._last_coi_metrics = coi_metrics
self._last_window_coi = window_coi
# For backward compatibility, also compute the old-style COI
coi = compute_coi_window(sys._last_sessions, sys.costs, demand_mapping=demand)
leak = float(coi.leak)
reward_fns = {
"revenue": lambda: revenue,
@@ -127,6 +162,11 @@ class PricingEnv(gym.Env if HAS_GYM else object):
self._t, self._alpha = 0, self.cfg.alpha_true
self._last_prices, self._last_demand = None, None
self._episode_rewards, self._demand_agg = [], np.zeros(self.n)
# COI tracking: store initial prices as E[p] for COI = E[p] - p calculation
self._initial_prices = self._sys.refs.copy()
self._coi_tracker.reset()
return self._build_obs(), {"alpha_true": self._alpha, "alpha_est": self._sys.alpha,
"costs": self._sys.costs.copy(), "refs": self._sys.refs.copy()}
@@ -150,6 +190,9 @@ class PricingEnv(gym.Env if HAS_GYM else object):
n_agents = int(self._alpha * self.cfg.sessions_per_step)
coi = compute_coi_window(self._sys._last_sessions, self._sys.costs, demand_mapping=demand)
# Corrected COI metrics (price erosion over time)
coi_m = self._last_coi_metrics
info = {
"alpha_true": self._alpha, "alpha_est": self._sys.alpha,
"alpha_error": abs(self._alpha - self._sys.alpha),
@@ -157,9 +200,19 @@ class PricingEnv(gym.Env if HAS_GYM else object):
"n_purchases": int(np.sum(purchases)),
"avg_margin": float(np.mean((prices - self._sys.costs) / self._sys.costs)),
"n_sessions": len(demand), "n_agents": n_agents, "price_std": float(np.std(prices)),
# Legacy COI metrics (for backward compatibility)
"coi_erosion": coi_erosion(coi.policy, coi.agent),
"coi_policy": float(coi.policy), "coi_agent": float(coi.agent),
"coi_leakage": float(coi.leak), "coi_survival": float(coi.survival_ratio),
# CORRECTED COI metrics: E[p] - p (price erosion)
"coi_policy_corrected": float(coi_m.get('policy_coi', 0)),
"coi_agent_corrected": float(coi_m.get('agent_coi', 0)),
"coi_human_corrected": float(coi_m.get('human_coi', 0)),
"coi_realized": float(coi_m.get('realized_coi', 0)),
"coi_leak_corrected": float(coi_m.get('leak', 0)),
"coi_order_stat_erosion": float(coi_m.get('order_stat_erosion', 0)),
"coi_survival_corrected": float(coi_m.get('survival_ratio', 1.0)),
"coi_window": float(self._last_window_coi),
"cumulative_reward": sum(self._episode_rewards), "step": self._t,
}
return self._build_obs(), reward, self._t >= self.cfg.max_steps, False, info