fix: correct COI formulation to measure price erosion over time

The fundamental error was treating COI as instantaneous margin × alpha.
The corrected formulation is:

    COI = E[p_start] - p_transaction

This measures price erosion over time, capturing how agents using multiple
sessions gather information and drive prices down.

Key changes:
- Add coi.py with COIWindow, COITracker, and compute_multi_session_coi
- Add separability.py with KL-divergence behavioral classification
- Update simplified_env.py to track initial prices and compute windowed COI
- Add corrected COI metrics (coi_*_corrected) alongside legacy metrics

The new approach:
1. Tracks prices at episode start as E[p] (expected price)
2. Computes transaction prices as p (actual sale price)
3. Measures leak as the difference (price erosion)
4. Includes order statistic erosion (Theorem 1: N agents -> min price)
2026-06-01 00:53:36 +00:00 · 2026-01-26 15:23:32 +00:00
parent 98a9a3738c
commit 3e0f3d007c
4 changed files with 545 additions and 2 deletions
--- a/lab/case/thesis/simplified_env.py
+++ b/lab/case/thesis/simplified_env.py
@@ -6,6 +6,14 @@ Supports multiple reward modes and contamination scenarios.
 Action: price multipliers [0.5, 1.5] applied to reference prices
 Observation: [prices, demand_agg, alpha_est, margins, position_proxy]
 Reward: configurable objective (revenue, profit, robust, coi-aware)
+
+COI Correction (Jan 2026):
+The fundamental COI formulation is now:
+    COI = E[p_start] - p_transaction
+
+This measures price erosion over time, not instantaneous margin × alpha.
+Agents using multiple sessions gather price information and drive prices down.
+The COITracker now tracks prices over windows to capture this effect.
 """
 from __future__ import annotations
 from dataclasses import dataclass
@@ -20,7 +28,7 @@ except ImportError:
    HAS_GYM = False

 from .simplified import System, Session, Event, Limbo, put_prices_to_market, compute_demand, estimate_alpha
-from .coi import COIWindow, compute_coi_window, coi_erosion
+from .coi import COIWindow, compute_coi_window, coi_erosion, COITracker, compute_multi_session_coi


@dataclass
@@ -73,6 +81,12 @@ class PricingEnv(gym.Env if HAS_GYM else object):
        self._episode_rewards: list[float] = []
        self._demand_agg = np.zeros(self.n)

+        # COI tracking: store initial prices for E[p] calculation
+        self._initial_prices: np.ndarray | None = None
+        self._coi_tracker = COITracker(window_size=10)
+        self._last_coi_metrics: Dict[str, float] = {}
+        self._last_window_coi: float = 0.0
+
        self.action_space = spaces.Box(low=0.5, high=1.5, shape=(self.n,), dtype=np.float32)
        obs_dim = self.n + self.n + 1 + 1 + self.n + 1  # prices + demand + alpha_hat + alpha + margins + t
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)
@@ -109,8 +123,29 @@ class PricingEnv(gym.Env if HAS_GYM else object):
        if self._last_prices is not None:
            vol_penalty = cfg.lambda_vol * float(np.mean(np.abs(prices - self._last_prices) / (sys.refs + 1e-6)))

+        # Track prices for windowed COI calculation
+        self._coi_tracker.add_step(prices)
+
+        # CORRECTED COI CALCULATION:
+        # COI = E[p_start] - p_transaction (price erosion over time)
+        # Use initial prices as E[p] and compute multi-session COI
+        coi_metrics = compute_multi_session_coi(
+            sessions=sys._last_sessions,
+            costs=sys.costs,
+            alpha=self._alpha,
+            initial_prices=self._initial_prices,
+        )
+        leak = float(coi_metrics['leak'])
+
+        # Also compute window-based COI for trend analysis
+        window_coi = self._coi_tracker.compute_window_coi(sys.costs)
+
+        # Store both for info dict
+        self._last_coi_metrics = coi_metrics
+        self._last_window_coi = window_coi
+
+        # For backward compatibility, also compute the old-style COI
        coi = compute_coi_window(sys._last_sessions, sys.costs, demand_mapping=demand)
-        leak = float(coi.leak)

        reward_fns = {
            "revenue": lambda: revenue,
@@ -127,6 +162,11 @@ class PricingEnv(gym.Env if HAS_GYM else object):
        self._t, self._alpha = 0, self.cfg.alpha_true
        self._last_prices, self._last_demand = None, None
        self._episode_rewards, self._demand_agg = [], np.zeros(self.n)
+
+        # COI tracking: store initial prices as E[p] for COI = E[p] - p calculation
+        self._initial_prices = self._sys.refs.copy()
+        self._coi_tracker.reset()
+
        return self._build_obs(), {"alpha_true": self._alpha, "alpha_est": self._sys.alpha,
                                   "costs": self._sys.costs.copy(), "refs": self._sys.refs.copy()}

@@ -150,6 +190,9 @@ class PricingEnv(gym.Env if HAS_GYM else object):
        n_agents = int(self._alpha * self.cfg.sessions_per_step)
        coi = compute_coi_window(self._sys._last_sessions, self._sys.costs, demand_mapping=demand)

+        # Corrected COI metrics (price erosion over time)
+        coi_m = self._last_coi_metrics
+
        info = {
            "alpha_true": self._alpha, "alpha_est": self._sys.alpha,
            "alpha_error": abs(self._alpha - self._sys.alpha),
@@ -157,9 +200,19 @@ class PricingEnv(gym.Env if HAS_GYM else object):
            "n_purchases": int(np.sum(purchases)),
            "avg_margin": float(np.mean((prices - self._sys.costs) / self._sys.costs)),
            "n_sessions": len(demand), "n_agents": n_agents, "price_std": float(np.std(prices)),
+            # Legacy COI metrics (for backward compatibility)
            "coi_erosion": coi_erosion(coi.policy, coi.agent),
            "coi_policy": float(coi.policy), "coi_agent": float(coi.agent),
            "coi_leakage": float(coi.leak), "coi_survival": float(coi.survival_ratio),
+            # CORRECTED COI metrics: E[p] - p (price erosion)
+            "coi_policy_corrected": float(coi_m.get('policy_coi', 0)),
+            "coi_agent_corrected": float(coi_m.get('agent_coi', 0)),
+            "coi_human_corrected": float(coi_m.get('human_coi', 0)),
+            "coi_realized": float(coi_m.get('realized_coi', 0)),
+            "coi_leak_corrected": float(coi_m.get('leak', 0)),
+            "coi_order_stat_erosion": float(coi_m.get('order_stat_erosion', 0)),
+            "coi_survival_corrected": float(coi_m.get('survival_ratio', 1.0)),
+            "coi_window": float(self._last_window_coi),
            "cumulative_reward": sum(self._episode_rewards), "step": self._t,
        }
        return self._build_obs(), reward, self._t >= self.cfg.max_steps, False, info