shock: defining new lab environment and formulation

2026-07-16 01:53:37 +00:00 · 2026-01-23 10:37:32 +01:00
parent a033e77697
commit 4e2e41d943
41 changed files with 4175 additions and 0 deletions
--- a/lab/case/thesis/init.py
+++ b/lab/case/thesis/init.py
@@ -0,0 +1,25 @@
+"""
+Thesis-specific implementation of the PHANTOM pricing defense framework.
+
+This module implements the mathematical models from the thesis:
+- ContaminatedArrivalModel: Mixture demand Q(p) = (1-α)d_H + αd_A (Eq 3)
+- HybridExecutionModel: Divergent H/A behavior with separability (Section 2.1)
+- RobustStackelbergObjective: Maximin objective with COI penalty (Eq 23)
+- COIMetrics: Cost of Information tracking (Definition 1)
+
+The platform configuration creates a research environment that directly
+maps to the thesis mathematical framework for DR-RL experiments.
+"""
+from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig
+from .execution import HybridExecutionModel, HybridExecutionConfig
+from .objectives import RobustStackelbergObjective, COIObjective
+from .platform import make_thesis_platform, ThesisConfig
+from .metrics import COIMetrics, compute_coi, compute_separability
+
+__all__ = [
+    'ContaminatedArrivalModel', 'ContaminatedArrivalConfig',
+    'HybridExecutionModel', 'HybridExecutionConfig',
+    'RobustStackelbergObjective', 'COIObjective',
+    'make_thesis_platform', 'ThesisConfig',
+    'COIMetrics', 'compute_coi', 'compute_separability',
+]
--- a/lab/case/thesis/arrivals.py
+++ b/lab/case/thesis/arrivals.py
@@ -0,0 +1,327 @@
+"""Contaminated arrivals using learned MDP kernels from behavior_loader.
+
+Implements thesis demand model (Section 3.1):
+- Aggregate demand Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t  (Eq 3)
+- Demand proxy q̂_{t,i} = Σ_s Σ_k ω(a_{s,k}) · 1[i_{s,k} = i]     (Eq 2)
+- Per-session separability via KL divergence Δ_H, Δ_A              (Eq 20-21)
+
+The arrival model samples sessions from a mixture of human/agent behavioral profiles,
+each session produces a trajectory τ_s and associated demand computation q(τ').
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from types import SimpleNamespace
+from typing import Dict, List, Tuple, Optional
+import numpy as np
+from ...outlet.types import Opportunity, InstrumentSet, MarketState, HiddenState
+from ...outlet.constants import Side, OpportunityType
+from ...outlet.math_util import poisson_arrivals
+
+try:
+    import sys
+    from pathlib import Path
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
+    from sim.rl.behavior_loader.models import (
+        BehaviorModel, AgentBehaviorModel, aggregate_event_transitions, kl_divergence
+    )
+    REAL_MDP = True
+except ImportError:
+    REAL_MDP = False
+    kl_divergence = None
+
+EVENT_PAGE = {"session_start": "/", "view_item_page": "/products", "learn_more_about_item": "/products/details",
+              "add_item_to_cart": "/cart", "purchase_complete": "/checkout", "session_end": "/checkout/success"}
+EVENT_CANON = {"page_view": "session_start", "hover_over_paragraph": "view_item_page", "hover_over_title": "view_item_page",
+               "view_item_page": "view_item_page", "learn_more_about_item": "learn_more_about_item",
+               "add_item_to_cart": "add_item_to_cart", "checkout_start": "purchase_complete", "remove_item": "view_item_page"}
+
+# action space partition A = A_nav ∪ A_cart ∪ A_filter ∪ A_dwell with signal weights ω (Table 1)
+ACTION_WEIGHTS: Dict[str, float] = {
+    "add_item_to_cart": 0.8, "remove_item": 0.6, "checkout_start": 0.9, "purchase_complete": 1.0,  # A_cart
+    "hover_over_title": 0.3, "hover_over_paragraph": 0.35, "hover_over_link": 0.25,               # A_dwell
+    "page_view": 0.1, "session_start": 0.05, "view_item_page": 0.15, "learn_more_about_item": 0.2, # A_nav
+    "search": 0.05, "filter_date": 0.05, "filter_price": 0.08, "sort": 0.03, "session_end": 0.0,   # A_filter
+}
+
+
+@dataclass
+class SessionDemand:
+    """Per-session demand computation per thesis formulation (Section 3.1).
+
+    Each session s ∈ S produces trajectory τ_s and demand proxy q̂. The platform uses
+    divergence signals Δ_H, Δ_A to estimate per-session contamination α̂(τ').
+    """
+    session_id: str
+    q: Dict[int, float]               # q̂_i demand proxy per product (Eq 2)
+    trajectory: List[Dict]            # τ_s = (e_{s,1}, ..., e_{s,L_s})
+    delta_h: float = 0.0              # D_KL(T̂' || T̄_H) (Eq 20)
+    delta_a: float = 0.0              # D_KL(T̂' || T̄_A) (Eq 21)
+    alpha_hat: float = 0.0            # per-session contamination estimate
+    actor_class: str = "H"            # ground truth Y_s ∈ {H, A}
+    theta: Dict[str, float] = field(default_factory=dict)
+
+
+def compute_demand_proxy(events: List[Dict], n_products: int) -> Dict[int, float]:
+    """Compute q̂_{t,i} = Σ_k ω(a_{s,k}) · 1[i_{s,k} = i] per Eq 2."""
+    q = {i: 0.0 for i in range(n_products)}
+    for e in events:
+        action, pidx = e.get("eventName", ""), e.get("product_idx")
+        if pidx is not None and 0 <= pidx < n_products:
+            q[pidx] += ACTION_WEIGHTS.get(action, 0.1)
+    return q
+
+
+def compute_session_divergence(events: List[Dict], ref_h: Dict, ref_a: Dict) -> Tuple[float, float]:
+    """Compute Δ_H, Δ_A divergence signals from trajectory (Eq 20-21)."""
+    if not events or kl_divergence is None:
+        return 0.0, 0.0
+    # build empirical transition kernel from trajectory
+    trans: Dict[str, Dict[str, int]] = {}
+    prev = "session_start"
+    for e in events:
+        curr = e.get("eventName", "session_end")
+        trans.setdefault(prev, {})
+        trans[prev][curr] = trans[prev].get(curr, 0) + 1
+        prev = curr
+    # normalize to probabilities
+    kernel = {}
+    for s, dests in trans.items():
+        total = sum(dests.values())
+        kernel[s] = {d: c / total for d, c in dests.items()} if total > 0 else {}
+    # aggregate to event-level and compute KL divergence against reference kernels
+    delta_h = sum(kl_divergence(kernel.get(s, {}), ref_h.get(s, {})) for s in kernel) / max(len(kernel), 1)
+    delta_a = sum(kl_divergence(kernel.get(s, {}), ref_a.get(s, {})) for s in kernel) / max(len(kernel), 1)
+    return delta_h, delta_a
+
+def _canonicalize(raw: Dict) -> Dict:
+    out = {}
+    for src, dsts in raw.items():
+        sc = EVENT_CANON.get(src, src)
+        out.setdefault(sc, {})
+        for dst, p in dsts.items():
+            dc = EVENT_CANON.get(dst, dst)
+            out[sc][dc] = out[sc].get(dc, 0.0) + p
+    return {s: {k: v/sum(d.values()) for k, v in d.items()} for s, d in out.items() if sum(d.values()) > 0}
+
+
+class BehavioralProfile:
+    """Markov profile from learned MDP kernels (Section 3.5.2).
+
+    Transition kernel T̂_Y estimated via MLE: P̂(s'|s) = N(s,s') / Σ_k N(s,k) (Eq 19)
+    """
+    STATES = ["session_start", "view_item_page", "learn_more_about_item", "add_item_to_cart", "purchase_complete", "session_end"]
+    # fallback kernels T̄_H, T̄_A when real data unavailable
+    FALLBACK_H = {"session_start": {"view_item_page": 0.85, "session_end": 0.15},
+                  "view_item_page": {"learn_more_about_item": 0.4, "add_item_to_cart": 0.3, "view_item_page": 0.2, "session_end": 0.1},
+                  "learn_more_about_item": {"add_item_to_cart": 0.5, "view_item_page": 0.3, "session_end": 0.2},
+                  "add_item_to_cart": {"purchase_complete": 0.6, "view_item_page": 0.25, "session_end": 0.15},
+                  "purchase_complete": {"session_end": 1.0}}
+    FALLBACK_A = {"session_start": {"view_item_page": 0.95, "session_end": 0.05},
+                  "view_item_page": {"learn_more_about_item": 0.6, "view_item_page": 0.25, "add_item_to_cart": 0.1, "session_end": 0.05},
+                  "learn_more_about_item": {"view_item_page": 0.5, "add_item_to_cart": 0.15, "learn_more_about_item": 0.3, "session_end": 0.05},
+                  "add_item_to_cart": {"view_item_page": 0.4, "purchase_complete": 0.2, "session_end": 0.4},
+                  "purchase_complete": {"session_end": 1.0}}
+
+    def __init__(self, actor: str, pprobs: np.ndarray, data_dir: str = ""):
+        self.actor, self.pprobs = actor, np.clip(pprobs, 0.0, 0.95)
+        self.trans = self._load(data_dir)  # T̂_Y transition kernel
+        self._ensure_terminal()
+        self.dwell = {s: (1.2, 0.5) if actor == "agents" else (2.0, 1.2) for s in self.STATES}
+
+    def _load(self, data_dir: str) -> Dict:
+        if not REAL_MDP or not data_dir:
+            print("using fallback")
+            return dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H)
+        try:
+            mdp = (AgentBehaviorModel if self.actor == "agents" else BehaviorModel)(data_dir).build_MDP()
+            raw = aggregate_event_transitions(mdp) if mdp.get("transitions") else {}
+            return _canonicalize(raw) if raw else dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H)
+        except Exception:
+            print("using fallback")
+            return dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H)
+
+    def _ensure_terminal(self):
+        self.trans.setdefault("purchase_complete", {})["session_end"] = self.trans.get("purchase_complete", {}).get("session_end", 1.0)
+        self.trans.setdefault("session_start", {"view_item_page": 0.7, "learn_more_about_item": 0.2, "session_end": 0.1})
+
+    def _tprobs(self, state: str, pidx: int) -> Dict[str, float]:
+        probs = dict(self.trans.get(state, {"session_end": 1.0}))
+        if state == "add_item_to_cart":
+            base = probs.get("purchase_complete", 0.0)
+            df = float(self.pprobs[pidx]) * (0.3 if self.actor == "agents" else 1.0)
+            adj = np.clip(base * 0.5 + df * 0.5, 0.0, 0.95)
+            rem = max(1e-6, 1.0 - adj)
+            other = sum(v for k, v in probs.items() if k != "purchase_complete")
+            probs = {k: (adj if k == "purchase_complete" else v * rem / max(other, 1e-6)) for k, v in probs.items()}
+        total = sum(probs.values())
+        return {k: v/total for k, v in probs.items()} if total > 0 else {"session_end": 1.0}
+
+    def sample(self, rng: np.random.Generator, sid: str, prices: np.ndarray, costs: np.ndarray) -> Tuple[List[Dict], List[SimpleNamespace]]:
+        events, fevts = [], []
+        state, t, pidx = "session_start", 0.0, int(rng.integers(0, len(prices)))
+        cost, cprice = float(costs[pidx]), max(float(prices[pidx]), float(costs[pidx]) * 1.05)
+
+        while state != "session_end" and len(events) < 40:
+            if state != "session_start":
+                row = {"session_id": sid, "actor": "agent" if self.actor == "agents" else "human",
+                       "eventName": state, "product_idx": pidx, "productId": f"product-{pidx:04d}",
+                       "price_offered": cprice, "price_paid": 0.0, "page": EVENT_PAGE.get(state, "/"),
+                       "ts": t, "unit_cost": cost, "base_price": float(prices[pidx])}
+                if state == "purchase_complete":
+                    row["price_paid"] = max(cprice * (1.0 + rng.normal(0.0, 0.015)), cost)
+                events.append(row)
+                fevts.append(SimpleNamespace(eventName=state, page=row["page"], productId=row["productId"], ts=t))
+
+            probs = self._tprobs(state, pidx)
+            state = rng.choice(list(probs.keys()), p=list(probs.values()))
+            sh, sc = self.dwell.get(state, (2.0, 1.0))
+            t += max(0.3, rng.gamma(shape=sh, scale=sc))
+        return events, fevts
+
+
+@dataclass
+class ContaminatedArrivalConfig:
+    base_rate: float = 20.0
+    alpha_contamination: float = 0.2
+    alpha_drift: float = 0.0
+    alpha_bounds: tuple[float, float] = (0.0, 0.5)
+    human_views_range: tuple[int, int] = (1, 4)
+    agent_views_range: tuple[int, int] = (3, 10)
+    agent_systematic: bool = True
+    use_real_behavior: bool = True
+    human_data_dir: str = ""
+    agent_data_dir: str = ""
+
+
+class ContaminatedArrivalModel:
+    """Mixture model Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t (Eq 3).
+
+    Samples sessions from human/agent behavioral profiles, computes per-session
+    demand proxy q̂ and divergence signals Δ_H, Δ_A for separability.
+    """
+
+    def __init__(self, cfg: ContaminatedArrivalConfig | None = None):
+        self.cfg = cfg or ContaminatedArrivalConfig()
+        self._alpha = self.cfg.alpha_contamination
+        self._scount = 0
+        self._profiles: Dict[str, BehavioralProfile] = {}
+        self._ref_kernels: Dict[str, Dict] = {}  # T̄_H, T̄_A reference kernels
+        self._session_demands: List[SessionDemand] = []  # collected session demands
+
+    @property
+    def alpha(self) -> float:
+        return self._alpha
+
+    def _profile(self, actor: str, pprobs: np.ndarray) -> BehavioralProfile:
+        key = actor
+        if key not in self._profiles:
+            ddir = self.cfg.agent_data_dir if actor == "agents" else self.cfg.human_data_dir
+            if not ddir and self.cfg.use_real_behavior:
+                base = Path(__file__).parent.parent.parent.parent / "experiments"
+                ddir = str(base / ("agents/collected_data" if actor == "agents" else "collected_data"))
+            profile = BehavioralProfile(actor, pprobs, ddir if self.cfg.use_real_behavior else "")
+            self._profiles[key] = profile
+            self._ref_kernels[key] = profile.trans  # cache T̄_Y for divergence
+        return self._profiles[key]
+
+    def get_ref_kernels(self) -> Tuple[Dict, Dict]:
+        """Return reference transition kernels T̄_H, T̄_A for divergence computation."""
+        return (self._ref_kernels.get("humans", BehavioralProfile.FALLBACK_H),
+                self._ref_kernels.get("agents", BehavioralProfile.FALLBACK_A))
+
+    def get_session_demands(self) -> List[SessionDemand]:
+        """Return collected session demands for downstream analysis."""
+        return self._session_demands
+
+    def sample(self, t: float, dt: float, instruments: InstrumentSet,
+               market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]:
+        """Sample arrivals as per Eq 3: mixture of human/agent demand distributions.
+
+        For each session s, computes:
+        - Trajectory τ_s from behavioral profile sampling
+        - Demand proxy q̂ via weighted action aggregation (Eq 2)
+        - Divergence signals Δ_H, Δ_A for separability (Eq 20-21)
+        - Per-session contamination estimate α̂(τ')
+        """
+        cfg = self.cfg
+        if cfg.alpha_drift != 0:
+            self._alpha = np.clip(self._alpha + cfg.alpha_drift * rng.normal(), *cfg.alpha_bounds)
+        hidden.contamination = self._alpha
+
+        n_sess = poisson_arrivals(cfg.base_rate * hidden.true_demand_intensity, dt, rng)
+        prices, costs = instruments.refs, instruments.costs
+        margin = np.clip((prices - costs) / np.maximum(costs, 1e-3), -0.9, 2.0)
+        hprob, aprob = 0.08 * np.exp(-1.2 * margin), 0.05 * np.exp(-0.6 * margin)
+        ref_h, ref_a = self.get_ref_kernels()
+
+        opps = []
+        for _ in range(n_sess):
+            self._scount += 1
+            sid = f"s{self._scount:06d}"
+            is_agent = rng.random() < self._alpha
+            actor, probs = ("agents", aprob) if is_agent else ("humans", hprob)
+            profile = self._profile(actor, probs)
+            events, fevts = profile.sample(rng, sid, prices, costs)
+
+            # compute demand proxy q̂ per Eq 2
+            q = compute_demand_proxy(events, instruments.n)
+
+            # compute divergence signals Δ_H, Δ_A per Eq 20-21
+            delta_h, delta_a = compute_session_divergence(events, ref_h, ref_a)
+            # per-session contamination estimate α̂(τ') = σ(β(Δ_H - Δ_A))
+            alpha_hat = 1.0 / (1.0 + np.exp(-2.0 * (delta_h - delta_a))) if (delta_h + delta_a) > 0 else 0.5
+
+            theta = ({'price_sensitivity': rng.uniform(0.05, 0.2), 'base_conversion': 0.01, 'info_value': 1.0} if is_agent
+                     else {'price_sensitivity': rng.uniform(1.5, 4.0), 'base_conversion': rng.uniform(0.2, 0.5), 'info_value': 0.0})
+
+            # store session demand for downstream analysis
+            self._session_demands.append(SessionDemand(
+                session_id=sid, q=q, trajectory=events, delta_h=delta_h, delta_a=delta_a,
+                alpha_hat=alpha_hat, actor_class="A" if is_agent else "H", theta=theta))
+
+            viewed = list({e["product_idx"] for e in events if "product_idx" in e})
+            if not viewed:
+                vr = cfg.agent_views_range if is_agent else cfg.human_views_range
+                viewed = list(rng.choice(instruments.n, size=min(rng.integers(*vr), instruments.n), replace=False))
+
+            for vi, iid in enumerate(viewed):
+                opps.append(Opportunity(
+                    id=f"{sid}-{iid}", type=OpportunityType.SESSION, side=Side.BUY,
+                    instrument_id=int(iid), size=1.0, t=t + rng.uniform(0, dt),
+                    context={'session_id': sid, 'actor_class': 'AGENT' if is_agent else 'HUMAN', 'is_agent': is_agent,
+                             'reconnaissance_intent': is_agent, 'view_index': vi, 'total_views': len(viewed),
+                             'theta': theta, 'trajectory_events': fevts, 'mdp_trajectory': events,
+                             'demand_proxy': q, 'alpha_hat': alpha_hat, 'delta_h': delta_h, 'delta_a': delta_a}))
+        return opps
+
+
+@dataclass
+class AdversarialArrivalConfig:
+    base_rate: float = 5.0
+    n_parallel_agents: int = 3
+    query_all_products: bool = True
+
+
+class AdversarialArrivalModel:
+    """Adversarial coordination (Theorem 1): as N->inf, COI->0."""
+
+    def __init__(self, cfg: AdversarialArrivalConfig | None = None):
+        self.cfg = cfg or AdversarialArrivalConfig()
+        self._qcount = 0
+
+    def sample(self, t: float, dt: float, instruments: InstrumentSet,
+               market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]:
+        cfg, opps = self.cfg, []
+        for _ in range(poisson_arrivals(cfg.base_rate, dt, rng)):
+            self._qcount += 1
+            for ai in range(cfg.n_parallel_agents):
+                sid = f"adv{self._qcount:06d}-{ai}"
+                prods = np.arange(instruments.n) if cfg.query_all_products else rng.choice(instruments.n, size=1)
+                for iid in prods:
+                    opps.append(Opportunity(
+                        id=f"{sid}-{iid}", type=OpportunityType.SESSION, side=Side.BUY,
+                        instrument_id=int(iid), size=1.0, t=t,
+                        context={'session_id': sid, 'actor_class': 'AGENT', 'is_agent': True, 'adversarial': True,
+                                 'agent_index': ai, 'query_group': self._qcount,
+                                 'theta': {'price_sensitivity': 0.0, 'base_conversion': 0.0, 'info_value': 1.0}}))
+        return opps
--- a/lab/case/thesis/execution.py
+++ b/lab/case/thesis/execution.py
@@ -0,0 +1,91 @@
+"""Execution models with divergent H/A behavior using ground truth labels."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Dict
+import numpy as np
+from ...outlet.types import Opportunity, Quote, InstrumentSet, MarketState
+from ...outlet.math_util import sigmoid, safe_log, EPS
+
+
+@dataclass
+class HybridExecutionConfig:
+    human_base_prob: float = 0.3
+    human_elasticity: float = 2.5
+    agent_conversion: float = 0.01
+    cross_elasticity: float = 0.4
+    quality_weight: float = 0.2
+    use_separability: bool = False
+
+
+class HybridExecutionModel:
+    """Execution with divergent H/A behavior using ground truth labels."""
+
+    def __init__(self, cfg: HybridExecutionConfig | None = None):
+        self.cfg = cfg or HybridExecutionConfig()
+
+    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
+             market: MarketState | None, rng: np.random.Generator) -> float:
+        cfg, idx = self.cfg, int(opp.instrument_id)
+        price, ref, cost = float(quote.prices[idx]), float(instruments.refs[idx]), float(instruments.costs[idx])
+        ctx = opp.context
+        theta = ctx.get('theta', {})
+        is_agent = ctx.get('is_agent', False)
+
+        if is_agent:
+            return cfg.agent_conversion * theta.get('base_conversion', 1.0)
+
+        # human logit discrete choice
+        sens = theta.get('price_sensitivity', cfg.human_elasticity)
+        base = theta.get('base_conversion', cfg.human_base_prob)
+        u_price = -sens * safe_log(price / (ref + EPS))
+        quality = instruments.instruments[idx].attrs.get('quality', 0.5)
+        u_quality = cfg.quality_weight * quality
+
+        u_comp = 0.0
+        if market and market.competitor_quotes is not None:
+            cp = market.competitor_quotes[idx]
+            if cp < price:
+                u_comp = -cfg.cross_elasticity * (price - cp) / ref
+
+        utility = safe_log(base / (1 - base + EPS)) + u_price + u_quality + u_comp
+        return float(sigmoid(utility))
+
+    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray:
+        if context is None:
+            return fills / (self.cfg.human_base_prob + EPS)
+        agent_frac = context.get('contamination', 0.0)
+        return fills / (self.cfg.human_base_prob * (1 - agent_frac) + EPS)
+
+
+@dataclass
+class SeparableExecutionConfig:
+    human_funnel: Dict[str, float] = None
+    agent_funnel: Dict[str, float] = None
+
+    def __post_init__(self):
+        self.human_funnel = self.human_funnel or {'view_to_detail': 0.4, 'detail_to_cart': 0.3, 'cart_to_purchase': 0.6}
+        self.agent_funnel = self.agent_funnel or {'view_to_detail': 0.8, 'detail_to_cart': 0.05, 'cart_to_purchase': 0.1}
+
+
+class SeparableExecutionModel:
+    """Execution with Markov funnel kernels using ground truth labels."""
+
+    def __init__(self, cfg: SeparableExecutionConfig | None = None):
+        self.cfg = cfg or SeparableExecutionConfig()
+
+    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
+             market: MarketState | None, rng: np.random.Generator) -> float:
+        is_agent = opp.context.get('is_agent', False)
+        probs = self.cfg.agent_funnel if is_agent else self.cfg.human_funnel
+        p = probs['view_to_detail'] * probs['detail_to_cart'] * probs['cart_to_purchase']
+
+        if not is_agent:
+            idx = int(opp.instrument_id)
+            price_ratio = quote.prices[idx] / (instruments.refs[idx] + EPS)
+            p *= np.exp(-0.5 * (price_ratio - 1.0))
+        return float(np.clip(p, 0, 1))
+
+    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray:
+        h = self.cfg.human_funnel
+        exp_conv = h['view_to_detail'] * h['detail_to_cart'] * h['cart_to_purchase']
+        return fills / (exp_conv + EPS)
--- a/lab/case/thesis/metrics.py
+++ b/lab/case/thesis/metrics.py
@@ -0,0 +1,102 @@
+"""Thesis metrics for COI and behavioral analysis using ground truth labels."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Dict
+import numpy as np
+from ...outlet.types import StepLogs, StepMetrics, Quote, InstrumentSet
+from ...outlet.math_util import safe_log, EPS
+
+
+@dataclass
+class COIMetrics:
+    coi_level: float = 0.0
+    coi_leakage: float = 0.0
+    realized_premium: float = 0.0
+    theoretical_max: float = 0.0
+    erosion_rate: float = 0.0
+
+    def to_dict(self) -> dict[str, float]:
+        return {k: getattr(self, k) for k in ['coi_level', 'coi_leakage', 'realized_premium', 'theoretical_max', 'erosion_rate']}
+
+
+def compute_coi(quote: Quote, instruments: InstrumentSet, metrics: StepMetrics, contamination: float) -> COIMetrics:
+    prices, costs, refs = quote.prices, instruments.costs, instruments.refs
+    margins = prices - costs
+    coi_level = float(np.mean(margins))
+    theoretical_max = float(np.mean(costs))
+    realized_premium = (metrics.revenue - metrics.cost) / metrics.units_traded if metrics.units_traded > 0 else 0.0
+    price_var = float(np.var(prices / refs))
+    coi_leakage = contamination * (coi_level + price_var)
+    erosion_rate = contamination * coi_level / (theoretical_max + EPS)
+    return COIMetrics(coi_level=coi_level, coi_leakage=coi_leakage, realized_premium=realized_premium,
+                      theoretical_max=theoretical_max, erosion_rate=erosion_rate)
+
+
+@dataclass
+class SeparabilityMetrics:
+    classification_accuracy: float = 0.0
+    estimated_alpha: float = 0.0
+    n_human_sessions: int = 0
+    n_agent_sessions: int = 0
+
+
+def compute_separability(logs: StepLogs, true_alpha: float) -> SeparabilityMetrics:
+    """Compute separability using ground truth labels only."""
+    if logs.events is None or len(logs.events) == 0:
+        return SeparabilityMetrics(estimated_alpha=true_alpha)
+
+    sessions: Dict[str, bool] = {}
+    for evt in logs.events:
+        sid = evt.metadata.get('session_id', evt.opportunity_id)
+        if sid not in sessions:
+            sessions[sid] = evt.metadata.get('is_agent', False)
+
+    n_agent = sum(1 for is_agent in sessions.values() if is_agent)
+    n_human = len(sessions) - n_agent
+    est_alpha = n_agent / len(sessions) if sessions else 0.0
+
+    return SeparabilityMetrics(
+        classification_accuracy=1.0,  # ground truth is always correct
+        estimated_alpha=est_alpha,
+        n_human_sessions=n_human,
+        n_agent_sessions=n_agent)
+
+
+@dataclass
+class RevenueAttribution:
+    total_revenue: float = 0.0
+    human_revenue: float = 0.0
+    agent_revenue: float = 0.0
+    human_conversion: float = 0.0
+    agent_conversion: float = 0.0
+
+
+def compute_attribution(logs: StepLogs, metrics: StepMetrics) -> RevenueAttribution:
+    if logs.executions is None:
+        return RevenueAttribution(total_revenue=metrics.revenue)
+
+    human_rev, agent_rev, human_cnt, agent_cnt = 0.0, 0.0, 0, 0
+    for exe in logs.executions:
+        if exe.propensity < 0.05:
+            agent_rev += exe.price * exe.size_filled
+            agent_cnt += 1
+        else:
+            human_rev += exe.price * exe.size_filled
+            human_cnt += 1
+
+    total_exp = logs.aggregates.get('n_arrivals', 1)
+    return RevenueAttribution(
+        total_revenue=metrics.revenue, human_revenue=human_rev, agent_revenue=agent_rev,
+        human_conversion=human_cnt / (total_exp * 0.8 + EPS),
+        agent_conversion=agent_cnt / (total_exp * 0.2 + EPS))
+
+
+def order_statistic_erosion(n_agents: int, price_variance: float) -> float:
+    """COI erosion from Theorem 1: as N->inf, min(p_1..p_N)->p_min."""
+    if n_agents <= 1:
+        return 0.0
+    sigma, log_n = np.sqrt(price_variance), safe_log(n_agents)
+    if log_n < 1:
+        return 0.0
+    shift = sigma * (np.sqrt(2 * log_n) - (safe_log(log_n) + safe_log(4 * np.pi)) / (2 * np.sqrt(2 * log_n) + EPS))
+    return float(min(shift / (sigma * 2 + EPS), 1.0))
--- a/lab/case/thesis/objectives.py
+++ b/lab/case/thesis/objectives.py
@@ -0,0 +1,228 @@
+"""
+Thesis-specific objectives implementing robust pricing under contamination.
+
+Implements the Maximin objective from Eq 23:
+π* = argmax_π min_{Q ∈ U_ε} E_d~Q[R(p,d) - λ·COI(p)]
+
+Key components:
+- COIObjective: Cost of Information penalty (Definition 1)
+- RobustStackelbergObjective: Full maximin objective with Wasserstein robustness
+- UXPenalty: User experience degradation from volatility
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+from ...outlet.objectives.base import BaseObjective, CompositeObjective
+from ...outlet.types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
+from ...outlet.math_util import safe_log, EPS
+
+class COIObjective(BaseObjective):
+    """Cost of Information penalty from Definition 1.
+
+    COI(π) = E[P] - p_min
+
+    The expected price premium over marginal cost represents the platform's
+    pricing power. Agent reconnaissance erodes this by revealing price
+    distribution to buyers.
+
+    We implement COI_leakage = f(τ') · InfoValue(p, τ')
+    where f(τ') is the estimated agent probability.
+    """
+
+    def __init__(self, lambda_coi: float = 1.0, use_revelation: bool = False):
+        """
+        Args:
+            lambda_coi: Weight on COI penalty
+            use_revelation: If True, use -log(π(p)) as info value (penalizes rare prices)
+        """
+        self.lambda_coi = lambda_coi
+        self.use_revelation = use_revelation
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        # COI_leakage = α · InfoValue
+        alpha = hidden.contamination
+
+        if self.use_revelation:
+            # revelation surrogate: rare prices reveal more about policy
+            # InfoValue = -log(π(p|τ')) ≈ surprise of the price
+            price_surprise = np.mean(np.abs(quote.prices - instruments.refs) / (instruments.refs + EPS))
+            info_value = price_surprise
+        else:
+            # query-tax surrogate: each agent query incurs constant leakage
+            info_value = 1.0
+
+        leakage = alpha * info_value
+        return -self.lambda_coi * leakage
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        alpha = hidden.contamination
+        margins = (quote.prices - instruments.costs) / (instruments.costs + EPS)
+        return {
+            'coi_penalty': self.reward(quote, instruments, metrics, hidden, obs),
+            'contamination': alpha,
+            'avg_margin': float(np.mean(margins)),
+        }
+
+@dataclass
+class RobustObjectiveConfig:
+    """Configuration for robust Stackelberg objective.
+
+    Attributes:
+        lambda_coi: Weight on COI penalty (λ in Eq 23)
+        lambda_ux: Weight on UX penalty
+        lambda_volatility: Weight on price volatility penalty
+        gamma_inventory: Inventory risk aversion
+        wasserstein_epsilon: Ambiguity set radius (ε in Eq 21)
+    """
+    lambda_coi: float = 0.5
+    lambda_ux: float = 0.1
+    lambda_volatility: float = 0.2
+    gamma_inventory: float = 0.1
+    wasserstein_epsilon: float = 0.1
+
+class RobustStackelbergObjective(BaseObjective):
+    """Implements the Maximin Objective from thesis Eq 23.
+
+    π* = argmax_π min_{Q ∈ U_ε(P̂_N)} E_d~Q[R(p,d) - λ·COI(p)]
+
+    The objective balances:
+    1. Revenue R(p,d) from human purchases
+    2. COI penalty for information leakage to agents
+    3. UX penalty for price volatility
+    4. Inventory/holding costs
+
+    The min over ambiguity set U_ε is approximated by penalizing
+    high contamination scenarios more heavily.
+    """
+
+    def __init__(self, cfg: RobustObjectiveConfig | None = None):
+        self.cfg = cfg or RobustObjectiveConfig()
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        cfg = self.cfg
+
+        # 1. base revenue (R(p,d))
+        revenue = metrics.revenue
+        cost = metrics.cost
+        profit = revenue - cost
+
+        # 2. COI penalty: scales with contamination and margin extraction
+        # high margins + high contamination = high leakage
+        alpha = hidden.contamination
+        margins = quote.prices - instruments.costs
+        avg_margin = float(np.mean(margins))
+        coi_penalty = cfg.lambda_coi * avg_margin * alpha
+
+        # 3. UX penalty: price volatility harms legitimate users
+        volatility_penalty = cfg.lambda_volatility * metrics.volatility
+
+        # 4. inventory/position cost
+        position_penalty = cfg.gamma_inventory * metrics.position_cost
+
+        # 5. lost opportunity cost (stockouts)
+        lost_penalty = 0.1 * metrics.lost_opportunity
+
+        # robust adjustment: under adversarial distribution Q,
+        # expect lower revenue and higher costs
+        # approximate via worst-case contamination within ε-ball
+        worst_case_alpha = min(alpha + cfg.wasserstein_epsilon, 1.0)
+        robustness_penalty = cfg.wasserstein_epsilon * avg_margin * worst_case_alpha
+
+        total = profit - coi_penalty - volatility_penalty - position_penalty - lost_penalty - robustness_penalty
+
+        return total
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        cfg = self.cfg
+        alpha = hidden.contamination
+        margins = quote.prices - instruments.costs
+        avg_margin = float(np.mean(margins))
+
+        return {
+            'revenue': metrics.revenue,
+            'cost': metrics.cost,
+            'profit': metrics.revenue - metrics.cost,
+            'coi_penalty': -cfg.lambda_coi * avg_margin * alpha,
+            'volatility_penalty': -cfg.lambda_volatility * metrics.volatility,
+            'position_penalty': -cfg.gamma_inventory * metrics.position_cost,
+            'lost_penalty': -0.1 * metrics.lost_opportunity,
+            'robustness_penalty': -cfg.wasserstein_epsilon * avg_margin * min(alpha + cfg.wasserstein_epsilon, 1.0),
+            'contamination': alpha,
+            'avg_margin_pct': avg_margin / (float(np.mean(instruments.costs)) + EPS),
+        }
+
+class UXPenalty(BaseObjective):
+    """User experience penalty from price volatility.
+
+    High price volatility degrades UX for legitimate human users.
+    This term ensures the defense doesn't harm real customers while
+    protecting against agent reconnaissance.
+    """
+
+    def __init__(self, scale: float = 1.0, max_acceptable_volatility: float = 0.1):
+        self.scale = scale
+        self.max_vol = max_acceptable_volatility
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        # penalty increases quadratically beyond threshold
+        excess_vol = max(0, metrics.volatility - self.max_vol)
+        return -self.scale * (excess_vol ** 2)
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        return {
+            'ux_penalty': self.reward(quote, instruments, metrics, hidden, obs),
+            'volatility': metrics.volatility,
+        }
+
+class AdaptiveObjective(BaseObjective):
+    """Objective that adapts weights based on estimated contamination.
+
+    When contamination is low, focus on revenue maximization.
+    When contamination is high, increase COI defense weight.
+    """
+
+    def __init__(self, base_lambda_coi: float = 0.3, max_lambda_coi: float = 2.0,
+                 adaptation_rate: float = 2.0):
+        self.base_lambda = base_lambda_coi
+        self.max_lambda = max_lambda_coi
+        self.rate = adaptation_rate
+
+    def _adaptive_lambda(self, alpha: float) -> float:
+        # sigmoid scaling: λ(α) = base + (max-base) * sigmoid(rate*(α-0.5))
+        from ...outlet.math_util import sigmoid
+        scale = sigmoid(self.rate * (alpha - 0.3))
+        return self.base_lambda + (self.max_lambda - self.base_lambda) * scale
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        alpha = hidden.contamination
+        lambda_coi = self._adaptive_lambda(alpha)
+
+        profit = metrics.revenue - metrics.cost
+        margins = quote.prices - instruments.costs
+        coi_penalty = lambda_coi * float(np.mean(margins)) * alpha
+
+        return profit - coi_penalty
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        alpha = hidden.contamination
+        return {
+            'profit': metrics.revenue - metrics.cost,
+            'adaptive_lambda': self._adaptive_lambda(alpha),
+            'contamination': alpha,
+        }
+
+def make_thesis_objective(lambda_coi: float = 0.5, lambda_ux: float = 0.1,
+                          lambda_vol: float = 0.2) -> CompositeObjective:
+    """Create the standard thesis objective composition."""
+    return CompositeObjective([
+        (RobustStackelbergObjective(RobustObjectiveConfig(
+            lambda_coi=lambda_coi, lambda_ux=lambda_ux, lambda_volatility=lambda_vol)), 1.0),
+    ])
--- a/lab/case/thesis/platform.py
+++ b/lab/case/thesis/platform.py
@@ -0,0 +1,176 @@
+"""Thesis platform with real MDP behavioral models and separability scoring."""
+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
+import numpy as np
+from ...outlet import (Platform, PlatformConfig, PositionModel, PositionConfig,
+                       PostedPriceMechanism, make_instruments, InstrumentType, LogLevel)
+from ...outlet.mechanisms.posted_price import PostedPriceConfig
+from ...outlet.observation import DefaultObservationBuilder, ObservationConfig
+from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig
+from .execution import HybridExecutionModel, HybridExecutionConfig
+from .objectives import RobustStackelbergObjective, RobustObjectiveConfig
+
+
+@dataclass
+class ThesisConfig:
+    # instruments
+    n_instruments: int = 10
+    cost_range: tuple[float, float] = (5.0, 50.0)
+    margin_range: tuple[float, float] = (0.2, 0.5)
+
+    # contamination (Section 3.1)
+    alpha_contamination: float = 0.2
+    alpha_drift: float = 0.0
+    alpha_bounds: tuple[float, float] = (0.0, 0.5)
+
+    # objectives (Eq 23)
+    lambda_coi: float = 0.5
+    lambda_ux: float = 0.1
+    lambda_volatility: float = 0.2
+    wasserstein_epsilon: float = 0.1
+
+    # arrivals
+    sessions_per_step: int = 30
+    human_views_range: tuple[int, int] = (1, 4)
+    agent_views_range: tuple[int, int] = (3, 10)
+
+    # inventory
+    initial_inventory: float = 100.0
+    holding_cost_rate: float = 0.002
+
+    # real behavioral models (from sim.rl)
+    use_real_behavior: bool = True
+    use_separability: bool = False  # disabled until classifier trained
+    human_data_dir: str = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data"
+    agent_data_dir: str = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data"
+
+    # simulation
+    max_steps: int = 500
+    seed: int | None = 24
+    log_level: LogLevel = LogLevel.AGG_ONLY
+
+
+def _resolve_data_dirs(cfg: ThesisConfig) -> tuple[str, str]:
+    """Resolve data directories for behavioral models."""
+    base = Path(__file__).parent.parent.parent.parent / "experiments"
+    human = cfg.human_data_dir or str(base / "collected_data")
+    agent = cfg.agent_data_dir or str(base / "agents/collected_data")
+    return human, agent
+
+
+def make_thesis_platform(cfg: ThesisConfig | None = None) -> Platform:
+    """Create platform with real MDP behavioral models.
+
+    Implements:
+    - Contaminated arrivals using learned MDP kernels from behavior_loader
+    - Hybrid execution with real separability scoring from lib.separability
+    - Robust Stackelberg objective (Eq 23)
+    """
+    cfg = cfg or ThesisConfig()
+    rng = np.random.default_rng(cfg.seed)
+    human_dir, agent_dir = _resolve_data_dirs(cfg)
+
+    instruments = make_instruments(
+        n=cfg.n_instruments, cost_range=cfg.cost_range, margin_range=cfg.margin_range,
+        inst_type=InstrumentType.SKU, rng=rng)
+    instruments.position = np.full(cfg.n_instruments, cfg.initial_inventory)
+
+    arrival = ContaminatedArrivalModel(ContaminatedArrivalConfig(
+        base_rate=cfg.sessions_per_step,
+        alpha_contamination=cfg.alpha_contamination,
+        alpha_drift=cfg.alpha_drift,
+        alpha_bounds=cfg.alpha_bounds,
+        human_views_range=cfg.human_views_range,
+        agent_views_range=cfg.agent_views_range,
+        use_real_behavior=cfg.use_real_behavior,
+        human_data_dir=human_dir,
+        agent_data_dir=agent_dir,
+    ))
+
+    execution = HybridExecutionModel(HybridExecutionConfig(
+        use_separability=cfg.use_separability,
+    ))
+
+    mechanism = PostedPriceMechanism(PostedPriceConfig(max_delta_pct=0.15, min_margin_pct=0.05))
+    position = PositionModel(PositionConfig(initial_position=cfg.initial_inventory, holding_cost_rate=cfg.holding_cost_rate))
+
+    market = None
+    objective = RobustStackelbergObjective(RobustObjectiveConfig(
+        lambda_coi=cfg.lambda_coi, lambda_ux=cfg.lambda_ux,
+        lambda_volatility=cfg.lambda_volatility, wasserstein_epsilon=cfg.wasserstein_epsilon))
+
+    obs_builder = DefaultObservationBuilder(ObservationConfig(mask_true_demand=True))
+    platform_cfg = PlatformConfig(n_instruments=cfg.n_instruments, max_steps=cfg.max_steps,
+                                   seed=cfg.seed, log_level=cfg.log_level, mask_demand=True)
+
+    return Platform(instruments=instruments, mechanism=mechanism, arrival=arrival, execution=execution,
+                    position=position, market=market, obs_builder=obs_builder, objective=objective, cfg=platform_cfg)
+
+
+@dataclass
+class AblationConfig(ThesisConfig):
+    disable_coi_penalty: bool = False
+    disable_ux_penalty: bool = False
+    disable_contamination: bool = False
+    disable_real_behavior: bool = False
+
+
+def make_ablation_platform(cfg: AblationConfig) -> Platform:
+    if cfg.disable_coi_penalty:
+        cfg.lambda_coi = 0.0
+    if cfg.disable_ux_penalty:
+        cfg.lambda_ux = 0.0
+    if cfg.disable_contamination:
+        cfg.alpha_contamination = 0.0
+    if cfg.disable_real_behavior:
+        cfg.use_real_behavior = False
+        cfg.use_separability = False
+    return make_thesis_platform(cfg)
+
+
+def sweep_contamination(alpha_values: list[float], base_cfg: ThesisConfig | None = None,
+                        n_steps: int = 100, seed: int = 42) -> dict[float, dict]:
+    """Test performance across contamination levels (Theorem 1 validation)."""
+    from ...experiments.eval import rollout, fixed_price_policy
+
+    results = {}
+    base_cfg = base_cfg or ThesisConfig()
+
+    for alpha in alpha_values:
+        cfg = ThesisConfig(**{k: v for k, v in base_cfg.__dict__.items() if k != 'alpha_contamination'},
+                          alpha_contamination=alpha)
+        platform = make_thesis_platform(cfg)
+        policy = fixed_price_policy(platform.instruments.refs)
+        result = rollout(platform, policy, n_steps, seed=seed)
+        results[alpha] = {
+            'total_reward': result.total_reward,
+            'total_pnl': result.total_pnl,
+            'avg_conversion': result.avg_conversion,
+            'final_contamination': platform._hidden.contamination,
+        }
+    return results
+
+
+def sweep_behavior_modes(base_cfg: ThesisConfig | None = None, n_steps: int = 100, seed: int = 42) -> dict[str, dict]:
+    """Compare real vs synthetic behavioral models."""
+    from ...experiments.eval import rollout, fixed_price_policy
+
+    base_cfg = base_cfg or ThesisConfig()
+    modes = {
+        'real_mdp': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': True, 'use_separability': True}),
+        'synthetic': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': False, 'use_separability': False}),
+        'real_mdp_no_sep': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': True, 'use_separability': False}),
+    }
+
+    results = {}
+    for name, cfg in modes.items():
+        platform = make_thesis_platform(cfg)
+        policy = fixed_price_policy(platform.instruments.refs)
+        result = rollout(platform, policy, n_steps, seed=seed)
+        results[name] = {
+            'total_reward': result.total_reward,
+            'total_pnl': result.total_pnl,
+            'avg_conversion': result.avg_conversion,
+        }
+    return results
--- a/lab/case/thesis/run_experiment.py
+++ b/lab/case/thesis/run_experiment.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+"""Thesis simulation experiments with real MDP behavioral models."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+if __name__ == '__main__':
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
+
+from lab.case.thesis.platform import make_thesis_platform, ThesisConfig
+from lab.case.thesis.metrics import compute_coi, compute_separability
+from lab.experiments.eval import compare_policies
+import numpy as np
+
+
+def demo_basic_simulation():
+    print("=" * 70)
+    print("THESIS SIMULATION: Contaminated Dynamic Pricing (Real MDP Kernels)")
+    print("=" * 70)
+
+    cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, lambda_coi=0.5,
+                       max_steps=100, seed=42, use_real_behavior=True)
+    platform = make_thesis_platform(cfg)
+
+    print(f"\nInstruments: {platform.instruments.n}")
+    print(f"Reference prices: {platform.instruments.refs.round(2)}")
+    print(f"Costs: {platform.instruments.costs.round(2)}")
+    print(f"Initial contamination alpha={cfg.alpha_contamination}")
+    print(f"Using real behavior: {cfg.use_real_behavior}")
+
+    result = platform.reset(seed=42)
+    total_reward, coi_history = 0, []
+
+    print(f"\n{'Step':>5} {'Reward':>10} {'PnL':>10} {'COI':>8} {'alpha':>6} {'Conv':>8}")
+    print("-" * 55)
+
+    for t in range(cfg.max_steps):
+        action = platform.instruments.refs * np.random.uniform(0.95, 1.15, size=platform.instruments.n)
+        result = platform.step(action)
+        total_reward += result.reward
+        coi = compute_coi(platform._quote, platform.instruments, result.metrics, result.hidden.contamination)
+        coi_history.append(coi.coi_level)
+
+        if t % 20 == 0:
+            print(f"{t:5d} {result.reward:10.2f} {result.metrics.pnl:10.2f} "
+                  f"{coi.coi_level:8.2f} {result.hidden.contamination:6.2f} {result.metrics.conversion:8.3f}")
+
+    print("-" * 55)
+    print(f"Total Reward: {total_reward:.2f}")
+    print(f"Average COI: {np.mean(coi_history):.2f}")
+    print(f"COI Trend: {coi_history[-1] - coi_history[0]:+.2f}")
+
+
+def demo_contamination_sweep():
+    print("\n" + "=" * 70)
+    print("EXPERIMENT: COI Erosion vs Contamination (Theorem 1)")
+    print("=" * 70)
+
+    from lab.case.thesis.platform import sweep_contamination
+    trials = 20
+    alpha_values = [i/trials for i in range(trials)]
+    results = sweep_contamination(alpha_values, n_steps=100, seed=42)
+
+    print(f"\n{'alpha':>6} {'Reward':>12} {'PnL':>12} {'Conv':>10}")
+    print("-" * 45)
+    for alpha, m in sorted(results.items()):
+        print(f"{alpha:6.2f} {m['total_reward']:12.2f} {m['total_pnl']:12.2f} {m['avg_conversion']:10.3f}")
+
+    rewards = [results[a]['total_reward'] for a in sorted(results.keys())]
+    dataset = np.array([[a, r] for a, r in zip(alpha_values, rewards)])
+    trend = np.corrcoef(dataset[:, 0], dataset[:, 1])[0, 1]
+    print(f"Trend (alpha~reward correlation): {trend:.3f}")
+
+
+def demo_policy_comparison():
+    print("\n" + "=" * 70)
+    print("EXPERIMENT: Policy Comparison under Contamination")
+    print("=" * 70)
+
+    cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.25, max_steps=100, seed=42)
+    platform = make_thesis_platform(cfg)
+
+    def fixed_policy(obs, t): return platform.instruments.refs.copy(), 1.0
+    def aggressive_policy(obs, t): return platform.instruments.refs * 1.3, 1.0
+    def conservative_policy(obs, t): return platform.instruments.refs * 1.05, 1.0
+    def adaptive_policy(obs, t):
+        fills = obs[platform.instruments.n:2*platform.instruments.n]
+        exp = obs[2*platform.instruments.n:3*platform.instruments.n]
+        conv = np.sum(fills) / (np.sum(exp) + 1e-8)
+        return platform.instruments.refs * (1.0 + 0.2 * conv), 1.0
+
+    policies = {'fixed': fixed_policy, 'aggressive': aggressive_policy,
+                'conservative': conservative_policy, 'adaptive': adaptive_policy}
+    results = compare_policies(platform, policies, n_steps=100, n_runs=3, seed=42)
+
+    print(f"\n{'Policy':>15} {'Reward':>12} {'Std':>10} {'PnL':>12} {'Conv':>10}")
+    print("-" * 65)
+    for name, r in sorted(results.items(), key=lambda x: -x[1]['mean_reward']):
+        print(f"{name:>15} {r['mean_reward']:12.2f} {r['std_reward']:10.2f} "
+              f"{r['mean_pnl']:12.2f} {r['mean_conversion']:10.3f}")
+
+
+def demo_session_analysis():
+    """Analyze session-level behavior from MDP trajectories."""
+    print("\n" + "=" * 70)
+    print("EXPERIMENT: Session Analysis (Ground Truth)")
+    print("=" * 70)
+
+    from lab.outlet.constants import LogLevel
+    cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, max_steps=50,
+                       log_level=LogLevel.FULL, seed=42, use_real_behavior=True)
+    platform = make_thesis_platform(cfg)
+
+    result = platform.reset(seed=42)
+    human_sessions, agent_sessions = 0, 0
+
+    for t in range(cfg.max_steps):
+        action = platform.instruments.refs * 1.1
+        result = platform.step(action)
+        sep = compute_separability(result.logs, result.hidden.contamination)
+        human_sessions += sep.n_human_sessions
+        agent_sessions += sep.n_agent_sessions
+
+    total = human_sessions + agent_sessions
+    print(f"\nTotal sessions: {total}")
+    print(f"Human sessions: {human_sessions} ({100*human_sessions/total:.1f}%)")
+    print(f"Agent sessions: {agent_sessions} ({100*agent_sessions/total:.1f}%)")
+    print(f"True contamination: {cfg.alpha_contamination:.1%}")
+    print(f"Observed contamination: {agent_sessions/total:.1%}")
+
+
+if __name__ == '__main__':
+    demo_basic_simulation()
+    demo_contamination_sweep()
+    # demo_policy_comparison()
+    # demo_session_analysis()