Merge pull request #50 from velocitatem/new-simulation-environment-development

New simulation environment development
2026-07-16 01:53:37 +00:00 · 2026-01-30 13:19:53 +01:00
parent a033e77697 52fe865598
commit 574e05d9e0
65 changed files with 6747 additions and 644 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@
 *.old
 **/package-lock.json
 **/*.parquet
+**/_build/

 paper/src/bib/auto
 experiments/airflow/logs/*
@@ -21,3 +22,5 @@ sim/rl/behavior_loader/*.png
 sim/rl/behavior_loader/*.svg
 sim/rl/behavior_loader/*.pdf
 tests/e2e/node_modules/**
+lab/case/thesis/runs*/
+sim/case/thesis_simplified/runs*/
--- a/engine/engine.py
+++ b/engine/engine.py
@@ -0,0 +1,66 @@
+from sys import platform
+import numpy as np
+from .lib.demand import generate_demand, estimate_demand
+from .lib.behavior import sample_behavior
+from logging import INFO, getLogger
+logger = getLogger(__name__)
+logger.setLevel(INFO)
+
+
+
+class MarketEngine():
+    """Simulated market that turns posted prices into an observed demand estimate.
+
+    A latent demand vector is produced by ``generate_demand``; browsing trajectories are
+    then sampled for human and agent sessions via ``sample_behavior`` and summarised with
+    ``estimate_demand``.
+    """
+
+    def __init__(self,
+                 alpha = 0.5,
+                 N = 100,
+                 demand_distribution = (50, 10),
+                 demand_sampling_function = np.random.normal):
+        """Store the session mix and the demand sampler.
+
+        Args:
+            alpha: share of the ``N`` sessions that are agents.
+            N: total number of sessions per market step.
+            demand_distribution: positional parameters forwarded to the sampling function.
+            demand_sampling_function: callable used by ``generate_demand`` to draw valuations.
+        """
+        self.Nagents = int(N*alpha)
+        self.Nhumans = int(N*(1-alpha))
+        self.demand = (demand_sampling_function, demand_distribution)
+
+    def act(self, prices):
+        """Return the demand estimate observed for ``prices``.
+
+        Humans are sampled before agents, so the order of random draws is stable.
+        """
+        demand = generate_demand(prices, *self.demand)
+        # Use the configured session mix; the previous hard-coded 100/100 split made
+        # alpha and N irrelevant to the simulation.
+        human_t = [sample_behavior(demand, human=True) for _ in range(self.Nhumans)]
+        agent_t = [sample_behavior(demand, human=False) for _ in range(self.Nagents)]
+        return estimate_demand(human_t + agent_t)
+
+    def measure(self):
+        """Placeholder; currently does nothing."""
+        pass
+
+class PricingEngine():
+    """Stub pricing platform: ignores the demand signal and posts random prices."""
+
+    def __init__(self) -> None:
+        pass
+
+    def act(self, demand):
+        """Return 10 prices drawn uniformly from [25, 100); ``demand`` is unused."""
+        n_prices = 10
+        return np.random.uniform(low=25, high=100, size=n_prices)
+
+
+
+class Limbo():
+    """Alternates turns between a platform and a market, feeding each the other's output."""
+
+    def __init__(self, platform, market) -> None:
+        self.platform = platform
+        self.market = market
+        self.platform_turn = True  # the platform always moves first
+        self.output = None  # last value returned by either side's act()
+
+    def step(self):
+        """Let the side whose turn it is act on the previous output, print it, and swap turns."""
+        actor = self.platform if self.platform_turn else self.market
+        self.output = actor.act(self.output)
+        print(self.output)
+        self.platform_turn = not self.platform_turn
+
+if __name__ == "__main__":
+    # Demo run: ten alternating turns, starting with the platform.
+    platform, market = PricingEngine(), MarketEngine()
+    limbo = Limbo(platform, market)
+    for _turn in range(10):
+        limbo.step()
--- a/engine/lib/init.py
+++ b/engine/lib/init.py
@@ -0,0 +1,3 @@
+from .demand import generate_demand, estimate_demand
+from .behavior import sample_behavior
+from .render import DashboardRenderer, style_axis
--- a/engine/lib/behavior.py
+++ b/engine/lib/behavior.py
@@ -0,0 +1,47 @@
+from sim.rl.behavior_loader.models import BehaviorModel, AgentBehaviorModel, aggregate_event_transitions
+import pandas as pd
+import numpy as np
+from .demand import generate_demand
+
+# NOTE(review): absolute path on one developer's machine; the models below cannot load
+# their data anywhere else until this is made configurable.
+base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments"
+# Data directories handed to BehaviorModel (humans) and AgentBehaviorModel (agents).
+human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/"
+
+# Keys used are 'human' and 'agent'; values are the base transition DataFrames.
+_cache = {}  # lazy cache for models and base pivots
+
+def _get_base_pivot(human: bool):
+    """Return the cached base transition table for humans or agents, building it on first use."""
+    key = 'human' if human else 'agent'
+    try:
+        return _cache[key]
+    except KeyError:
+        pass
+    if human:
+        model = BehaviorModel(human_dir)
+    else:
+        model = AgentBehaviorModel(agent_dir)
+    transitions = aggregate_event_transitions(model.build_MDP())
+    # missing transitions become zero weight
+    _cache[key] = pd.DataFrame(transitions).fillna(0.0)
+    return _cache[key]
+
+def adjust_behavior_to_condition(condition, transition_matrix):
+    """Expand an event transition table into an (event, product) transition table.
+
+    Each cell of ``transition_matrix`` becomes a P x P block equal to the cell value times
+    the outer product of the normalised ``condition`` with itself, where P = len(condition).
+    Row and column labels become ``"<event>_product<p>"``.
+    """
+    weights = condition / np.sum(condition)
+    product_block = np.outer(weights, weights)
+    expanded = np.kron(transition_matrix.values, product_block)
+
+    product_ids = range(len(condition))
+    col_labels = [f"{c}_product{p}" for c in transition_matrix.columns.tolist() for p in product_ids]
+    row_labels = [f"{r}_product{p}" for r in transition_matrix.index.tolist() for p in product_ids]
+    return pd.DataFrame(expanded, index=row_labels, columns=col_labels)
+
+def sample_behavior(condition, human=True, max_len=40):
+    """Sample one event trajectory from the demand-adjusted transition table.
+
+    Args:
+        condition: per-product demand weights passed to ``adjust_behavior_to_condition``.
+        human: use the human base table when True, the agent one otherwise.
+        max_len: maximum number of events in the returned trajectory.
+
+    Returns:
+        List of ``"<event>_product<p>"`` labels. The walk starts at a uniformly chosen row
+        label and ends at ``max_len`` events, at the first event containing ``'checkout'``,
+        or at a state that has no outgoing row.
+    """
+    base_pivot = _get_base_pivot(human)
+    adjusted_transitions = adjust_behavior_to_condition(condition, base_pivot)
+    next_states = adjusted_transitions.columns
+
+    trajectory = [np.random.choice(adjusted_transitions.index)]
+    # The original condition (`< max_len or 'checkout' in ...`) was inverted: a checkout
+    # never ended the walk and instead let it run past max_len.
+    while len(trajectory) < max_len and 'checkout' not in trajectory[-1]:
+        current = trajectory[-1]
+        if current not in adjusted_transitions.index:
+            # state only ever appears as a destination: treat it as terminal
+            break
+        probs = adjusted_transitions.loc[current].values
+        total = np.sum(probs)
+        # rows without usable weight (zero or NaN total) fall back to a uniform draw
+        sample = np.random.choice(next_states, p=probs / total if total > 0 else None)
+        trajectory.append(sample)
+    return trajectory
+
+if __name__ == "__main__":
+    # Smoke test: print one human and one agent trajectory for a 3-product price vector.
+    for is_human in (True, False):
+        t = sample_behavior(generate_demand(np.array([10,20,30])), human=is_human)
+        print(t)
--- a/engine/lib/demand.py
+++ b/engine/lib/demand.py
@@ -0,0 +1,45 @@
+import logging
+import numpy as np
+from logging import getLogger
+logger = getLogger(__name__)
+
+def generate_demand(prices, distribution_method = np.random.normal, distribution_params = (50.0, 10.0)):
+    """Draw per-product valuations and convert them to demand shares in percent.
+
+    One valuation per price is drawn from ``distribution_method(*distribution_params, size=...)``.
+    Demand is the non-negative surplus ``valuation - price``, rescaled to sum to 100
+    unless every surplus is zero, in which case the zero vector is returned.
+    """
+    n_products = len(prices)
+    valuations = distribution_method(*distribution_params, size=n_products)
+    surplus = np.maximum(0, valuations - prices)  # linear model, floored at zero
+    surplus_total = np.sum(surplus)
+    if surplus_total > 0:
+        demand = surplus / surplus_total * 100
+    else:
+        demand = surplus  # nothing to normalise; avoids dividing by zero
+    logger.info(f"Generated demand for prices {prices}: {demand} with valuations from distribution {distribution_params}")
+    return demand
+
+def estimate_demand(trajectories):
+    """Estimate demand shares (percent) from product-view events in the trajectories.
+
+    An event counts when it contains ``'view_product'``; its product id is the integer left
+    after stripping ``'product'`` from the text following the last underscore. Returns a
+    dict mapping product id to share of all views, or an empty dict if there are no views.
+    """
+    view_counts = {}
+    for trajectory in trajectories:
+        for step in trajectory:
+            if 'view_product' not in step:
+                continue
+            pid = int(step.rsplit('_', 1)[-1].replace('product', ''))
+            view_counts[pid] = view_counts.get(pid, 0) + 1
+    n_views = sum(view_counts.values())
+    return {pid: (count / n_views) * 100 for pid, count in view_counts.items()}
+
+# Example usage
+if __name__ == "__main__":
+    # Demo: compare the latent demand with the estimate recovered from sampled behaviour.
+    np.random.seed(42)
+    prices = np.array([20.0, 35.0, 50.0, 65.0])
+    demand = generate_demand(prices)
+    print("Generated Demand:", demand)
+    from .behavior import sample_behavior
+    N, alphat =200, 0.1
+    n_humans, n_agents = int(N*(1 - alphat)), int(N*alphat)
+    trajectories = [sample_behavior(demand, human=True) for _ in range(n_humans)]
+    trajectories += [sample_behavior(demand, human=False) for _ in range(n_agents)]
+    demand_estimate = estimate_demand(trajectories)
+    print("Estimated Demand from Behavior:", demand_estimate)
+    # mean absolute gap between estimated and latent demand, per product
+    abs_errors = [np.abs(demand_estimate.get(k, 0) - demand[k]) for k in range(len(prices))]
+    delta = np.mean(abs_errors)
+    print("Demand Delta:", delta)
--- a/engine/lib/render.py
+++ b/engine/lib/render.py
@@ -0,0 +1,126 @@
+"""rendering logic for PHANTOM environment dashboard"""
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.gridspec import GridSpec
+
+
+def style_axis(ax, title: str = None, xlabel: str = None, ylabel: str = None):
+    """Hide the top and right spines of ``ax`` and set whichever labels are truthy."""
+    for side in ('top', 'right'):
+        ax.spines[side].set_visible(False)
+    if title:
+        ax.set_title(title, fontsize=11, fontweight='bold', pad=8)
+    if xlabel:
+        ax.set_xlabel(xlabel, fontsize=9)
+    if ylabel:
+        ax.set_ylabel(ylabel, fontsize=9)
+
+
+class DashboardRenderer:
+    """stateful renderer for PHANTOM market dynamics visualization
+
+    Holds one matplotlib figure and a 3x3 GridSpec. Every ``render`` call clears the figure
+    and redraws all seven panels from the environment's history attributes.
+    """
+
+    def __init__(self):
+        # Both are created lazily on the first render() call.
+        self.fig = None
+        self.gs = None
+
+    def render(self, env) -> None:
+        """Redraw the dashboard from ``env``.
+
+        Reads ``env._step_count``, ``env.alpha``, ``env.n_products``, ``env.market``,
+        ``env._demand_history``, ``env._price_history``, ``env._revenue_history`` and calls
+        ``env._compute_elasticity()``.
+        """
+        if self.fig is None:
+            # first call: switch to interactive mode and open the window without blocking
+            plt.ion()
+            self.fig = plt.figure(figsize=(14, 10))
+            self.gs = GridSpec(3, 3, figure=self.fig, hspace=0.35, wspace=0.3,
+                               left=0.07, right=0.95, top=0.92, bottom=0.08)
+            plt.show(block=False)
+
+        # all axes are discarded and rebuilt on every call
+        self.fig.clear()
+        self.fig.suptitle(f'PHANTOM  Market Dynamics  [t={env._step_count}, a={env.alpha:.2f}]',
+                          fontsize=14, fontweight='bold')
+
+        # transposed histories; presumably rows are products and columns are steps
+        # (assumes each history entry is a per-product vector -- confirm against env)
+        demand_mat = np.array(env._demand_history).T
+        price_mat = np.array(env._price_history).T
+        elasticity = env._compute_elasticity()
+
+        self._render_scatter(env)
+        self._render_elasticity_bar(env, elasticity)
+        self._render_session_pie(env)
+        self._render_price_heatmap(price_mat)
+        self._render_demand_heatmap(demand_mat)
+        self._render_correlation(env.n_products, price_mat, demand_mat)
+        self._render_revenue(env)
+
+        self.fig.canvas.draw_idle()
+        self.fig.canvas.flush_events()
+
+    def _render_scatter(self, env):
+        """Grid cell [0, 0]: scatter of all recorded (price, demand) pairs with a linear fit."""
+        ax = self.fig.add_subplot(self.gs[0, 0])
+        prices_flat = np.array(env._price_history).flatten()
+        demands_flat = np.array(env._demand_history).flatten()
+        # colour index per point: 0..n_products-1 repeated once per recorded step
+        product_ids = np.tile(np.arange(env.n_products), len(env._price_history))
+        ax.scatter(prices_flat, demands_flat, c=product_ids, cmap='plasma', alpha=0.6, s=15, edgecolors='none')
+        if len(prices_flat) > 1:
+            # degree-1 least-squares fit drawn across the observed price range
+            z = np.polyfit(prices_flat, demands_flat, 1)
+            p_line = np.linspace(prices_flat.min(), prices_flat.max(), 50)
+            ax.plot(p_line, np.polyval(z, p_line), '--', lw=1.5, alpha=0.8)
+        style_axis(ax, "Price-Demand Relationship", "Price ($)", "Demand")
+
+    def _render_elasticity_bar(self, env, elasticity):
+        """Grid cell [0, 1]: horizontal bar per product of the supplied elasticity values."""
+        ax = self.fig.add_subplot(self.gs[0, 1])
+        ax.barh(range(env.n_products), elasticity, alpha=0.8)
+        # reference lines at 0 and at -1
+        ax.axvline(0, lw=0.8, alpha=0.5)
+        ax.axvline(-1, lw=1, ls='--', alpha=0.5)
+        ax.set_yticks(range(env.n_products))
+        ax.set_yticklabels([f'P{i}' for i in range(env.n_products)], fontsize=7)
+        style_axis(ax, "Price Elasticity", "(dQ/dP)(P/Q)", None)
+
+    def _render_session_pie(self, env):
+        """Grid cell [0, 2]: pie of ``env.market.Nhumans`` versus ``env.market.Nagents``."""
+        ax = self.fig.add_subplot(self.gs[0, 2])
+        n_h, n_a = env.market.Nhumans, env.market.Nagents
+        wedges, _ = ax.pie([n_h, n_a], startangle=90, wedgeprops={'linewidth': 2, 'edgecolor': 'white'})
+        ax.legend(wedges, [f'H ({n_h})', f'A ({n_a})'], loc='lower center', fontsize=8,
+                  frameon=False, bbox_to_anchor=(0.5, -0.05))
+        ax.set_title("Session Mix", fontsize=11, fontweight='bold')
+
+    def _render_price_heatmap(self, price_mat):
+        """Grid cells [1, 0:2]: image of ``price_mat`` with a colorbar labelled '$'."""
+        ax = self.fig.add_subplot(self.gs[1, :2])
+        im = ax.imshow(price_mat, aspect='auto', cmap='viridis', origin='lower')
+        style_axis(ax, "Price Heatmap P(product, t)", "Step", "Product")
+        cbar = self.fig.colorbar(im, ax=ax, fraction=0.03, pad=0.02)
+        cbar.set_label('$', fontsize=8)
+
+    def _render_demand_heatmap(self, demand_mat):
+        """Grid cell [1, 2]: image of ``demand_mat`` with a colorbar."""
+        ax = self.fig.add_subplot(self.gs[1, 2])
+        im = ax.imshow(demand_mat, aspect='auto', cmap='Blues', origin='lower')
+        style_axis(ax, "Demand Q(product, t)", "Step", None)
+        self.fig.colorbar(im, ax=ax, fraction=0.046, pad=0.02)
+
+    def _render_correlation(self, n_products, price_mat, demand_mat):
+        """Grid cell [2, 0]: price-vs-demand correlation block; left empty until there are more than two columns."""
+        ax = self.fig.add_subplot(self.gs[2, 0])
+        if price_mat.shape[1] > 2:
+            # corrcoef stacks both matrices row-wise; the [:n, n:] slice keeps only the
+            # price-row versus demand-row block
+            corr = np.corrcoef(price_mat, demand_mat)[:n_products, n_products:]
+            im = ax.imshow(corr, cmap='RdBu', vmin=-1, vmax=1, aspect='auto')
+            ax.set_xticks(range(n_products))
+            ax.set_yticks(range(n_products))
+            ax.set_xticklabels([f'Q{i}' for i in range(n_products)], fontsize=6)
+            ax.set_yticklabels([f'P{i}' for i in range(n_products)], fontsize=6)
+            self.fig.colorbar(im, ax=ax, fraction=0.046, pad=0.02)
+        style_axis(ax, "Price-Demand Correlation", None, None)
+
+    def _render_revenue(self, env):
+        """Grid cells [2, 1:3]: revenue per step, with the per-step std of demand on a twin axis."""
+        ax = self.fig.add_subplot(self.gs[2, 1:])
+        n_steps = len(env._revenue_history)
+        demand_std = [np.std(d) for d in env._demand_history]
+        ax.fill_between(range(n_steps), env._revenue_history, alpha=0.3)
+        ax.plot(env._revenue_history, linewidth=2, label='Revenue')
+        ax.set_xlim(0, max(n_steps, 1))
+        ax.set_ylim(0, max(env._revenue_history) * 1.1 if env._revenue_history else 1)
+
+        ax2 = ax.twinx()
+        ax2.plot(range(n_steps), demand_std, linewidth=2, ls='-', alpha=0.9, label='sigma(Demand)')
+        # NOTE(review): min()/max() raise on an empty demand history; presumably render()
+        # is only called after the env has recorded at least one step -- confirm
+        d_min, d_max = min(demand_std), max(demand_std)
+        # pad the y-range by 20% of the spread, or by 0.5 when the series is flat
+        margin = (d_max - d_min) * 0.2 if d_max > d_min else 0.5
+        ax2.set_ylim(max(0, d_min - margin), d_max + margin)
+        ax2.set_ylabel('Demand sigma', fontsize=9)
+
+        style_axis(ax, "Revenue & Demand Dispersion", "Step", "Revenue ($)")
+        ax.legend(loc='upper left', fontsize=7, frameon=False)
+        ax2.legend(loc='upper right', fontsize=7, frameon=False)
+
+    def close(self):
+        """Close the figure if one exists; a later render() call creates a new one."""
+        if self.fig:
+            plt.close(self.fig)
+            self.fig = None
--- a/engine/studies/factors.py
+++ b/engine/studies/factors.py
@@ -0,0 +1,34 @@
+"""shared factor definitions for experimental designs"""
+import numpy as np
+from dataclasses import dataclass, field
+from typing import Callable, Any
+
+@dataclass
+class Factor:
+    """One experimental factor: a name plus the discrete levels it can take."""
+    name: str  # key under which the chosen level is stored in a config dict
+    levels: list  # candidate values for this factor
+    primary: bool = True  # full cross vs sampled
+
+# demand functions with compatible signatures
+def demand_linear(mu, sigma, size):
+    """Normal(mu, sigma) draws floored at zero."""
+    draws = np.random.normal(mu, sigma, size)
+    return np.maximum(0, draws)
+
+
+def demand_uniform(mu, sigma, size):
+    """Uniform draws on [mu - sigma, mu + sigma)."""
+    low, high = mu - sigma, mu + sigma
+    return np.random.uniform(low, high, size)
+
+
+def demand_exponential(mu, sigma, size):
+    """Exponential draws with scale mu; sigma is accepted only for signature compatibility."""
+    return np.random.exponential(mu, size)
+
+
+def demand_logistic(mu, sigma, size):
+    """Logistic draws with location mu and scale sigma."""
+    return np.random.logistic(mu, sigma, size)
+
+# Lookup from the "demand_fn" factor level (a string) to the sampler it names.
+DEMAND_FUNCTIONS = {
+    "linear": demand_linear,
+    "uniform": demand_uniform,
+    "exponential": demand_exponential,
+    "logistic": demand_logistic,
+}
+
+# Experimental factors. primary=True factors are fully crossed by the mixed design;
+# primary=False ones are sampled there. The full factorial design crosses all of them.
+FACTORS = [
+    Factor("demand_fn", list(DEMAND_FUNCTIONS.keys()), primary=True),
+    Factor("alpha", [0.1, 0.3, 0.5, 0.7], primary=True),
+    Factor("n_products", [5, 15, 30, 50], primary=True),
+    Factor("demand_mu", [30.0, 50.0, 70.0], primary=False),
+    Factor("demand_sigma", [5.0, 10.0, 20.0], primary=False),
+    Factor("N", [100, 500, 1000], primary=False),
+]
+
+# Number of seeds (0..SEEDS_PER_CONFIG-1) run for every factor combination.
+SEEDS_PER_CONFIG = 5
--- a/engine/studies/full_factorial.py
+++ b/engine/studies/full_factorial.py
@@ -0,0 +1,89 @@
+"""full factorial design - all factor combinations"""
+import sys
+sys.path.insert(0, "..")
+import logging
+from itertools import product
+import json
+import hashlib
+from pathlib import Path
+from concurrent.futures import ProcessPoolExecutor
+from .factors import FACTORS, DEMAND_FUNCTIONS, SEEDS_PER_CONFIG
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+log = logging.getLogger(__name__)
+
+def generate_configs():
+    """Build one config dict per (factor combination, seed) pair.
+
+    Each dict holds every factor name, a ``"seed"`` in ``range(SEEDS_PER_CONFIG)``, and an
+    ``"id"`` equal to the first 8 hex characters of the MD5 of its sorted-key JSON form.
+    """
+    factor_names = [factor.name for factor in FACTORS]
+    level_grid = product(*(factor.levels for factor in FACTORS))
+
+    configs = []
+    for levels in level_grid:
+        for seed in range(SEEDS_PER_CONFIG):
+            cfg = dict(zip(factor_names, levels))
+            cfg["seed"] = seed
+            # the id is hashed before it is inserted, so it covers factors and seed only
+            digest = hashlib.md5(json.dumps(cfg, sort_keys=True).encode())
+            cfg["id"] = digest.hexdigest()[:8]
+            configs.append(cfg)
+    return configs
+
+def run_single(cfg: dict) -> dict:
+    """execute one experiment config, return metrics
+
+    Runs up to 100 random-action steps in a PHANTOM environment built from ``cfg`` and
+    returns the config id, the config itself, total and average reward, and the step count.
+    """
+    from engine.wrapper import PHANTOM
+    import numpy as np
+
+    np.random.seed(cfg["seed"])
+    demand_fn = DEMAND_FUNCTIONS[cfg["demand_fn"]]
+
+    env = PHANTOM(
+        n_products=cfg["n_products"],
+        alpha=cfg["alpha"],
+        N=cfg["N"],
+    )
+    env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"]))
+    # Gymnasium spaces own a separate RNG that np.random.seed() does not touch; seed it
+    # so the sampled actions are reproducible for a given config seed.
+    env.action_space.seed(cfg["seed"])
+
+    total_reward, steps = 0.0, 0
+    try:
+        obs, _ = env.reset()
+        for _ in range(100):
+            action = env.action_space.sample()
+            obs, reward, term, trunc, _ = env.step(action)
+            total_reward += reward
+            steps += 1
+            # stop on either end-of-episode signal, not only on termination
+            if term or trunc: break
+    finally:
+        # release the renderer even if a step raises
+        env.close()
+
+    return {
+        "id": cfg["id"],
+        "config": cfg,
+        "total_reward": total_reward,
+        "avg_reward": total_reward / steps,
+        "steps": steps,
+    }
+
+def run_study(max_workers: int = None, output: str = "results_full.jsonl"):
+    """Run every full-factorial config in a process pool and write the results as JSON lines.
+
+    Results keep the order of the generated configs. Progress is logged every 100 runs.
+    Returns the list of result dicts.
+    """
+    configs = generate_configs()
+    log.info(f"full factorial: {len(configs)} configs ({len(configs)//SEEDS_PER_CONFIG} unique × {SEEDS_PER_CONFIG} seeds)")
+
+    results = []
+    with ProcessPoolExecutor(max_workers=max_workers) as ex:
+        for i, result in enumerate(ex.map(run_single, configs)):
+            results.append(result)
+            if (i+1) % 100 != 0:
+                continue
+            log.info(f"progress: {i+1}/{len(configs)}")
+
+    serialized = [json.dumps(r) for r in results]
+    Path(output).write_text("\n".join(serialized))
+    log.info(f"wrote {len(results)} results to {output}")
+    return results
+
+if __name__ == "__main__":
+    # CLI entry point: always log the design size, then run unless --dry-run is given.
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--workers", type=int, default=None)
+    parser.add_argument("--output", default="results_full.jsonl")
+    parser.add_argument("--dry-run", action="store_true", help="only show design size")
+    cli = parser.parse_args()
+
+    configs = generate_configs()
+    log.info(f"design: {len(configs)} runs | factors: {[f.name for f in FACTORS]} | levels: {[len(f.levels) for f in FACTORS]}")
+
+    if cli.dry_run is False:
+        run_study(cli.workers, cli.output)
--- a/engine/studies/mixed_lh.py
+++ b/engine/studies/mixed_lh.py
@@ -0,0 +1,106 @@
+"""mixed design: full factorial on primary factors, latin hypercube on secondary"""
+import sys
+sys.path.insert(0, "..")
+import logging
+from itertools import product
+import json
+import hashlib
+from pathlib import Path
+from concurrent.futures import ProcessPoolExecutor
+import numpy as np
+from scipy.stats.qmc import LatinHypercube
+from factors import FACTORS, DEMAND_FUNCTIONS, SEEDS_PER_CONFIG
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+log = logging.getLogger(__name__)
+
+LH_SAMPLES = 10
+
+def generate_configs(lh_samples: int = LH_SAMPLES):
+    """Cross all primary factor levels; for each cell draw ``lh_samples`` Latin-hypercube points.
+
+    Each coordinate ``u`` of a point selects ``levels[int(u * len(levels))]`` of the matching
+    secondary factor. Every resulting combination is repeated for each seed and given an
+    8-character MD5-based ``"id"``.
+    """
+    primary = [f for f in FACTORS if f.primary]
+    secondary = [f for f in FACTORS if not f.primary]
+    # one sampler shared across cells, so successive cells get successive draws
+    sampler = LatinHypercube(d=len(secondary), seed=42)
+
+    configs = []
+    for p_combo in list(product(*[f.levels for f in primary])):
+        for point in sampler.random(n=lh_samples):
+            base = {f.name: level for f, level in zip(primary, p_combo)}
+            for f, u in zip(secondary, point):
+                base[f.name] = f.levels[int(u * len(f.levels))]
+
+            for seed in range(SEEDS_PER_CONFIG):
+                cfg = dict(base, seed=seed)
+                digest = hashlib.md5(json.dumps(cfg, sort_keys=True).encode())
+                cfg["id"] = digest.hexdigest()[:8]
+                configs.append(cfg)
+    return configs
+
+def run_single(cfg: dict) -> dict:
+    """Execute one experiment config and return its metrics.
+
+    Runs up to 100 random-action steps in a PHANTOM environment built from ``cfg`` and
+    returns the config id, the config itself, total and average reward, and the step count.
+    """
+    from engine.wrapper import PHANTOM
+    import numpy as np
+
+    np.random.seed(cfg["seed"])
+    demand_fn = DEMAND_FUNCTIONS[cfg["demand_fn"]]
+
+    env = PHANTOM(
+        n_products=cfg["n_products"],
+        alpha=cfg["alpha"],
+        N=cfg["N"],
+    )
+    env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"]))
+    # Gymnasium spaces own a separate RNG that np.random.seed() does not touch; seed it
+    # so the sampled actions are reproducible for a given config seed.
+    env.action_space.seed(cfg["seed"])
+
+    total_reward, steps = 0.0, 0
+    try:
+        obs, _ = env.reset()
+        for _ in range(100):
+            action = env.action_space.sample()
+            obs, reward, term, trunc, _ = env.step(action)
+            total_reward += reward
+            steps += 1
+            # stop on either end-of-episode signal, not only on termination
+            if term or trunc: break
+    finally:
+        # release the renderer even if a step raises
+        env.close()
+
+    return {
+        "id": cfg["id"],
+        "config": cfg,
+        "total_reward": total_reward,
+        "avg_reward": total_reward / steps,
+        "steps": steps,
+    }
+
+def run_study(max_workers: int = None, output: str = "results_mixed.jsonl", lh_samples: int = LH_SAMPLES):
+    """Run every mixed-design config in a process pool and write the results as JSON lines.
+
+    Results keep the order of the generated configs. Progress is logged every 100 runs.
+    Returns the list of result dicts.
+    """
+    configs = generate_configs(lh_samples)
+    primary_level_counts = [len(f.levels) for f in FACTORS if f.primary]
+    n_primary_cells = int(np.prod(primary_level_counts))
+    log.info(f"mixed LH: {len(configs)} configs ({n_primary_cells} primary × {lh_samples} LH × {SEEDS_PER_CONFIG} seeds)")
+
+    results = []
+    with ProcessPoolExecutor(max_workers=max_workers) as ex:
+        for i, result in enumerate(ex.map(run_single, configs)):
+            results.append(result)
+            if (i+1) % 100 != 0:
+                continue
+            log.info(f"progress: {i+1}/{len(configs)}")
+
+    serialized = [json.dumps(r) for r in results]
+    Path(output).write_text("\n".join(serialized))
+    log.info(f"wrote {len(results)} results to {output}")
+    return results
+
+if __name__ == "__main__":
+    # CLI entry point: always log the design size, then run unless --dry-run is given.
+    import argparse
+    p = argparse.ArgumentParser()
+    p.add_argument("--workers", type=int, default=None)
+    p.add_argument("--output", default="results_mixed.jsonl")
+    # default comes from LH_SAMPLES so the CLI and the function defaults cannot drift
+    # apart (it used to be a separate literal 10)
+    p.add_argument("--lh-samples", type=int, default=LH_SAMPLES)
+    p.add_argument("--dry-run", action="store_true", help="only show design size")
+    args = p.parse_args()
+
+    primary = [f for f in FACTORS if f.primary]
+    secondary = [f for f in FACTORS if not f.primary]
+    configs = generate_configs(args.lh_samples)
+    log.info(f"design: {len(configs)} runs | primary: {[f.name for f in primary]} | secondary (LH): {[f.name for f in secondary]}")
+
+    if not args.dry_run:
+        run_study(args.workers, args.output, args.lh_samples)
--- a/engine/train.py
+++ b/engine/train.py
@@ -0,0 +1,45 @@
+from stable_baselines3 import SAC
+from stable_baselines3.common.callbacks import EvalCallback, BaseCallback
+from .wrapper import PHANTOM
+
+
+class RenderCallback(BaseCallback):
+    """Callback that redraws the given environment after each training step."""
+
+    def __init__(self, env: PHANTOM):
+        super().__init__()
+        self.env = env  # environment whose render() is invoked on every step
+
+    def _on_step(self) -> bool:
+        """Render the environment; always returns True."""
+        self.env.render()
+        keep_training = True
+        return keep_training
+
+
+# NOTE(review): everything below runs at import time (no __main__ guard), so importing
+# this module starts a full training run.
+# Training env renders live; the evaluation env does not render.
+env = PHANTOM(n_products=10, alpha=0.3, render_mode="human")
+eval_env = PHANTOM(n_products=10, alpha=0.3, render_mode=None)
+
+# "MultiInputPolicy" is used because the observation space is a Dict (demand + prices).
+model = SAC(
+    "MultiInputPolicy",
+    env,
+    verbose=1,
+    learning_rate=3e-4,
+    buffer_size=50000,
+    batch_size=256,
+    tau=0.005,
+    gamma=0.99,
+)
+
+render_cb = RenderCallback(env)
+# evaluate on eval_env every 1000 steps, 5 episodes each time
+eval_cb = EvalCallback(eval_env, eval_freq=1000, n_eval_episodes=5, verbose=1)
+
+model.learn(total_timesteps=50000, callback=[render_cb, eval_cb])
+model.save("phantom_sac")
+
+# test trained policy
+# A fresh env replaces the training one; the earlier env and eval_env are not closed here.
+env = PHANTOM(n_products=10, alpha=0.3, render_mode="human")
+obs, _ = env.reset()
+for _ in range(100):
+    action, _ = model.predict(obs, deterministic=True)
+    obs, reward, term, trunc, _ = env.step(action)
+    env.render()
+    if term or trunc: break
+env.close()
--- a/engine/wrapper.py
+++ b/engine/wrapper.py
@@ -0,0 +1,118 @@
+import gymnasium as gym
+from gymnasium import spaces
+import numpy as np
+from .engine import Limbo, MarketEngine, PricingEngine
+from .lib.render import DashboardRenderer
+
+
+class PHANTOM(gym.Env):
+    """Gymnasium wrapper for the Limbo pricing-market simulation. Platform sets prices, market responds with demand.
+
+    Actions are per-product prices inside ``price_bounds``. Observations are a dict with
+    ``"demand"`` (per-product estimate from the market, 0 for products it did not report)
+    and ``"prices"`` (the prices currently in force). Reward is the sum of price * demand.
+    """
+    metadata = {"render_modes": ["human", "ansi"]}
+
+    def __init__(self,
+                 n_products: int = 10,
+                 alpha: float = 0.3,
+                 N: int = 100,
+                 price_bounds: tuple = (10.0, 150.0),
+                 lambda_coi: float = 0.1,
+                 render_mode: str = None):
+        """Build the market, the spaces and the empty history buffers.
+
+        Args:
+            n_products: length of the price and demand vectors.
+            alpha: forwarded to ``MarketEngine`` as the agent share.
+            N: forwarded to ``MarketEngine`` as the session count.
+            price_bounds: ``(low, high)`` used for both spaces and for clipping actions.
+            lambda_coi: stored on the instance; not used by the reward yet.
+            render_mode: ``"human"``, ``"ansi"`` or ``None``.
+        """
+        super().__init__()
+        self.n_products = n_products
+        self.price_bounds = price_bounds
+        self.lambda_coi = lambda_coi
+        self.render_mode = render_mode
+        self.alpha = alpha
+        self.N = N
+
+        self.market = MarketEngine(alpha=alpha, N=N)
+        self._platform_stub = PricingEngine()
+        self._limbo = Limbo(self._platform_stub, self.market)
+
+        self.action_space = spaces.Box(
+            low=price_bounds[0], high=price_bounds[1],
+            shape=(n_products,), dtype=np.float32
+        )
+        self.observation_space = spaces.Dict({
+            "demand": spaces.Box(low=0.0, high=100.0, shape=(n_products,), dtype=np.float32),
+            "prices": spaces.Box(low=price_bounds[0], high=price_bounds[1], shape=(n_products,), dtype=np.float32),
+        })
+
+        self._prices = None
+        self._demand = None
+        self._step_count = 0
+        self._demand_history = []
+        self._price_history = []
+        self._revenue_history = []
+        self._renderer = None  # created lazily by render() in "human" mode
+
+    def _demand_vector(self) -> np.ndarray:
+        """Dense per-product demand; products absent from the market's dict count as 0."""
+        return np.array([self._demand.get(i, 0.0) for i in range(self.n_products)])
+
+    def _get_obs(self) -> dict:
+        """Current observation as float32 arrays."""
+        demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)], dtype=np.float32)
+        return {"demand": demand_arr, "prices": self._prices.astype(np.float32)}
+
+    def _compute_reward(self, prices: np.ndarray, demand: dict) -> float:
+        """Revenue: sum over products of price times demand."""
+        revenue = np.sum(prices * np.array([demand.get(i, 0.0) for i in range(self.n_products)]))
+        # TODO: implement supra-competitive price punishment
+        return float(revenue)
+
+    def _record_history(self):
+        """Append the current demand, prices and revenue to the history buffers."""
+        demand_arr = self._demand_vector()
+        self._demand_history.append(demand_arr)
+        self._price_history.append(self._prices.copy())
+        self._revenue_history.append(np.sum(self._prices * demand_arr))
+
+    def reset(self, seed=None, options=None):
+        """Start a new episode with random initial prices and return ``(obs, info)``.
+
+        When ``seed`` is given, the initial prices come from the environment's seeded
+        ``np_random`` generator (previously the seed was accepted but ignored). Without a
+        seed the global ``np.random`` state is used as before, so callers that rely on
+        ``np.random.seed()`` keep their reproducibility. The market's own sampling still
+        draws from global ``np.random`` either way.
+        """
+        super().reset(seed=seed)
+        rng = self.np_random if seed is not None else np.random
+        self._prices = rng.uniform(*self.price_bounds, size=self.n_products)
+        self._demand = self.market.act(self._prices)
+        self._step_count = 0
+        self._demand_history, self._price_history, self._revenue_history = [], [], []
+        self._record_history()
+        return self._get_obs(), {}
+
+    def step(self, action: np.ndarray):
+        """Apply clipped prices, query the market, and return the Gymnasium 5-tuple.
+
+        ``terminated`` becomes True once 100 steps have been taken; ``truncated`` is
+        always False.
+        """
+        self._prices = np.clip(action, *self.price_bounds)
+        self._demand = self.market.act(self._prices)
+        self._step_count += 1
+        self._record_history()
+
+        reward = self._compute_reward(self._prices, self._demand)
+        terminated = self._step_count >= 100
+
+        return self._get_obs(), reward, terminated, False, {"step": self._step_count}
+
+    def _compute_elasticity(self) -> np.ndarray:
+        """point elasticity: e = (dQ/dP) * (P/Q) via finite differences, clipped to [-5, 5]"""
+        if len(self._price_history) < 2:
+            return np.zeros(self.n_products)
+        p, q = np.array(self._price_history), np.array(self._demand_history)
+        dp, dq = np.diff(p, axis=0), np.diff(q, axis=0)
+        # ignore steps where the price barely moved; the ratio would be dominated by noise
+        valid = np.abs(dp) > 0.5
+        with np.errstate(divide='ignore', invalid='ignore'):
+            # demand is floored at 1.0 in the denominator to avoid blow-ups near zero
+            elasticity = np.where(valid, (dq / dp) * (p[:-1] / np.maximum(q[:-1], 1.0)), 0.0)
+            elasticity = np.nan_to_num(np.clip(elasticity, -5.0, 5.0), nan=0.0)
+        return np.mean(elasticity, axis=0) if len(elasticity) > 0 else np.zeros(self.n_products)
+
+    def render(self):
+        """Draw the dashboard in "human" mode, return a status string in "ansi" mode, else None."""
+        if self.render_mode == "human":
+            if self._renderer is None:
+                self._renderer = DashboardRenderer()
+            self._renderer.render(self)
+        elif self.render_mode == "ansi":
+            return f"step={self._step_count}, prices={self._prices}, demand={self._demand}"
+        return None
+
+    def close(self):
+        """Close and drop the dashboard renderer if one was created."""
+        if self._renderer:
+            self._renderer.close()
+            self._renderer = None
+
+
+if __name__ == "__main__":
+    # Demo: random-action rollout with the live dashboard, stopping on termination.
+    env = PHANTOM(n_products=15, alpha=0.3, N=100, render_mode="human")
+    obs, _ = env.reset()
+    for step in range(100):
+        obs, reward, term, trunc, info = env.step(env.action_space.sample())
+        env.render()
+        if term:
+            break
+    env.close()
--- a/lab/README.md
+++ b/lab/README.md
@@ -0,0 +1,75 @@
+# MOS (Money Operating System)
+
+Research-grade quote-control simulator for studying dynamic pricing and market making policies.
+The system models pricing as a closed loop of **Quote → Arrival → Execution → Position**, enabling
+controlled experimentation with demand models, inventory constraints, and reward shaping.
+
+## Core Loop
+
+1. **Quote** – the policy posts prices (one-sided or two-sided depending on the mechanism).
+2. **Arrival** – a population model generates purchase opportunities or market orders.
+3. **Execution** – an execution model decides whether an arrival converts at the quoted price.
+4. **Position** – inventory/position limits censor fills and generate holding/shortage costs.
+5. **Observation & Reward** – censored fills and aggregate metrics are exposed to the agent, while
+   objectives turn metrics into a scalar reward.
+
+Each stage is pluggable via lightweight protocols, so you can swap in alternative mechanisms,
+demand models, or objectives without rewriting the rest of the simulator.
+
+## Package Layout
+
+| Module            | Purpose |
+|-------------------|---------|
+| `lab.outlet`      | Core simulation engine, domain types, pricing mechanisms, objectives. |
+| `lab.population`  | Demand arrival models, execution probability models, competitor/market dynamics. |
+| `lab.experiments` | Rollout utilities, baseline policies, and off-policy evaluation helpers. |
+| `lab.config`      | Convenience factories for preconfigured retail and market-making environments. |
+
+## Preconfigured Scenarios
+
+### Retail Dynamic Pricing
+- Mechanism: posted prices with margin and delta constraints.
+- Arrivals: browsing sessions with contamination support (scrapers).
+- Execution: elasticity model with competitor cross-effects.
+- Position: inventory tracking with holding and shortage costs.
+- Market: reactive competitor that can trigger price wars.
+- Objective: PnL minus volatility, holding cost, and lost opportunity penalties.
+
+```python
+from lab.config import make_retail_platform
+from lab.experiments import rollout, fixed_price_policy
+
+platform = make_retail_platform()
+policy = fixed_price_policy(platform.instruments.refs)
+result = rollout(platform, policy, n_steps=100)
+print(result.total_pnl)
+```
+
+### Market Making
+- Mechanism: two-sided quoting with bid/ask spreads.
+- Arrivals: Hawkes order flow for clustered demand.
+- Execution: Avellaneda–Stoikov style intensity model.
+- Position: inventory risk limits and quadratic penalty objective.
+- Market: geometric Brownian motion mid-price process.
+- Objective: PnL plus spread capture minus inventory risk.
+
+```python
+from lab.config import make_market_making_platform
+from lab.experiments import rollout
+
+platform = make_market_making_platform()
+mm_policy = lambda obs, t: (platform.instruments.refs, 1.0)
+result = rollout(platform, mm_policy, n_steps=200, seed=42)
+print(result.total_pnl)
+```
+
+## Extending the Simulator
+
+- Implement `lab.outlet.protocols.Mechanism` or `ArrivalModel` to introduce new pricing
+domains or demand processes.
+- Compose objectives with `lab.outlet.objectives.factory.make_composite` to study alternate
+reward formulations.
+- Use `lab.experiments.compare_policies` to benchmark candidate policies across multiple
+random seeds.
+
+Comprehensive API documentation lives in `lab/docs` (build with `make html`).
--- a/lab/init.py
+++ b/lab/init.py
@@ -0,0 +1,27 @@
+"""
+Quote-Control Simulator: Research-grade platform for dynamic pricing and market making
+
+The platform abstracts pricing as: Quote -> Arrival -> Execution -> Position
+Supports multiple mechanisms:
+  - PostedPrice: retail dynamic pricing
+  - TwoSided: market making with bid-ask spreads
+  - Auction: reserve/shading for auction settings
+
+Example usage:
+    from lab.config import make_retail_platform
+    from lab.experiments import rollout, fixed_price_policy
+
+    platform = make_retail_platform()
+    policy = fixed_price_policy(platform.instruments.refs)
+    result = rollout(platform, policy, n_steps=100)
+    print(f"Total PnL: {result.total_pnl:.2f}")
+"""
+
+from .config import make_retail_platform, make_market_making_platform, RetailConfig, MarketMakingConfig
+from .outlet import Platform, PlatformConfig, Quote, Observation, StepResult
+
+__all__ = [
+    'make_retail_platform', 'make_market_making_platform',
+    'RetailConfig', 'MarketMakingConfig',
+    'Platform', 'PlatformConfig', 'Quote', 'Observation', 'StepResult',
+]
--- a/lab/case/__init__.py
+++ b/lab/case/__init__.py
@@ -0,0 +1,6 @@
+"""
+Case studies implementing specific research scenarios.
+
+Available cases:
+- thesis: PHANTOM thesis implementation with contaminated demand and DR-RL
+"""
--- a/lab/case/thesis/__init__.py
+++ b/lab/case/thesis/__init__.py
@@ -0,0 +1,25 @@
+"""
+Thesis-specific implementation of the PHANTOM pricing defense framework.
+
+This module implements the mathematical models from the thesis:
+- ContaminatedArrivalModel: Mixture demand Q(p) = (1-α)d_H + αd_A (Eq 3)
+- HybridExecutionModel: Divergent H/A behavior with separability (Section 2.1)
+- RobustStackelbergObjective: Maximin objective with COI penalty (Eq 23)
+- COIMetrics: Cost of Information tracking (Definition 1)
+
+The platform configuration creates a research environment that directly
+maps to the thesis mathematical framework for DR-RL experiments.
+"""
+from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig
+from .execution import HybridExecutionModel, HybridExecutionConfig
+from .objectives import RobustStackelbergObjective, COIObjective
+from .platform import make_thesis_platform, ThesisConfig
+from .metrics import COIMetrics, compute_coi, compute_separability
+
+__all__ = [
+    'ContaminatedArrivalModel', 'ContaminatedArrivalConfig',
+    'HybridExecutionModel', 'HybridExecutionConfig',
+    'RobustStackelbergObjective', 'COIObjective',
+    'make_thesis_platform', 'ThesisConfig',
+    'COIMetrics', 'compute_coi', 'compute_separability',
+]
--- a/lab/case/thesis/arrivals.py
+++ b/lab/case/thesis/arrivals.py
@@ -0,0 +1,327 @@
+"""Contaminated arrivals using learned MDP kernels from behavior_loader.
+
+Implements thesis demand model (Section 3.1):
+- Aggregate demand Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t  (Eq 3)
+- Demand proxy q̂_{t,i} = Σ_s Σ_k ω(a_{s,k}) · 1[i_{s,k} = i]     (Eq 2)
+- Per-session separability via KL divergence Δ_H, Δ_A              (Eq 20-21)
+
+The arrival model samples sessions from a mixture of human/agent behavioral profiles,
+each session produces a trajectory τ_s and associated demand computation q(τ').
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from types import SimpleNamespace
+from typing import Dict, List, Tuple, Optional
+import numpy as np
+from ...outlet.types import Opportunity, InstrumentSet, MarketState, HiddenState
+from ...outlet.constants import Side, OpportunityType
+from ...outlet.math_util import poisson_arrivals
+
+# Optional dependency: the learned-MDP behavior models live outside this package.
+# The directory four levels above this file is put first on sys.path so that the
+# `sim` package can be imported. When that import fails the module degrades:
+# REAL_MDP is False (BehavioralProfile uses its fallback kernels) and
+# kl_divergence is None (compute_session_divergence returns 0.0, 0.0).
+try:
+    import sys
+    from pathlib import Path  # also used by ContaminatedArrivalModel._profile
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
+    from sim.rl.behavior_loader.models import (
+        BehaviorModel, AgentBehaviorModel, aggregate_event_transitions, kl_divergence
+    )
+    REAL_MDP = True
+except ImportError:
+    REAL_MDP = False
+    kl_divergence = None
+
+# Page path written to the "page" field of each sampled event row, keyed by state name;
+# BehavioralProfile.sample uses "/" for states missing from this table.
+EVENT_PAGE = {"session_start": "/", "view_item_page": "/products", "learn_more_about_item": "/products/details",
+              "add_item_to_cart": "/cart", "purchase_complete": "/checkout", "session_end": "/checkout/success"}
+# Raw event name -> canonical state name. _canonicalize applies this to both ends of
+# every transition and leaves names that are not listed here unchanged.
+EVENT_CANON = {"page_view": "session_start", "hover_over_paragraph": "view_item_page", "hover_over_title": "view_item_page",
+               "view_item_page": "view_item_page", "learn_more_about_item": "learn_more_about_item",
+               "add_item_to_cart": "add_item_to_cart", "checkout_start": "purchase_complete", "remove_item": "view_item_page"}
+
+# action space partition A = A_nav ∪ A_cart ∪ A_filter ∪ A_dwell with signal weights ω (Table 1)
+# compute_demand_proxy adds these weights per event and uses 0.1 for any action not listed.
+ACTION_WEIGHTS: Dict[str, float] = {
+    "add_item_to_cart": 0.8, "remove_item": 0.6, "checkout_start": 0.9, "purchase_complete": 1.0,  # A_cart
+    "hover_over_title": 0.3, "hover_over_paragraph": 0.35, "hover_over_link": 0.25,               # A_dwell
+    "page_view": 0.1, "session_start": 0.05, "view_item_page": 0.15, "learn_more_about_item": 0.2, # A_nav
+    "search": 0.05, "filter_date": 0.05, "filter_price": 0.08, "sort": 0.03, "session_end": 0.0,   # A_filter
+}
+
+
+@dataclass
+class SessionDemand:
+    """Per-session demand computation per thesis formulation (Section 3.1).
+
+    Each session s ∈ S produces trajectory τ_s and demand proxy q̂. The platform uses
+    divergence signals Δ_H, Δ_A to estimate per-session contamination α̂(τ').
+
+    Instances are created by ContaminatedArrivalModel.sample, one per sampled session.
+    """
+    session_id: str
+    q: Dict[int, float]               # q̂_i demand proxy per product index (Eq 2), see compute_demand_proxy
+    trajectory: List[Dict]            # τ_s = (e_{s,1}, ..., e_{s,L_s}); event rows from BehavioralProfile.sample
+    delta_h: float = 0.0              # D_KL(T̂' || T̄_H) (Eq 20)
+    delta_a: float = 0.0              # D_KL(T̂' || T̄_A) (Eq 21)
+    alpha_hat: float = 0.0            # per-session contamination estimate (sigmoid of Δ_H - Δ_A in the sampler)
+    actor_class: str = "H"            # ground truth Y_s ∈ {H, A}
+    # behavioral parameters drawn by the sampler: price_sensitivity, base_conversion, info_value
+    theta: Dict[str, float] = field(default_factory=dict)
+
+
+def compute_demand_proxy(events: List[Dict], n_products: int) -> Dict[int, float]:
+    """Accumulate the weighted demand proxy q̂_i for every product index (Eq 2).
+
+    Each event contributes the ACTION_WEIGHTS entry for its "eventName" (0.1 when
+    the action is not in the table) to the product named by its "product_idx".
+    Events without a product index, or with one outside [0, n_products), are ignored.
+
+    Returns:
+        A dict with one entry per index in range(n_products), zero where nothing was seen.
+    """
+    proxy = dict.fromkeys(range(n_products), 0.0)
+    for event in events:
+        product = event.get("product_idx")
+        if product is None or not (0 <= product < n_products):
+            continue
+        proxy[product] += ACTION_WEIGHTS.get(event.get("eventName", ""), 0.1)
+    return proxy
+
+
+def compute_session_divergence(events: List[Dict], ref_h: Dict, ref_a: Dict) -> Tuple[float, float]:
+    """Compute Δ_H, Δ_A divergence signals from trajectory (Eq 20-21).
+
+    An empirical transition kernel is estimated from the event sequence, with
+    "session_start" as the implicit state before the first event. For each
+    reference kernel the result is the KL divergence of every observed row
+    against the matching reference row, averaged over the observed rows.
+
+    Returns:
+        (Δ_H, Δ_A); (0.0, 0.0) when there are no events or kl_divergence is unavailable.
+    """
+    if kl_divergence is None or not events:
+        return 0.0, 0.0
+
+    # count observed transitions source -> target
+    counts: Dict[str, Dict[str, int]] = {}
+    source = "session_start"
+    for event in events:
+        target = event.get("eventName", "session_end")
+        row = counts.setdefault(source, {})
+        row[target] = row.get(target, 0) + 1
+        source = target
+
+    # row-normalise the counts into probabilities
+    empirical: Dict[str, Dict[str, float]] = {}
+    for state, row in counts.items():
+        n = sum(row.values())
+        empirical[state] = {dst: c / n for dst, c in row.items()} if n > 0 else {}
+
+    def mean_kl(reference: Dict) -> float:
+        # missing reference rows are compared against an empty distribution
+        total = sum(kl_divergence(row, reference.get(state, {})) for state, row in empirical.items())
+        return total / max(len(empirical), 1)
+
+    return mean_kl(ref_h), mean_kl(ref_a)
+
+def _canonicalize(raw: Dict) -> Dict:
+    """Collapse a raw transition table onto canonical state names and renormalise.
+
+    Source and destination names are mapped through EVENT_CANON (unknown names are
+    kept as they are); probabilities of entries that land on the same canonical
+    pair are summed. Each resulting row is divided by its total so it sums to 1,
+    and rows whose total is not positive are dropped.
+    """
+    merged: Dict[str, Dict[str, float]] = {}
+    for src, dsts in raw.items():
+        row = merged.setdefault(EVENT_CANON.get(src, src), {})
+        for dst, p in dsts.items():
+            canon_dst = EVENT_CANON.get(dst, dst)
+            row[canon_dst] = row.get(canon_dst, 0.0) + p
+
+    out: Dict[str, Dict[str, float]] = {}
+    for state, row in merged.items():
+        total = sum(row.values())  # once per row instead of once per entry
+        if total > 0:
+            out[state] = {dst: mass / total for dst, mass in row.items()}
+    return out
+
+
+class BehavioralProfile:
+    """Markov profile from learned MDP kernels (Section 3.5.2).
+
+    Transition kernel T̂_Y estimated via MLE: P̂(s'|s) = N(s,s') / Σ_k N(s,k) (Eq 19)
+
+    Args:
+        actor: "agents" selects the agent model/fallback; any other value the human one.
+        pprobs: per-product purchase probabilities, clipped to [0, 0.95] on construction.
+        data_dir: directory handed to the behavior model; empty means "use the fallback".
+    """
+    STATES = ["session_start", "view_item_page", "learn_more_about_item", "add_item_to_cart", "purchase_complete", "session_end"]
+    # fallback kernels T̄_H, T̄_A when real data unavailable
+    FALLBACK_H = {"session_start": {"view_item_page": 0.85, "session_end": 0.15},
+                  "view_item_page": {"learn_more_about_item": 0.4, "add_item_to_cart": 0.3, "view_item_page": 0.2, "session_end": 0.1},
+                  "learn_more_about_item": {"add_item_to_cart": 0.5, "view_item_page": 0.3, "session_end": 0.2},
+                  "add_item_to_cart": {"purchase_complete": 0.6, "view_item_page": 0.25, "session_end": 0.15},
+                  "purchase_complete": {"session_end": 1.0}}
+    FALLBACK_A = {"session_start": {"view_item_page": 0.95, "session_end": 0.05},
+                  "view_item_page": {"learn_more_about_item": 0.6, "view_item_page": 0.25, "add_item_to_cart": 0.1, "session_end": 0.05},
+                  "learn_more_about_item": {"view_item_page": 0.5, "add_item_to_cart": 0.15, "learn_more_about_item": 0.3, "session_end": 0.05},
+                  "add_item_to_cart": {"view_item_page": 0.4, "purchase_complete": 0.2, "session_end": 0.4},
+                  "purchase_complete": {"session_end": 1.0}}
+
+    def __init__(self, actor: str, pprobs: np.ndarray, data_dir: str = ""):
+        self.actor, self.pprobs = actor, np.clip(pprobs, 0.0, 0.95)
+        self.trans = self._load(data_dir)  # T̂_Y transition kernel
+        self._ensure_terminal()
+        # gamma (shape, scale) for the dwell time spent entering each state
+        self.dwell = {s: (1.2, 0.5) if actor == "agents" else (2.0, 1.2) for s in self.STATES}
+
+    def _fallback(self) -> Dict:
+        """Return a private copy of the fallback kernel for this actor.
+
+        Rows are copied individually: _ensure_terminal writes into rows of
+        self.trans, and a shallow dict() copy would make those writes land in the
+        class-level FALLBACK_* constants shared by every instance.
+        """
+        source = self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H
+        return {state: dict(row) for state, row in source.items()}
+
+    def _load(self, data_dir: str) -> Dict:
+        """Load and canonicalise the learned kernel, or fall back to the built-in one.
+
+        The fallback is used when the behavior models could not be imported, when
+        no data_dir is given, when the MDP has no transitions, or when loading raises.
+        """
+        if not REAL_MDP or not data_dir:
+            print("using fallback")
+            return self._fallback()
+        try:
+            mdp = (AgentBehaviorModel if self.actor == "agents" else BehaviorModel)(data_dir).build_MDP()
+            raw = aggregate_event_transitions(mdp) if mdp.get("transitions") else {}
+            return _canonicalize(raw) if raw else self._fallback()
+        except Exception:
+            # deliberate best-effort: any failure while loading real data degrades to the fallback
+            print("using fallback")
+            return self._fallback()
+
+    def _ensure_terminal(self):
+        """Guarantee a purchase -> session_end edge and a session_start row."""
+        self.trans.setdefault("purchase_complete", {})["session_end"] = self.trans.get("purchase_complete", {}).get("session_end", 1.0)
+        self.trans.setdefault("session_start", {"view_item_page": 0.7, "learn_more_about_item": 0.2, "session_end": 0.1})
+
+    def _tprobs(self, state: str, pidx: int) -> Dict[str, float]:
+        """Return the normalised outgoing distribution of `state` for product `pidx`.
+
+        States without a row go straight to "session_end". From "add_item_to_cart"
+        the purchase probability is replaced by an equal blend of the kernel value
+        and the product's purchase probability (scaled by 0.3 for agents), capped at
+        0.95; the remaining mass is spread over the other destinations in proportion.
+        """
+        probs = dict(self.trans.get(state, {"session_end": 1.0}))
+        if state == "add_item_to_cart":
+            base = probs.get("purchase_complete", 0.0)
+            df = float(self.pprobs[pidx]) * (0.3 if self.actor == "agents" else 1.0)
+            adj = np.clip(base * 0.5 + df * 0.5, 0.0, 0.95)
+            rem = max(1e-6, 1.0 - adj)
+            other = sum(v for k, v in probs.items() if k != "purchase_complete")
+            probs = {k: (adj if k == "purchase_complete" else v * rem / max(other, 1e-6)) for k, v in probs.items()}
+        total = sum(probs.values())
+        return {k: v/total for k, v in probs.items()} if total > 0 else {"session_end": 1.0}
+
+    def sample(self, rng: np.random.Generator, sid: str, prices: np.ndarray, costs: np.ndarray) -> Tuple[List[Dict], List[SimpleNamespace]]:
+        """Simulate one session and return its event rows.
+
+        One product index is drawn uniformly for the whole session. The chain
+        starts in "session_start" and runs until "session_end" or 40 recorded
+        events; neither of those two states produces a row.
+
+        Returns:
+            (events, fevts): full dict rows, and a parallel list of lightweight
+            namespaces carrying eventName, page, productId and ts.
+        """
+        events, fevts = [], []
+        state, t, pidx = "session_start", 0.0, int(rng.integers(0, len(prices)))
+        # offered price is floored at 5% above unit cost
+        cost, cprice = float(costs[pidx]), max(float(prices[pidx]), float(costs[pidx]) * 1.05)
+
+        while state != "session_end" and len(events) < 40:
+            if state != "session_start":
+                row = {"session_id": sid, "actor": "agent" if self.actor == "agents" else "human",
+                       "eventName": state, "product_idx": pidx, "productId": f"product-{pidx:04d}",
+                       "price_offered": cprice, "price_paid": 0.0, "page": EVENT_PAGE.get(state, "/"),
+                       "ts": t, "unit_cost": cost, "base_price": float(prices[pidx])}
+                if state == "purchase_complete":
+                    # paid price jitters around the offer but never drops below cost
+                    row["price_paid"] = max(cprice * (1.0 + rng.normal(0.0, 0.015)), cost)
+                events.append(row)
+                fevts.append(SimpleNamespace(eventName=state, page=row["page"], productId=row["productId"], ts=t))
+
+            probs = self._tprobs(state, pidx)
+            state = rng.choice(list(probs.keys()), p=list(probs.values()))
+            sh, sc = self.dwell.get(state, (2.0, 1.0))
+            t += max(0.3, rng.gamma(shape=sh, scale=sc))
+        return events, fevts
+
+
+@dataclass
+class ContaminatedArrivalConfig:
+    """Settings read by ContaminatedArrivalModel."""
+    base_rate: float = 20.0                         # session rate, multiplied by hidden.true_demand_intensity
+    alpha_contamination: float = 0.2                # initial α: probability that a session is an agent
+    alpha_drift: float = 0.0                        # scale of the Gaussian step applied to α each sample(); 0 disables
+    alpha_bounds: tuple[float, float] = (0.0, 0.5)  # α is clipped to this range after each drift step
+    # bounds passed to rng.integers for the number of viewed products when a session produced no events
+    human_views_range: tuple[int, int] = (1, 4)
+    agent_views_range: tuple[int, int] = (3, 10)
+    agent_systematic: bool = True                   # NOTE(review): not read by the code in this module - confirm use
+    use_real_behavior: bool = True                  # False forces the built-in fallback kernels
+    # data directories for the behavior models; when empty and use_real_behavior is set,
+    # ContaminatedArrivalModel._profile derives a default under <repo>/experiments
+    human_data_dir: str = ""
+    agent_data_dir: str = ""
+
+
+class ContaminatedArrivalModel:
+    """Mixture model Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t (Eq 3).
+
+    Samples sessions from human/agent behavioral profiles, computes per-session
+    demand proxy q̂ and divergence signals Δ_H, Δ_A for separability.
+
+    One BehavioralProfile per actor class is built on first use and cached; every
+    sampled session is also recorded as a SessionDemand (see get_session_demands).
+    """
+
+    def __init__(self, cfg: ContaminatedArrivalConfig | None = None):
+        self.cfg = cfg or ContaminatedArrivalConfig()
+        self._alpha = self.cfg.alpha_contamination
+        self._scount = 0  # running counter used to mint session ids
+        self._profiles: Dict[str, BehavioralProfile] = {}
+        self._ref_kernels: Dict[str, Dict] = {}  # T̄_H, T̄_A reference kernels
+        # NOTE: grows by one entry per sampled session and is never cleared by this class
+        self._session_demands: List[SessionDemand] = []  # collected session demands
+
+    @property
+    def alpha(self) -> float:
+        """Current contamination level α."""
+        return self._alpha
+
+    def _profile(self, actor: str, pprobs: np.ndarray) -> BehavioralProfile:
+        """Return the cached profile for `actor`, building it on first use.
+
+        On a cache hit the profile's purchase probabilities are refreshed from
+        `pprobs` (with the same clipping the constructor applies), so the values
+        computed for the current step are the ones actually used.
+        """
+        profile = self._profiles.get(actor)
+        if profile is not None:
+            profile.pprobs = np.clip(pprobs, 0.0, 0.95)
+            return profile
+
+        ddir = self.cfg.agent_data_dir if actor == "agents" else self.cfg.human_data_dir
+        if not ddir and self.cfg.use_real_behavior:
+            base = Path(__file__).parent.parent.parent.parent / "experiments"
+            ddir = str(base / ("agents/collected_data" if actor == "agents" else "collected_data"))
+        profile = BehavioralProfile(actor, pprobs, ddir if self.cfg.use_real_behavior else "")
+        self._profiles[actor] = profile
+        self._ref_kernels[actor] = profile.trans  # cache T̄_Y for divergence
+        return profile
+
+    def get_ref_kernels(self) -> Tuple[Dict, Dict]:
+        """Return reference transition kernels T̄_H, T̄_A for divergence computation.
+
+        Falls back to the built-in kernels for any actor whose profile has not
+        been built yet.
+        """
+        return (self._ref_kernels.get("humans", BehavioralProfile.FALLBACK_H),
+                self._ref_kernels.get("agents", BehavioralProfile.FALLBACK_A))
+
+    def get_session_demands(self) -> List[SessionDemand]:
+        """Return collected session demands for downstream analysis."""
+        return self._session_demands
+
+    def sample(self, t: float, dt: float, instruments: InstrumentSet,
+               market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]:
+        """Sample arrivals as per Eq 3: mixture of human/agent demand distributions.
+
+        For each session s, computes:
+        - Trajectory τ_s from behavioral profile sampling
+        - Demand proxy q̂ via weighted action aggregation (Eq 2)
+        - Divergence signals Δ_H, Δ_A for separability (Eq 20-21)
+        - Per-session contamination estimate α̂(τ')
+
+        Side effects: updates α when drift is enabled, writes it to
+        hidden.contamination, and appends one SessionDemand per session.
+
+        Returns:
+            One Opportunity per (session, viewed product), all sharing the
+            session's context fields.
+        """
+        cfg = self.cfg
+        if cfg.alpha_drift != 0:
+            self._alpha = float(np.clip(self._alpha + cfg.alpha_drift * rng.normal(), *cfg.alpha_bounds))
+        hidden.contamination = self._alpha
+
+        n_sess = poisson_arrivals(cfg.base_rate * hidden.true_demand_intensity, dt, rng)
+        prices, costs = instruments.refs, instruments.costs
+        margin = np.clip((prices - costs) / np.maximum(costs, 1e-3), -0.9, 2.0)
+        hprob, aprob = 0.08 * np.exp(-1.2 * margin), 0.05 * np.exp(-0.6 * margin)
+
+        # Build (or refresh) both profiles before reading the reference kernels, so
+        # every session - including those of the very first step - is scored
+        # against the profiles' own kernels rather than the hard-coded fallbacks.
+        human_profile = self._profile("humans", hprob)
+        agent_profile = self._profile("agents", aprob)
+        ref_h, ref_a = self.get_ref_kernels()
+
+        opps = []
+        for _ in range(n_sess):
+            self._scount += 1
+            sid = f"s{self._scount:06d}"
+            is_agent = bool(rng.random() < self._alpha)
+            profile = agent_profile if is_agent else human_profile
+            events, fevts = profile.sample(rng, sid, prices, costs)
+
+            # compute demand proxy q̂ per Eq 2
+            q = compute_demand_proxy(events, instruments.n)
+
+            # compute divergence signals Δ_H, Δ_A per Eq 20-21
+            delta_h, delta_a = compute_session_divergence(events, ref_h, ref_a)
+            # per-session contamination estimate α̂(τ') = σ(β(Δ_H - Δ_A)), β = 2;
+            # uninformative 0.5 when no divergence signal is available
+            alpha_hat = 1.0 / (1.0 + np.exp(-2.0 * (delta_h - delta_a))) if (delta_h + delta_a) > 0 else 0.5
+
+            theta = ({'price_sensitivity': rng.uniform(0.05, 0.2), 'base_conversion': 0.01, 'info_value': 1.0} if is_agent
+                     else {'price_sensitivity': rng.uniform(1.5, 4.0), 'base_conversion': rng.uniform(0.2, 0.5), 'info_value': 0.0})
+
+            # store session demand for downstream analysis
+            self._session_demands.append(SessionDemand(
+                session_id=sid, q=q, trajectory=events, delta_h=delta_h, delta_a=delta_a,
+                alpha_hat=alpha_hat, actor_class="A" if is_agent else "H", theta=theta))
+
+            viewed = list({e["product_idx"] for e in events if "product_idx" in e})
+            if not viewed:
+                # the session ended without any event: draw a random set of viewed products
+                vr = cfg.agent_views_range if is_agent else cfg.human_views_range
+                viewed = list(rng.choice(instruments.n, size=min(rng.integers(*vr), instruments.n), replace=False))
+
+            for vi, iid in enumerate(viewed):
+                opps.append(Opportunity(
+                    id=f"{sid}-{iid}", type=OpportunityType.SESSION, side=Side.BUY,
+                    instrument_id=int(iid), size=1.0, t=t + rng.uniform(0, dt),
+                    context={'session_id': sid, 'actor_class': 'AGENT' if is_agent else 'HUMAN', 'is_agent': is_agent,
+                             'reconnaissance_intent': is_agent, 'view_index': vi, 'total_views': len(viewed),
+                             'theta': theta, 'trajectory_events': fevts, 'mdp_trajectory': events,
+                             'demand_proxy': q, 'alpha_hat': alpha_hat, 'delta_h': delta_h, 'delta_a': delta_a}))
+        return opps
+
+
+@dataclass
+class AdversarialArrivalConfig:
+    """Settings read by AdversarialArrivalModel."""
+    base_rate: float = 5.0            # rate handed to poisson_arrivals for the number of query groups per step
+    n_parallel_agents: int = 3        # sessions emitted per query group
+    query_all_products: bool = True   # True: each agent queries every product; False: one random product
+
+
+class AdversarialArrivalModel:
+    """Adversarial coordination (Theorem 1): as N->inf, COI->0.
+
+    Every Poisson-drawn query group fans out into `n_parallel_agents` agent
+    sessions, each of which emits one opportunity per targeted product.
+    """
+
+    def __init__(self, cfg: AdversarialArrivalConfig | None = None):
+        self.cfg = cfg if cfg else AdversarialArrivalConfig()
+        self._qcount = 0  # running query-group counter
+
+    def sample(self, t: float, dt: float, instruments: InstrumentSet,
+               market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]:
+        """Return the agent opportunities arriving in [t, t + dt), all stamped at time t."""
+        settings = self.cfg
+        arrivals: list[Opportunity] = []
+        n_groups = poisson_arrivals(settings.base_rate, dt, rng)
+        for _ in range(n_groups):
+            self._qcount += 1
+            group = self._qcount
+            for agent_idx in range(settings.n_parallel_agents):
+                session = f"adv{group:06d}-{agent_idx}"
+                if settings.query_all_products:
+                    targets = np.arange(instruments.n)
+                else:
+                    targets = rng.choice(instruments.n, size=1)
+                arrivals.extend(
+                    Opportunity(
+                        id=f"{session}-{product}", type=OpportunityType.SESSION, side=Side.BUY,
+                        instrument_id=int(product), size=1.0, t=t,
+                        context={'session_id': session, 'actor_class': 'AGENT', 'is_agent': True, 'adversarial': True,
+                                 'agent_index': agent_idx, 'query_group': group,
+                                 'theta': {'price_sensitivity': 0.0, 'base_conversion': 0.0, 'info_value': 1.0}})
+                    for product in targets)
+        return arrivals
--- a/lab/case/thesis/execution.py
+++ b/lab/case/thesis/execution.py
@@ -0,0 +1,91 @@
+"""Execution models with divergent H/A behavior using ground truth labels."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Dict
+import numpy as np
+from ...outlet.types import Opportunity, Quote, InstrumentSet, MarketState
+from ...outlet.math_util import sigmoid, safe_log, EPS
+
+
+@dataclass
+class HybridExecutionConfig:
+    """Settings read by HybridExecutionModel."""
+    human_base_prob: float = 0.3    # base conversion when theta has none; also the divisor in uncensor
+    human_elasticity: float = 2.5   # price sensitivity when theta has none
+    agent_conversion: float = 0.01  # agent fill probability, scaled by theta['base_conversion']
+    cross_elasticity: float = 0.4   # weight of the penalty applied when a competitor undercuts the quote
+    quality_weight: float = 0.2     # weight of the instrument's 'quality' attribute in the utility
+    use_separability: bool = False  # NOTE(review): not read by the code in this module - confirm use
+
+
+class HybridExecutionModel:
+    """Execution with divergent H/A behavior using ground truth labels.
+
+    Agents convert with a flat probability. Humans follow a logit model whose
+    utility combines log relative price, product quality and a penalty when a
+    competitor quotes below the platform.
+    """
+
+    def __init__(self, cfg: HybridExecutionConfig | None = None):
+        self.cfg = cfg or HybridExecutionConfig()
+
+    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
+             market: MarketState | None, rng: np.random.Generator) -> float:
+        """Return the fill probability of `opp` at the quoted price.
+
+        The actor class and behavioral parameters are read from opp.context
+        ('is_agent', 'theta'); missing parameters fall back to the config.
+        """
+        cfg, idx = self.cfg, int(opp.instrument_id)
+        price, ref = float(quote.prices[idx]), float(instruments.refs[idx])
+        ctx = opp.context
+        theta = ctx.get('theta', {})
+
+        if ctx.get('is_agent', False):
+            return cfg.agent_conversion * theta.get('base_conversion', 1.0)
+
+        # human logit discrete choice
+        sens = theta.get('price_sensitivity', cfg.human_elasticity)
+        base = theta.get('base_conversion', cfg.human_base_prob)
+        u_price = -sens * safe_log(price / (ref + EPS))
+        quality = instruments.instruments[idx].attrs.get('quality', 0.5)
+        u_quality = cfg.quality_weight * quality
+
+        u_comp = 0.0
+        if market and market.competitor_quotes is not None:
+            cp = market.competitor_quotes[idx]
+            if cp < price:
+                # same EPS guard as the price term, so a zero reference price cannot raise
+                u_comp = -cfg.cross_elasticity * (price - cp) / (ref + EPS)
+
+        # intercept is the log-odds of the base conversion
+        utility = safe_log(base / (1 - base + EPS)) + u_price + u_quality + u_comp
+        return float(sigmoid(utility))
+
+    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray:
+        """Scale observed fills up by the expected human conversion rate.
+
+        With a context, the rate is further discounted by the agent share given
+        in context['contamination'] (0.0 when absent).
+        """
+        if context is None:
+            return fills / (self.cfg.human_base_prob + EPS)
+        agent_frac = context.get('contamination', 0.0)
+        return fills / (self.cfg.human_base_prob * (1 - agent_frac) + EPS)
+
+
+@dataclass
+class SeparableExecutionConfig:
+    """Stage-to-stage funnel probabilities for humans and agents.
+
+    A funnel left unset is replaced by the defaults in __post_init__.
+    """
+    human_funnel: Dict[str, float] | None = None
+    agent_funnel: Dict[str, float] | None = None
+
+    def __post_init__(self):
+        # `or` rather than an `is None` test: any falsy value, including {}, selects the defaults
+        self.human_funnel = self.human_funnel or {'view_to_detail': 0.4, 'detail_to_cart': 0.3, 'cart_to_purchase': 0.6}
+        self.agent_funnel = self.agent_funnel or {'view_to_detail': 0.8, 'detail_to_cart': 0.05, 'cart_to_purchase': 0.1}
+
+
+class SeparableExecutionModel:
+    """Execution with Markov funnel kernels using ground truth labels.
+
+    The conversion probability is the product of the three funnel stages of the
+    actor's class; for humans it is additionally damped by the relative price.
+    """
+
+    def __init__(self, cfg: SeparableExecutionConfig | None = None):
+        self.cfg = cfg if cfg else SeparableExecutionConfig()
+
+    @staticmethod
+    def _funnel_rate(funnel) -> float:
+        # end-to-end conversion: view -> detail -> cart -> purchase
+        return funnel['view_to_detail'] * funnel['detail_to_cart'] * funnel['cart_to_purchase']
+
+    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
+             market: MarketState | None, rng: np.random.Generator) -> float:
+        """Return the fill probability of `opp`, clipped to [0, 1]."""
+        agent = opp.context.get('is_agent', False)
+        rate = self._funnel_rate(self.cfg.agent_funnel if agent else self.cfg.human_funnel)
+        if agent:
+            return float(np.clip(rate, 0, 1))
+
+        # humans only: exponential damping in the price relative to the reference
+        i = int(opp.instrument_id)
+        relative_price = quote.prices[i] / (instruments.refs[i] + EPS)
+        rate *= np.exp(-0.5 * (relative_price - 1.0))
+        return float(np.clip(rate, 0, 1))
+
+    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray:
+        """Scale observed fills up by the expected human end-to-end conversion."""
+        expected = self._funnel_rate(self.cfg.human_funnel)
+        return fills / (expected + EPS)
--- a/lab/case/thesis/metrics.py
+++ b/lab/case/thesis/metrics.py
@@ -0,0 +1,102 @@
+"""Thesis metrics for COI and behavioral analysis using ground truth labels."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Dict
+import numpy as np
+from ...outlet.types import StepLogs, StepMetrics, Quote, InstrumentSet
+from ...outlet.math_util import safe_log, EPS
+
+
+@dataclass
+class COIMetrics:
+    """Cost-of-Information figures for one step, as produced by compute_coi."""
+    coi_level: float = 0.0
+    coi_leakage: float = 0.0
+    realized_premium: float = 0.0
+    theoretical_max: float = 0.0
+    erosion_rate: float = 0.0
+
+    def to_dict(self) -> dict[str, float]:
+        """Return the five metrics as a plain dict keyed by field name."""
+        return {
+            'coi_level': self.coi_level,
+            'coi_leakage': self.coi_leakage,
+            'realized_premium': self.realized_premium,
+            'theoretical_max': self.theoretical_max,
+            'erosion_rate': self.erosion_rate,
+        }
+
+
+def compute_coi(quote: Quote, instruments: InstrumentSet, metrics: StepMetrics, contamination: float) -> COIMetrics:
+    """Derive the step's COI figures from the quote, the instruments and the step metrics.
+
+    - coi_level: mean quoted margin (price minus cost)
+    - theoretical_max: mean unit cost
+    - realized_premium: (revenue - cost) per unit traded, 0.0 when nothing traded
+    - coi_leakage: contamination × (coi_level + variance of price/reference ratios)
+    - erosion_rate: contamination × coi_level / theoretical_max
+    """
+    level = float(np.mean(quote.prices - instruments.costs))
+    ceiling = float(np.mean(instruments.costs))
+
+    if metrics.units_traded > 0:
+        premium = (metrics.revenue - metrics.cost) / metrics.units_traded
+    else:
+        premium = 0.0
+
+    dispersion = float(np.var(quote.prices / instruments.refs))
+    return COIMetrics(
+        coi_level=level,
+        coi_leakage=contamination * (level + dispersion),
+        realized_premium=premium,
+        theoretical_max=ceiling,
+        erosion_rate=contamination * level / (ceiling + EPS),
+    )
+
+
+@dataclass
+class SeparabilityMetrics:
+    """Session-level class counts as returned by compute_separability."""
+    classification_accuracy: float = 0.0  # compute_separability always reports 1.0 (labels are ground truth)
+    estimated_alpha: float = 0.0          # share of sessions labelled as agent
+    n_human_sessions: int = 0
+    n_agent_sessions: int = 0
+
+
+def compute_separability(logs: StepLogs, true_alpha: float) -> SeparabilityMetrics:
+    """Compute separability using ground truth labels only.
+
+    Events are grouped by their metadata 'session_id' (falling back to the
+    opportunity id); each session takes the 'is_agent' flag of its first event.
+    When the logs contain no events, `true_alpha` is returned as the estimate
+    with all other fields at their defaults.
+    """
+    events = logs.events
+    if events is None or len(events) == 0:
+        return SeparabilityMetrics(estimated_alpha=true_alpha)
+
+    labels: Dict[str, bool] = {}
+    for event in events:
+        key = event.metadata.get('session_id', event.opportunity_id)
+        if key in labels:
+            continue  # first event of a session decides its label
+        labels[key] = event.metadata.get('is_agent', False)
+
+    agents = sum(1 for flag in labels.values() if flag)
+    total = len(labels)
+    return SeparabilityMetrics(
+        classification_accuracy=1.0,  # ground truth is always correct
+        estimated_alpha=agents / total if labels else 0.0,
+        n_human_sessions=total - agents,
+        n_agent_sessions=agents)
+
+
+@dataclass
+class RevenueAttribution:
+    """Revenue and conversion split by actor class, as returned by compute_attribution."""
+    total_revenue: float = 0.0     # copied from the step metrics
+    human_revenue: float = 0.0     # sum of price × filled size over executions attributed to humans
+    agent_revenue: float = 0.0     # same, over executions attributed to agents
+    human_conversion: float = 0.0  # human execution count over the assumed human share of arrivals
+    agent_conversion: float = 0.0  # agent execution count over the assumed agent share of arrivals
+
+
+def compute_attribution(logs: StepLogs, metrics: StepMetrics, *, contamination: float = 0.2,
+                        agent_propensity_cutoff: float = 0.05) -> RevenueAttribution:
+    """Split executed revenue and conversion between humans and agents.
+
+    Executions carry no actor label here, so they are classified heuristically:
+    a propensity below `agent_propensity_cutoff` is attributed to an agent,
+    anything else to a human. Conversion rates divide each class's execution
+    count by its assumed share of the step's arrivals.
+
+    Args:
+        logs: step logs; `executions` may be None, `aggregates['n_arrivals']`
+            defaults to 1 when absent.
+        metrics: step metrics supplying the total revenue.
+        contamination: assumed agent share of arrivals. The default 0.2
+            reproduces the previously hard-coded 0.8 / 0.2 split; pass the
+            simulation's actual α when it differs.
+        agent_propensity_cutoff: propensity threshold separating the classes.
+
+    Returns:
+        A RevenueAttribution; only total_revenue is filled when there are no
+        execution logs.
+    """
+    if logs.executions is None:
+        return RevenueAttribution(total_revenue=metrics.revenue)
+
+    human_rev, agent_rev, human_cnt, agent_cnt = 0.0, 0.0, 0, 0
+    for exe in logs.executions:
+        if exe.propensity < agent_propensity_cutoff:
+            agent_rev += exe.price * exe.size_filled
+            agent_cnt += 1
+        else:
+            human_rev += exe.price * exe.size_filled
+            human_cnt += 1
+
+    total_exp = logs.aggregates.get('n_arrivals', 1)
+    # EPS keeps the ratios finite when a class has an assumed share of zero
+    return RevenueAttribution(
+        total_revenue=metrics.revenue, human_revenue=human_rev, agent_revenue=agent_rev,
+        human_conversion=human_cnt / (total_exp * (1.0 - contamination) + EPS),
+        agent_conversion=agent_cnt / (total_exp * contamination + EPS))
+
+
+def order_statistic_erosion(n_agents: int, price_variance: float) -> float:
+    """COI erosion from Theorem 1: as N->inf, min(p_1..p_N)->p_min.
+
+    Returns 0.0 for a single agent or whenever log(n_agents) is below 1;
+    otherwise a shift in units of two standard deviations, capped at 1.0.
+    """
+    if n_agents <= 1:
+        return 0.0
+
+    sigma = np.sqrt(price_variance)
+    log_n = safe_log(n_agents)
+    if log_n < 1:
+        return 0.0
+
+    root = np.sqrt(2 * log_n)
+    correction = (safe_log(log_n) + safe_log(4 * np.pi)) / (2 * root + EPS)
+    shift = sigma * (root - correction)
+    return float(min(shift / (sigma * 2 + EPS), 1.0))
--- a/lab/case/thesis/objectives.py
+++ b/lab/case/thesis/objectives.py
@@ -0,0 +1,228 @@
+"""
+Thesis-specific objectives implementing robust pricing under contamination.
+
+Implements the Maximin objective from Eq 23:
+π* = argmax_π min_{Q ∈ U_ε} E_d~Q[R(p,d) - λ·COI(p)]
+
+Key components:
+- COIObjective: Cost of Information penalty (Definition 1)
+- RobustStackelbergObjective: Full maximin objective with Wasserstein robustness
+- UXPenalty: User experience degradation from volatility
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+from ...outlet.objectives.base import BaseObjective, CompositeObjective
+from ...outlet.types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
+from ...outlet.math_util import safe_log, EPS
+
+class COIObjective(BaseObjective):
+    """Cost of Information penalty from Definition 1.
+
+    COI(π) = E[P] - p_min
+
+    The expected price premium over marginal cost represents the platform's
+    pricing power. Agent reconnaissance erodes this by revealing price
+    distribution to buyers.
+
+    We implement COI_leakage = f(τ') · InfoValue(p, τ')
+    where f(τ') is read from ``hidden.contamination``.
+    """
+
+    def __init__(self, lambda_coi: float = 1.0, use_revelation: bool = False):
+        """
+        Args:
+            lambda_coi: Weight on COI penalty
+            use_revelation: If True, info value is the mean relative |price - ref| gap; else 1.0
+        """
+        self.lambda_coi = lambda_coi
+        self.use_revelation = use_revelation
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        # COI_leakage = α · InfoValue
+        alpha = hidden.contamination
+
+        if self.use_revelation:
+            # revelation surrogate: rare prices reveal more about policy
+            # stands in for -log(π(p|τ')): mean absolute deviation of prices from refs, relative to refs
+            price_surprise = np.mean(np.abs(quote.prices - instruments.refs) / (instruments.refs + EPS))
+            info_value = price_surprise
+        else:
+            # query-tax surrogate: each agent query incurs constant leakage
+            info_value = 1.0
+
+        leakage = alpha * info_value
+        return -self.lambda_coi * leakage
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        alpha = hidden.contamination
+        margins = (quote.prices - instruments.costs) / (instruments.costs + EPS)
+        return {
+            'coi_penalty': self.reward(quote, instruments, metrics, hidden, obs),
+            'contamination': alpha,
+            'avg_margin': float(np.mean(margins)),
+        }
+
+@dataclass
+class RobustObjectiveConfig:
+    """Configuration for robust Stackelberg objective.
+
+    Attributes:
+        lambda_coi: Weight on COI penalty (λ in Eq 23)
+        lambda_ux: Weight on UX penalty (NOTE: not read by RobustStackelbergObjective in this module)
+        lambda_volatility: Weight on price volatility penalty
+        gamma_inventory: Inventory risk aversion
+        wasserstein_epsilon: Ambiguity set radius (ε in Eq 21)
+    """
+    lambda_coi: float = 0.5
+    lambda_ux: float = 0.1
+    lambda_volatility: float = 0.2
+    gamma_inventory: float = 0.1
+    wasserstein_epsilon: float = 0.1
+
+class RobustStackelbergObjective(BaseObjective):
+    """Implements the Maximin Objective from thesis Eq 23.
+
+    π* = argmax_π min_{Q ∈ U_ε(P̂_N)} E_d~Q[R(p,d) - λ·COI(p)]
+
+    The objective balances:
+    1. Revenue R(p,d) from human purchases
+    2. COI penalty for information leakage to agents
+    3. UX penalty for price volatility
+    4. Inventory/holding costs
+
+    The min over ambiguity set U_ε is approximated by penalizing
+    high contamination scenarios more heavily.
+    """
+
+    def __init__(self, cfg: RobustObjectiveConfig | None = None):
+        self.cfg = cfg or RobustObjectiveConfig()
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        cfg = self.cfg
+
+        # 1. base revenue (R(p,d))
+        revenue = metrics.revenue
+        cost = metrics.cost
+        profit = revenue - cost
+
+        # 2. COI penalty: scales with contamination and margin extraction
+        # high margins + high contamination = high leakage
+        alpha = hidden.contamination
+        margins = quote.prices - instruments.costs
+        avg_margin = float(np.mean(margins))
+        coi_penalty = cfg.lambda_coi * avg_margin * alpha
+
+        # 3. UX penalty: price volatility harms legitimate users
+        volatility_penalty = cfg.lambda_volatility * metrics.volatility
+
+        # 4. inventory/position cost
+        position_penalty = cfg.gamma_inventory * metrics.position_cost
+
+        # 5. lost opportunity cost (stockouts)
+        lost_penalty = 0.1 * metrics.lost_opportunity
+
+        # robust adjustment: under adversarial distribution Q,
+        # expect lower revenue and higher costs
+        # approximate via worst-case contamination within ε-ball
+        worst_case_alpha = min(alpha + cfg.wasserstein_epsilon, 1.0)
+        robustness_penalty = cfg.wasserstein_epsilon * avg_margin * worst_case_alpha
+
+        total = profit - coi_penalty - volatility_penalty - position_penalty - lost_penalty - robustness_penalty
+
+        return total
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        cfg = self.cfg
+        alpha = hidden.contamination
+        margins = quote.prices - instruments.costs
+        avg_margin = float(np.mean(margins))
+
+        return {
+            'revenue': metrics.revenue,
+            'cost': metrics.cost,
+            'profit': metrics.revenue - metrics.cost,
+            'coi_penalty': -cfg.lambda_coi * avg_margin * alpha,
+            'volatility_penalty': -cfg.lambda_volatility * metrics.volatility,
+            'position_penalty': -cfg.gamma_inventory * metrics.position_cost,
+            'lost_penalty': -0.1 * metrics.lost_opportunity,
+            'robustness_penalty': -cfg.wasserstein_epsilon * avg_margin * min(alpha + cfg.wasserstein_epsilon, 1.0),
+            'contamination': alpha,
+            'avg_margin_pct': avg_margin / (float(np.mean(instruments.costs)) + EPS),
+        }
+
+class UXPenalty(BaseObjective):
+    """User experience penalty from price volatility.
+
+    High price volatility degrades UX for legitimate human users.
+    This term ensures the defense doesn't harm real customers while
+    protecting against agent reconnaissance.
+    """
+
+    def __init__(self, scale: float = 1.0, max_acceptable_volatility: float = 0.1):
+        self.scale = scale
+        self.max_vol = max_acceptable_volatility
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        # penalty increases quadratically beyond threshold
+        excess_vol = max(0, metrics.volatility - self.max_vol)
+        return -self.scale * (excess_vol ** 2)
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        return {
+            'ux_penalty': self.reward(quote, instruments, metrics, hidden, obs),
+            'volatility': metrics.volatility,
+        }
+
+class AdaptiveObjective(BaseObjective):
+    """Objective that adapts weights based on estimated contamination.
+
+    When contamination is low, focus on revenue maximization.
+    When contamination is high, increase COI defense weight.
+    """
+
+    def __init__(self, base_lambda_coi: float = 0.3, max_lambda_coi: float = 2.0,
+                 adaptation_rate: float = 2.0):
+        self.base_lambda = base_lambda_coi
+        self.max_lambda = max_lambda_coi
+        self.rate = adaptation_rate
+
+    def _adaptive_lambda(self, alpha: float) -> float:
+        # sigmoid scaling: λ(α) = base + (max-base) * sigmoid(rate*(α-0.3)); midpoint is α = 0.3
+        from ...outlet.math_util import sigmoid
+        scale = sigmoid(self.rate * (alpha - 0.3))
+        return self.base_lambda + (self.max_lambda - self.base_lambda) * scale
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        alpha = hidden.contamination
+        lambda_coi = self._adaptive_lambda(alpha)
+
+        profit = metrics.revenue - metrics.cost
+        margins = quote.prices - instruments.costs
+        coi_penalty = lambda_coi * float(np.mean(margins)) * alpha
+
+        return profit - coi_penalty
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        alpha = hidden.contamination
+        return {
+            'profit': metrics.revenue - metrics.cost,
+            'adaptive_lambda': self._adaptive_lambda(alpha),
+            'contamination': alpha,
+        }
+
+def make_thesis_objective(lambda_coi: float = 0.5, lambda_ux: float = 0.1,
+                          lambda_vol: float = 0.2) -> CompositeObjective:
+    """Create the standard thesis objective composition."""
+    return CompositeObjective([
+        (RobustStackelbergObjective(RobustObjectiveConfig(
+            lambda_coi=lambda_coi, lambda_ux=lambda_ux, lambda_volatility=lambda_vol)), 1.0),
+    ])
--- a/lab/case/thesis/platform.py
+++ b/lab/case/thesis/platform.py
@@ -0,0 +1,176 @@
+"""Thesis platform with real MDP behavioral models and separability scoring."""
+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
+import numpy as np
+from ...outlet import (Platform, PlatformConfig, PositionModel, PositionConfig,
+                       PostedPriceMechanism, make_instruments, InstrumentType, LogLevel)
+from ...outlet.mechanisms.posted_price import PostedPriceConfig
+from ...outlet.observation import DefaultObservationBuilder, ObservationConfig
+from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig
+from .execution import HybridExecutionModel, HybridExecutionConfig
+from .objectives import RobustStackelbergObjective, RobustObjectiveConfig
+
+
+@dataclass
+class ThesisConfig:
+    # instruments
+    n_instruments: int = 10
+    cost_range: tuple[float, float] = (5.0, 50.0)
+    margin_range: tuple[float, float] = (0.2, 0.5)
+
+    # contamination (Section 3.1)
+    alpha_contamination: float = 0.2
+    alpha_drift: float = 0.0
+    alpha_bounds: tuple[float, float] = (0.0, 0.5)
+
+    # objectives (Eq 23)
+    lambda_coi: float = 0.5
+    lambda_ux: float = 0.1
+    lambda_volatility: float = 0.2
+    wasserstein_epsilon: float = 0.1
+
+    # arrivals
+    sessions_per_step: int = 30
+    human_views_range: tuple[int, int] = (1, 4)
+    agent_views_range: tuple[int, int] = (3, 10)
+
+    # inventory
+    initial_inventory: float = 100.0
+    holding_cost_rate: float = 0.002
+
+    # real behavioral models (from sim.rl)
+    use_real_behavior: bool = True
+    use_separability: bool = False  # disabled until classifier trained
+    human_data_dir: str = ""  # empty -> _resolve_data_dirs uses <repo>/experiments/collected_data
+    agent_data_dir: str = ""  # empty -> _resolve_data_dirs uses <repo>/experiments/agents/collected_data
+
+    # simulation
+    max_steps: int = 500
+    seed: int | None = 24
+    log_level: LogLevel = LogLevel.AGG_ONLY
+
+
+def _resolve_data_dirs(cfg: ThesisConfig) -> tuple[str, str]:
+    """Resolve data directories for behavioral models."""
+    base = Path(__file__).parent.parent.parent.parent / "experiments"
+    human = cfg.human_data_dir or str(base / "collected_data")
+    agent = cfg.agent_data_dir or str(base / "agents/collected_data")
+    return human, agent
+
+
+def make_thesis_platform(cfg: ThesisConfig | None = None) -> Platform:
+    """Create platform with real MDP behavioral models.
+
+    Implements:
+    - Contaminated arrivals using learned MDP kernels from behavior_loader
+    - Hybrid execution with real separability scoring from lib.separability
+    - Robust Stackelberg objective (Eq 23)
+    """
+    cfg = cfg or ThesisConfig()
+    rng = np.random.default_rng(cfg.seed)
+    human_dir, agent_dir = _resolve_data_dirs(cfg)
+
+    instruments = make_instruments(
+        n=cfg.n_instruments, cost_range=cfg.cost_range, margin_range=cfg.margin_range,
+        inst_type=InstrumentType.SKU, rng=rng)
+    instruments.position = np.full(cfg.n_instruments, cfg.initial_inventory)
+
+    arrival = ContaminatedArrivalModel(ContaminatedArrivalConfig(
+        base_rate=cfg.sessions_per_step,
+        alpha_contamination=cfg.alpha_contamination,
+        alpha_drift=cfg.alpha_drift,
+        alpha_bounds=cfg.alpha_bounds,
+        human_views_range=cfg.human_views_range,
+        agent_views_range=cfg.agent_views_range,
+        use_real_behavior=cfg.use_real_behavior,
+        human_data_dir=human_dir,
+        agent_data_dir=agent_dir,
+    ))
+
+    execution = HybridExecutionModel(HybridExecutionConfig(
+        use_separability=cfg.use_separability,
+    ))
+
+    mechanism = PostedPriceMechanism(PostedPriceConfig(max_delta_pct=0.15, min_margin_pct=0.05))
+    position = PositionModel(PositionConfig(initial_position=cfg.initial_inventory, holding_cost_rate=cfg.holding_cost_rate))
+
+    market = None
+    objective = RobustStackelbergObjective(RobustObjectiveConfig(
+        lambda_coi=cfg.lambda_coi, lambda_ux=cfg.lambda_ux,
+        lambda_volatility=cfg.lambda_volatility, wasserstein_epsilon=cfg.wasserstein_epsilon))
+
+    obs_builder = DefaultObservationBuilder(ObservationConfig(mask_true_demand=True))
+    platform_cfg = PlatformConfig(n_instruments=cfg.n_instruments, max_steps=cfg.max_steps,
+                                   seed=cfg.seed, log_level=cfg.log_level, mask_demand=True)
+
+    return Platform(instruments=instruments, mechanism=mechanism, arrival=arrival, execution=execution,
+                    position=position, market=market, obs_builder=obs_builder, objective=objective, cfg=platform_cfg)
+
+
+@dataclass
+class AblationConfig(ThesisConfig):
+    disable_coi_penalty: bool = False
+    disable_ux_penalty: bool = False
+    disable_contamination: bool = False
+    disable_real_behavior: bool = False
+
+
+def make_ablation_platform(cfg: AblationConfig) -> Platform:
+    """Build the thesis platform with the components flagged in ``cfg`` disabled."""
+    from dataclasses import replace  # local import: the module only imports ``dataclass``
+    zeroed = {'lambda_coi': cfg.disable_coi_penalty, 'lambda_ux': cfg.disable_ux_penalty,
+              'alpha_contamination': cfg.disable_contamination}
+    overrides = {name: 0.0 for name, disabled in zeroed.items() if disabled}
+    if cfg.disable_real_behavior:
+        overrides.update(use_real_behavior=False, use_separability=False)
+    # Apply the overrides to a copy so the caller's config object is left untouched
+    # (the previous version wrote the disabled values back into ``cfg``).
+    return make_thesis_platform(replace(cfg, **overrides))
+
+
+def sweep_contamination(alpha_values: list[float], base_cfg: ThesisConfig | None = None,
+                        n_steps: int = 100, seed: int = 42) -> dict[float, dict]:
+    """Test performance across contamination levels (Theorem 1 validation)."""
+    from dataclasses import replace
+    from ...experiments.eval import rollout, fixed_price_policy
+
+    results = {}
+    base_cfg = base_cfg or ThesisConfig()
+    for alpha in alpha_values:
+        # replace() copies every field, so a ThesisConfig subclass (e.g. AblationConfig) works as base
+        cfg = replace(base_cfg, alpha_contamination=alpha)
+        platform = make_thesis_platform(cfg)
+        policy = fixed_price_policy(platform.instruments.refs)
+        result = rollout(platform, policy, n_steps, seed=seed)
+        results[alpha] = {
+            'total_reward': result.total_reward,
+            'total_pnl': result.total_pnl,
+            'avg_conversion': result.avg_conversion,
+            'final_contamination': platform._hidden.contamination,
+        }
+    return results
+
+
+def sweep_behavior_modes(base_cfg: ThesisConfig | None = None, n_steps: int = 100, seed: int = 42) -> dict[str, dict]:
+    """Compare real vs synthetic behavioral models."""
+    from dataclasses import replace  # copies subclass fields too, unlike ThesisConfig(**cfg.__dict__)
+    from ...experiments.eval import rollout, fixed_price_policy
+
+    base_cfg = base_cfg or ThesisConfig()
+    modes = {
+        'real_mdp': replace(base_cfg, use_real_behavior=True, use_separability=True),
+        'synthetic': replace(base_cfg, use_real_behavior=False, use_separability=False),
+        'real_mdp_no_sep': replace(base_cfg, use_real_behavior=True, use_separability=False),
+    }
+    results = {}
+    for name, cfg in modes.items():
+        platform = make_thesis_platform(cfg)
+        policy = fixed_price_policy(platform.instruments.refs)
+        result = rollout(platform, policy, n_steps, seed=seed)
+        results[name] = {
+            'total_reward': result.total_reward,
+            'total_pnl': result.total_pnl,
+            'avg_conversion': result.avg_conversion,
+        }
+    return results
--- a/lab/case/thesis/run_experiment.py
+++ b/lab/case/thesis/run_experiment.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+"""Thesis simulation experiments with real MDP behavioral models."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+if __name__ == '__main__':
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
+
+from lab.case.thesis.platform import make_thesis_platform, ThesisConfig
+from lab.case.thesis.metrics import compute_coi, compute_separability
+from lab.experiments.eval import compare_policies
+import numpy as np
+
+
+def demo_basic_simulation():
+    print("=" * 70)
+    print("THESIS SIMULATION: Contaminated Dynamic Pricing (Real MDP Kernels)")
+    print("=" * 70)
+
+    cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, lambda_coi=0.5,
+                       max_steps=100, seed=42, use_real_behavior=True)
+    platform = make_thesis_platform(cfg)
+
+    print(f"\nInstruments: {platform.instruments.n}")
+    print(f"Reference prices: {platform.instruments.refs.round(2)}")
+    print(f"Costs: {platform.instruments.costs.round(2)}")
+    print(f"Initial contamination alpha={cfg.alpha_contamination}")
+    print(f"Using real behavior: {cfg.use_real_behavior}")
+
+    result = platform.reset(seed=42)
+    rng = np.random.default_rng(cfg.seed)  # seeded: the global np.random state made runs non-reproducible
+    total_reward, coi_history = 0, []
+    print(f"\n{'Step':>5} {'Reward':>10} {'PnL':>10} {'COI':>8} {'alpha':>6} {'Conv':>8}")
+    print("-" * 55)
+
+    for t in range(cfg.max_steps):
+        action = platform.instruments.refs * rng.uniform(0.95, 1.15, size=platform.instruments.n)
+        result = platform.step(action)
+        total_reward += result.reward
+        coi = compute_coi(platform._quote, platform.instruments, result.metrics, result.hidden.contamination)
+        coi_history.append(coi.coi_level)
+
+        if t % 20 == 0:
+            print(f"{t:5d} {result.reward:10.2f} {result.metrics.pnl:10.2f} "
+                  f"{coi.coi_level:8.2f} {result.hidden.contamination:6.2f} {result.metrics.conversion:8.3f}")
+
+    print("-" * 55)
+    print(f"Total Reward: {total_reward:.2f}")
+    print(f"Average COI: {np.mean(coi_history):.2f}")
+    print(f"COI Trend: {coi_history[-1] - coi_history[0]:+.2f}")
+
+
+def demo_contamination_sweep():
+    print("\n" + "=" * 70)
+    print("EXPERIMENT: COI Erosion vs Contamination (Theorem 1)")
+    print("=" * 70)
+
+    from lab.case.thesis.platform import sweep_contamination
+    trials = 20
+    # NOTE(review): sweeps 0.00..0.95 while ThesisConfig.alpha_bounds defaults to (0.0, 0.5) — confirm no clipping
+    alpha_values = [i/trials for i in range(trials)]
+    results = sweep_contamination(alpha_values, n_steps=100, seed=42)
+    print(f"\n{'alpha':>6} {'Reward':>12} {'PnL':>12} {'Conv':>10}")
+    print("-" * 45)
+    for alpha, m in sorted(results.items()):
+        print(f"{alpha:6.2f} {m['total_reward']:12.2f} {m['total_pnl']:12.2f} {m['avg_conversion']:10.3f}")
+
+    rewards = [results[a]['total_reward'] for a in sorted(results.keys())]
+    dataset = np.array([[a, r] for a, r in zip(alpha_values, rewards)])  # alpha_values is already ascending
+    trend = np.corrcoef(dataset[:, 0], dataset[:, 1])[0, 1]
+    print(f"Trend (alpha~reward correlation): {trend:.3f}")
+
+
+def demo_policy_comparison():
+    print("\n" + "=" * 70)
+    print("EXPERIMENT: Policy Comparison under Contamination")
+    print("=" * 70)
+
+    cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.25, max_steps=100, seed=42)
+    platform = make_thesis_platform(cfg)
+
+    def fixed_policy(obs, t): return platform.instruments.refs.copy(), 1.0
+    def aggressive_policy(obs, t): return platform.instruments.refs * 1.3, 1.0
+    def conservative_policy(obs, t): return platform.instruments.refs * 1.05, 1.0
+    def adaptive_policy(obs, t):
+        fills = obs[platform.instruments.n:2*platform.instruments.n]
+        exp = obs[2*platform.instruments.n:3*platform.instruments.n]
+        conv = np.sum(fills) / (np.sum(exp) + 1e-8)
+        return platform.instruments.refs * (1.0 + 0.2 * conv), 1.0
+
+    policies = {'fixed': fixed_policy, 'aggressive': aggressive_policy,
+                'conservative': conservative_policy, 'adaptive': adaptive_policy}
+    results = compare_policies(platform, policies, n_steps=100, n_runs=3, seed=42)
+
+    print(f"\n{'Policy':>15} {'Reward':>12} {'Std':>10} {'PnL':>12} {'Conv':>10}")
+    print("-" * 65)
+    for name, r in sorted(results.items(), key=lambda x: -x[1]['mean_reward']):
+        print(f"{name:>15} {r['mean_reward']:12.2f} {r['std_reward']:10.2f} "
+              f"{r['mean_pnl']:12.2f} {r['mean_conversion']:10.3f}")
+
+
+def demo_session_analysis():
+    """Analyze session-level behavior from MDP trajectories."""
+    print("\n" + "=" * 70)
+    print("EXPERIMENT: Session Analysis (Ground Truth)")
+    print("=" * 70)
+
+    from lab.outlet.constants import LogLevel
+    cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, max_steps=50,
+                       log_level=LogLevel.FULL, seed=42, use_real_behavior=True)
+    platform = make_thesis_platform(cfg)
+
+    result = platform.reset(seed=42)
+    human_sessions, agent_sessions = 0, 0
+    for t in range(cfg.max_steps):
+        action = platform.instruments.refs * 1.1
+        result = platform.step(action)
+        sep = compute_separability(result.logs, result.hidden.contamination)
+        human_sessions += sep.n_human_sessions
+        agent_sessions += sep.n_agent_sessions
+
+    total = human_sessions + agent_sessions
+    denom = max(total, 1)  # no logged sessions -> print 0.0% instead of raising ZeroDivisionError
+    print(f"\nTotal sessions: {total}")
+    print(f"Human sessions: {human_sessions} ({100*human_sessions/denom:.1f}%)")
+    print(f"Agent sessions: {agent_sessions} ({100*agent_sessions/denom:.1f}%)")
+    print(f"True contamination: {cfg.alpha_contamination:.1%}")
+    print(f"Observed contamination: {agent_sessions/denom:.1%}")
+
+
+if __name__ == '__main__':
+    demo_basic_simulation()
+    demo_contamination_sweep()
+    # demo_policy_comparison()
+    # demo_session_analysis()
--- a/lab/config.py
+++ b/lab/config.py
@@ -0,0 +1,156 @@
+"""
+Configuration and factory functions for creating pre-configured platforms.
+
+This module provides:
+- RetailConfig, MarketMakingConfig: Configuration dataclasses
+- make_retail_platform: Factory for retail dynamic pricing scenarios
+- make_market_making_platform: Factory for market making scenarios
+
+Example:
+    >>> from lab.config import make_retail_platform, RetailConfig
+    >>> platform = make_retail_platform(RetailConfig(n_instruments=5))
+    >>> result = platform.reset(seed=42)
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+from .outlet import (Platform, PlatformConfig, PositionModel, PositionConfig,
+                     PostedPriceMechanism, TwoSidedMechanism, make_instruments,
+                     InstrumentType, LogLevel)
+from .outlet.mechanisms.posted_price import PostedPriceConfig
+from .outlet.mechanisms.two_sided import TwoSidedConfig
+from .population import (SessionArrivalModel, PoissonArrivalModel, HawkesArrivalModel,
+                         ElasticityExecutionModel, IntensityExecutionModel,
+                         ReactiveCompetitorModel, GBMMarketModel)
+from .population.arrivals import SessionArrivalConfig, PoissonArrivalConfig, HawkesArrivalConfig
+from .population.execution import ElasticityConfig, IntensityConfig
+from .population.competitors import ReactiveCompetitorConfig, GBMMarketConfig
+from .outlet.objectives.factory import retail_objective, market_making_objective
+
+@dataclass
+class RetailConfig:
+    """Configuration for retail dynamic pricing scenario.
+
+    Attributes:
+        n_instruments: Number of products to price
+        cost_range: (min, max) for random product costs
+        margin_range: (min, max) for random initial margins
+        initial_inventory: Starting inventory per product
+        holding_cost_rate: Cost per unit per step for holding
+        sessions_per_step: Number of browsing sessions per step
+        contamination: Fraction of sessions that are scrapers
+        max_steps: Maximum episode length
+        seed: Random seed for reproducibility
+    """
+    n_instruments: int = 10
+    cost_range: tuple[float, float] = (5.0, 50.0)
+    margin_range: tuple[float, float] = (0.2, 0.5)
+    initial_inventory: float = 100.0
+    holding_cost_rate: float = 0.002
+    sessions_per_step: int = 30
+    contamination: float = 0.1
+    max_steps: int = 500
+    seed: int | None = None
+
+def make_retail_platform(cfg: RetailConfig | None = None) -> Platform:
+    """Create a pre-configured retail dynamic pricing platform.
+
+    Components:
+    - Mechanism: PostedPriceMechanism (single price per product)
+    - Arrivals: SessionArrivalModel (browsing sessions with views)
+    - Execution: ElasticityExecutionModel (price sensitivity)
+    - Market: ReactiveCompetitorModel (can trigger price wars)
+    - Objective: PnL - holding_cost - volatility - lost_opportunity
+
+    Args:
+        cfg: Configuration (uses defaults if None)
+
+    Returns:
+        Configured Platform instance
+    """
+    cfg = cfg or RetailConfig()
+    rng = np.random.default_rng(cfg.seed)
+
+    instruments = make_instruments(cfg.n_instruments, cfg.cost_range, cfg.margin_range,
+                                   InstrumentType.SKU, rng)
+    instruments.position = np.full(cfg.n_instruments, cfg.initial_inventory)
+
+    mechanism = PostedPriceMechanism(PostedPriceConfig())
+    arrival = SessionArrivalModel(SessionArrivalConfig(
+        sessions_per_step=cfg.sessions_per_step, contamination=cfg.contamination))
+    execution = ElasticityExecutionModel(ElasticityConfig())
+    position = PositionModel(PositionConfig(
+        initial_position=cfg.initial_inventory,
+        holding_cost_rate=cfg.holding_cost_rate))
+    market = ReactiveCompetitorModel(ReactiveCompetitorConfig(), refs=instruments.refs)
+    objective = retail_objective()
+
+    return Platform(
+        instruments=instruments, mechanism=mechanism, arrival=arrival,
+        execution=execution, position=position, market=market, objective=objective,
+        cfg=PlatformConfig(n_instruments=cfg.n_instruments, max_steps=cfg.max_steps,
+                           seed=cfg.seed, log_level=LogLevel.AGG_ONLY)
+    )
+
+@dataclass
+class MarketMakingConfig:
+    """Configuration for market making scenario.
+
+    Attributes:
+        n_instruments: Number of assets to quote
+        initial_mid: Initial mid-price for assets
+        mu: Price drift (expected return)
+        sigma: Price volatility
+        gamma: Inventory risk aversion parameter
+        base_arrival_rate: Order arrival rate (Hawkes baseline)
+        max_steps: Maximum episode length
+        seed: Random seed for reproducibility
+    """
+    n_instruments: int = 5
+    initial_mid: float = 100.0
+    mu: float = 0.0
+    sigma: float = 0.02
+    gamma: float = 0.1
+    base_arrival_rate: float = 20.0
+    max_steps: int = 1000
+    seed: int | None = None
+
+def make_market_making_platform(cfg: MarketMakingConfig | None = None) -> Platform:
+    """Create a pre-configured market making platform.
+
+    Components:
+    - Mechanism: TwoSidedMechanism (bid-ask spread quoting)
+    - Arrivals: HawkesArrivalModel (clustered order flow)
+    - Execution: IntensityExecutionModel (distance-based fills)
+    - Market: GBMMarketModel (geometric Brownian motion mid-prices)
+    - Objective: PnL + spread_capture - inventory_risk
+
+    Args:
+        cfg: Configuration (uses defaults if None)
+
+    Returns:
+        Configured Platform instance
+    """
+    cfg = cfg or MarketMakingConfig()
+    rng = np.random.default_rng(cfg.seed)
+
+    instruments = make_instruments(cfg.n_instruments, (cfg.initial_mid*0.9, cfg.initial_mid*1.1),
+                                   (0.0, 0.0), InstrumentType.ASSET, rng)
+    instruments.position = np.zeros(cfg.n_instruments)
+
+    mechanism = TwoSidedMechanism(TwoSidedConfig())
+    arrival = HawkesArrivalModel(HawkesArrivalConfig(base_rate=cfg.base_arrival_rate))
+    execution = IntensityExecutionModel(IntensityConfig())
+    position = PositionModel(PositionConfig(
+        initial_position=0.0, min_position=-500, max_position=500,
+        holding_cost_rate=0.0))  # use inventory risk penalty instead
+    market = GBMMarketModel(GBMMarketConfig(mu=cfg.mu, sigma=cfg.sigma),
+                            initial=instruments.refs)
+    objective = market_making_objective(gamma=cfg.gamma, sigma=cfg.sigma)
+
+    return Platform(
+        instruments=instruments, mechanism=mechanism, arrival=arrival,
+        execution=execution, position=position, market=market, objective=objective,
+        cfg=PlatformConfig(n_instruments=cfg.n_instruments, max_steps=cfg.max_steps,
+                           seed=cfg.seed, log_level=LogLevel.AGG_ONLY)
+    )
--- a/lab/docs/Makefile
+++ b/lab/docs/Makefile
@@ -0,0 +1,12 @@
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/lab/docs/conf.py
+++ b/lab/docs/conf.py
@@ -0,0 +1,39 @@
+import os
+import sys
+sys.path.insert(0, os.path.abspath('../..'))
+
+project = 'Quote-Control Simulator'
+copyright = '2025, PHANTOM Research'
+author = 'PHANTOM Research'
+release = '0.1.0'
+
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.napoleon',
+    'sphinx.ext.viewcode',
+    'sphinx.ext.intersphinx',
+    'sphinx.ext.autosummary',
+]
+
+templates_path = ['_templates']
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+html_theme = 'alabaster'
+html_static_path = ['_static']
+
+autodoc_default_options = {
+    'members': True,
+    'undoc-members': True,
+    'show-inheritance': True,
+}
+
+napoleon_google_docstring = True
+napoleon_numpy_docstring = True
+napoleon_include_init_with_doc = True
+
+intersphinx_mapping = {
+    'python': ('https://docs.python.org/3', None),
+    'numpy': ('https://numpy.org/doc/stable/', None),
+}
+
+autosummary_generate = True
--- a/lab/docs/index.rst
+++ b/lab/docs/index.rst
@@ -0,0 +1,40 @@
+Quote-Control Simulator
+=======================
+
+Research-grade platform for dynamic pricing and market making experiments.
+
+The platform abstracts pricing as: **Quote → Arrival → Execution → Position**
+
+Supports multiple mechanisms:
+
+* **PostedPrice**: retail dynamic pricing
+* **TwoSided**: market making with bid-ask spreads
+* **Auction**: reserve/shading for auction settings
+
+Quick Start
+-----------
+
+.. code-block:: python
+
+   from lab.config import make_retail_platform
+   from lab.experiments import rollout, fixed_price_policy
+
+   platform = make_retail_platform()
+   policy = fixed_price_policy(platform.instruments.refs)
+   result = rollout(platform, policy, n_steps=100)
+   print(f"Total PnL: {result.total_pnl:.2f}")
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   system_overview
+   modules/outlet
+   modules/population
+   modules/experiments
+
+Indices
+-------
+
+* :ref:`genindex`
+* :ref:`modindex`
--- a/lab/docs/modules/experiments.rst
+++ b/lab/docs/modules/experiments.rst
@@ -0,0 +1,14 @@
+Experiments
+===========
+
+Evaluation & OPE
+----------------
+
+.. automodule:: lab.experiments.eval
+   :members:
+
+Configuration
+-------------
+
+.. automodule:: lab.config
+   :members:
--- a/lab/docs/modules/outlet.rst
+++ b/lab/docs/modules/outlet.rst
@@ -0,0 +1,77 @@
+Outlet (Core Simulator)
+=======================
+
+Types
+-----
+
+.. automodule:: lab.outlet.types
+   :members:
+
+Constants
+---------
+
+.. automodule:: lab.outlet.constants
+   :members:
+
+Protocols
+---------
+
+.. automodule:: lab.outlet.protocols
+   :members:
+
+Platform
+--------
+
+.. automodule:: lab.outlet.platform
+   :members:
+
+Stock & Position
+----------------
+
+.. automodule:: lab.outlet.stock
+   :members:
+
+Observation
+-----------
+
+.. automodule:: lab.outlet.observation
+   :members:
+
+Mechanisms
+----------
+
+Posted Price
+~~~~~~~~~~~~
+
+.. automodule:: lab.outlet.mechanisms.posted_price
+   :members:
+
+Two-Sided (Market Making)
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: lab.outlet.mechanisms.two_sided
+   :members:
+
+Auction
+~~~~~~~
+
+.. automodule:: lab.outlet.mechanisms.auction
+   :members:
+
+Objectives
+----------
+
+.. automodule:: lab.outlet.objectives.base
+   :members:
+
+.. automodule:: lab.outlet.objectives.penalties
+   :members:
+
+.. automodule:: lab.outlet.objectives.factory
+   :members:
+
+Math Utilities
+--------------
+
+.. automodule:: lab.outlet.math_util
+   :members:
--- a/lab/docs/modules/population.rst
+++ b/lab/docs/modules/population.rst
@@ -0,0 +1,20 @@
+Population Models
+=================
+
+Arrival Models
+--------------
+
+.. automodule:: lab.population.arrivals
+   :members:
+
+Execution Models
+----------------
+
+.. automodule:: lab.population.execution
+   :members:
+
+Competitor / Market Models
+--------------------------
+
+.. automodule:: lab.population.competitors
+   :members:
--- a/lab/docs/system_overview.rst
+++ b/lab/docs/system_overview.rst
@@ -0,0 +1,97 @@
+System Overview
+===============
+
+The simulator organises dynamic pricing and market-making experiments as a
+closed loop with the following stages:
+
+* **Quote** – a policy or agent emits a :class:`lab.outlet.types.Quote`. The
+  quote is normalised and validated by a concrete
+  :class:`lab.outlet.protocols.Mechanism` implementation
+  (posted-price, two-sided, auction).
+* **Arrival** – a :class:`lab.outlet.protocols.ArrivalModel` samples a stream of
+  :class:`lab.outlet.types.Opportunity` objects given the current time,
+  instrument catalogue, and market state.
+* **Execution** – the :class:`lab.outlet.protocols.ExecutionModel` converts an
+  opportunity into a probabilistic fill using the active quote, optional
+  competitor prices, and demand-side context.
+* **Position** – a :class:`lab.outlet.protocols.PositionModel` enforces
+  inventory or position constraints, censors oversized fills, and accrues
+  holding and shortage costs.
+* **Observation & Reward** – the
+  :class:`lab.outlet.protocols.ObservationBuilder` constructs the censored view
+  exposed to the agent, while a :class:`lab.outlet.protocols.Objective`
+  transforms :class:`lab.outlet.types.StepMetrics` into a scalar reward with an
+  optional breakdown per term.
+
+These components are orchestrated by :class:`lab.outlet.platform.Platform`,
+which manages internal hidden state, deterministic seeding, and logging.
+
+Component Matrix
+----------------
+
+===============================  ==============================================
+Layer                            Responsibilities / Examples
+===============================  ==============================================
+Mechanisms                       Quote normalisation, execution semantics
+                                 (``posted_price``, ``two_sided``, ``auction``).
+Population models                Arrivals (:mod:`lab.population.arrivals`),
+                                 execution probability models
+                                 (:mod:`lab.population.execution`), and
+                                 competitor or market dynamics
+                                 (:mod:`lab.population.competitors`).
+Position management              Inventory limits, replenishment, holding and
+                                 shortage costs (:mod:`lab.outlet.stock`).
+Observation & logging            Censored observations and optional event logs
+                                 (:mod:`lab.outlet.observation`).
+Objectives                       Reward composition utilities
+                                 (:mod:`lab.outlet.objectives`).
+Experiments                      Rollout helpers, baseline policies, off-policy
+                                 evaluation (:mod:`lab.experiments.eval`).
+===============================  ==============================================
+
+Preconfigured Platforms
+-----------------------
+
+Two high-level factories in :mod:`lab.config` wire common combinations of the
+building blocks:
+
+* **Retail dynamic pricing** – posted-price mechanism, session arrivals with
+  contamination, elasticity-based executions, reactive competitor model, and a
+  composite objective that penalises volatility, holding costs, and lost
+  opportunities.
+* **Market making** – two-sided quoting, Hawkes order flow, intensity-based
+  executions, geometric Brownian motion mid-prices, and an objective combining
+  PnL, spread capture, and quadratic inventory risk.
+
+State & Reset Behaviour
+-----------------------
+
+When you call :meth:`lab.outlet.platform.Platform.reset`, the platform resets
+instrument positions, quotes, and hidden state, but component implementations
+may maintain their own internal buffers. For reproducible experiments:
+
+* Use freshly instantiated arrival/market models per episode, or add explicit
+  ``reset`` methods if the model keeps history (for example,
+  :class:`lab.population.arrivals.HawkesArrivalModel` maintains an event
+  history, while :class:`lab.population.competitors.ReactiveCompetitorModel`
+  tracks prior competitor quotes).
+* Seed randomness through the factory configuration (``RetailConfig.seed`` or
+  ``MarketMakingConfig.seed``) or pass a seed to ``Platform.reset`` for
+  deterministic rollouts.
+
+Extending the Platform
+----------------------
+
+To support a new domain:
+
+1. Create custom Mechanism/Arrival/Execution/Market/Observation components by
+   implementing the respective protocol in :mod:`lab.outlet.protocols`.
+2. Compose a new objective with
+   :func:`lab.outlet.objectives.factory.make_composite` or write a bespoke
+   :class:`lab.outlet.objectives.base.BaseObjective`.
+3. Wire everything together via :class:`lab.outlet.platform.Platform` directly
+   or expose a helper factory in :mod:`lab.config`.
+
+Use :func:`lab.experiments.eval.rollout` and
+:func:`lab.experiments.eval.compare_policies` to benchmark candidate policies
+under multiple random seeds, collecting per-step logs for analysis or OPE.
--- a/lab/experiments/__init__.py
+++ b/lab/experiments/__init__.py
@@ -0,0 +1,7 @@
+from .eval import (rollout, RolloutResult, compare_policies, compute_ips, OPEResult,
+                   fixed_price_policy, cost_plus_margin_policy, random_walk_policy, epsilon_greedy_policy)
+
+__all__ = [
+    'rollout', 'RolloutResult', 'compare_policies', 'compute_ips', 'OPEResult',
+    'fixed_price_policy', 'cost_plus_margin_policy', 'random_walk_policy', 'epsilon_greedy_policy',
+]
--- a/lab/experiments/eval.py
+++ b/lab/experiments/eval.py
@@ -0,0 +1,213 @@
+"""
+Evaluation utilities for policy testing and off-policy evaluation.
+
+This module provides:
+- rollout: Run a policy on the platform for multiple steps
+- compare_policies: Compare multiple policies with statistics
+- Baseline policies: fixed_price, cost_plus_margin, random_walk, epsilon_greedy
+- OPE estimators: IPS and SNIPS for off-policy evaluation
+
+Example:
+    >>> from lab.config import make_retail_platform
+    >>> from lab.experiments.eval import rollout, fixed_price_policy
+    >>> platform = make_retail_platform()
+    >>> policy = fixed_price_policy(platform.instruments.refs)
+    >>> result = rollout(platform, policy, n_steps=100)
+    >>> print(f"Total PnL: {result.total_pnl:.2f}")
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Callable, Any
+import numpy as np
+from ..outlet.platform import Platform
+from ..outlet.types import StepResult, StepLogs, Quote
+
+# Policy signature: takes (observation_flat, timestep) -> (action_prices, propensity).
+# ``rollout`` calls the policy once per step and forwards both returned values
+# to ``Platform.step``.
+Policy = Callable[[np.ndarray, int], tuple[np.ndarray, float]]
+
+@dataclass
+class RolloutResult:
+    """Results from a policy rollout.
+
+    Attributes:
+        rewards: Reward returned by each executed step
+        metrics: ``metrics`` object returned by each executed step
+        logs: ``logs`` object returned by each executed step
+        total_reward: Sum of ``rewards``
+        total_pnl: Sum of the ``pnl`` attribute over ``metrics``
+        avg_conversion: Mean of the ``conversion`` attribute over ``metrics``
+            (0.0 when no step was executed)
+    """
+    rewards: list[float]
+    metrics: list[Any]
+    logs: list[StepLogs]
+    total_reward: float
+    total_pnl: float
+    avg_conversion: float
+
+def rollout(platform: Platform, policy: Policy, n_steps: int, seed: int | None = None) -> RolloutResult:
+    """Execute a policy on the platform for up to n_steps.
+
+    The platform is reset with ``seed`` first. Each step feeds the flattened
+    observation of the previous result to the policy and passes the returned
+    action and propensity to ``platform.step``. The loop stops early as soon
+    as a step reports ``terminated`` or ``truncated``.
+
+    Args:
+        platform: The simulation platform
+        policy: Function (obs, t) -> (action, propensity)
+        n_steps: Maximum number of steps to run
+        seed: Random seed forwarded to ``platform.reset``
+
+    Returns:
+        RolloutResult with rewards, metrics, logs, and summary statistics.
+        When no step is executed (``n_steps <= 0``) the lists are empty and
+        all summary statistics are 0.
+    """
+    result = platform.reset(seed)
+    rewards, metrics, logs = [], [], []
+
+    for t in range(n_steps):
+        action, propensity = policy(result.obs.to_flat(), t)
+        result = platform.step(action, propensity)
+        rewards.append(result.reward)
+        metrics.append(result.metrics)
+        logs.append(result.logs)
+        if result.terminated or result.truncated:
+            break
+
+    # np.mean([]) emits a RuntimeWarning and yields NaN, which would poison
+    # any aggregate computed across several rollouts; report 0.0 instead.
+    if metrics:
+        avg_conversion = float(np.mean([m.conversion for m in metrics]))
+    else:
+        avg_conversion = 0.0
+
+    return RolloutResult(
+        rewards=rewards, metrics=metrics, logs=logs,
+        total_reward=sum(rewards),
+        total_pnl=sum(m.pnl for m in metrics),
+        avg_conversion=avg_conversion,
+    )
+
+# Baseline policies for comparison
+
+def fixed_price_policy(refs: np.ndarray) -> Policy:
+    """Build a policy that quotes the reference prices at every step.
+
+    The returned callable ignores its observation and timestep arguments and
+    returns a fresh copy of ``refs`` together with a propensity of 1.0.
+    """
+    def policy(obs: np.ndarray, t: int) -> tuple[np.ndarray, float]:
+        action = refs.copy()
+        return action, 1.0
+    return policy
+
+def cost_plus_margin_policy(costs: np.ndarray, margin: float = 0.3) -> Policy:
+    """Build a policy that always quotes ``costs * (1 + margin)``.
+
+    The prices are computed once when the policy is built; every call returns
+    a copy of them with a propensity of 1.0.
+    """
+    markup = 1 + margin
+    prices = costs * markup
+    def policy(obs: np.ndarray, t: int) -> tuple[np.ndarray, float]:
+        action = prices.copy()
+        return action, 1.0
+    return policy
+
+def random_walk_policy(refs: np.ndarray, volatility: float = 0.05,
+                       rng: np.random.Generator | None = None) -> Policy:
+    """Build a policy whose prices follow a multiplicative random walk.
+
+    The walk starts at ``refs``. On each call every price is multiplied by
+    ``1 + N(0, volatility)`` and then clipped to ``[0.5 * refs, 2.0 * refs]``.
+    The policy always reports a propensity of 1.0.
+    """
+    generator = rng or np.random.default_rng()
+    current = refs.copy()
+    def policy(obs: np.ndarray, t: int) -> tuple[np.ndarray, float]:
+        nonlocal current
+        shocks = generator.normal(0, volatility, len(current))
+        stepped = current * (1 + shocks)
+        current = np.clip(stepped, refs * 0.5, refs * 2.0)
+        return current.copy(), 1.0
+    return policy
+
+def epsilon_greedy_policy(base_policy: Policy, refs: np.ndarray,
+                          epsilon: float = 0.1, rng: np.random.Generator | None = None) -> Policy:
+    """Wrap a policy with epsilon-greedy exploration.
+
+    With probability ``epsilon`` the wrapped policy quotes ``refs`` scaled by
+    independent uniform factors in [0.8, 1.2] and reports a propensity of
+    ``epsilon / len(refs)``. Otherwise it returns the base policy's action
+    with a propensity of ``1 - epsilon``; the base policy's own propensity is
+    discarded.
+    """
+    generator = rng or np.random.default_rng()
+    def policy(obs: np.ndarray, t: int) -> tuple[np.ndarray, float]:
+        explore = generator.random() < epsilon
+        if not explore:
+            action, _ = base_policy(obs, t)
+            return action, 1 - epsilon
+        action = refs * generator.uniform(0.8, 1.2, len(refs))
+        return action, epsilon / len(refs)
+    return policy
+
+# Off-Policy Evaluation (OPE)
+
+@dataclass
+class OPEResult:
+    """Results from off-policy evaluation.
+
+    Attributes:
+        ips_estimate: Inverse Propensity Scoring estimate
+        snips_estimate: Self-normalized IPS estimate
+        n_samples: Number of samples used
+        effective_samples: Effective sample size,
+            ``sum(w)**2 / sum(w**2)`` over the importance weights
+    """
+    ips_estimate: float
+    snips_estimate: float
+    n_samples: int
+    effective_samples: float
+
+def compute_ips(logs: list[StepLogs], rewards: list[float],
+                target_policy: Policy, behavior_propensities: list[float] | None = None) -> OPEResult:
+    """Compute IPS and SNIPS estimators for off-policy evaluation.
+
+    Uses per-step behavior propensities to estimate the expected reward under
+    a target policy from data collected under a behavior policy. The target
+    propensity is fixed at 1.0 (deterministic target), so each importance
+    weight is ``1 / (behavior_propensity + 1e-8)``.
+
+    Args:
+        logs: Step logs; only read when ``behavior_propensities`` is None. The
+            propensity of a step is the mean propensity of its executions, or
+            1.0 when the step has no executions.
+        rewards: Observed rewards from the behavior policy, one per step
+        target_policy: Policy to evaluate (currently unused)
+        behavior_propensities: Per-step propensities that replace the ones
+            derived from ``logs``
+
+    Returns:
+        OPEResult with IPS, SNIPS estimates and sample statistics. With no
+        samples every field is 0.
+
+    Raises:
+        ValueError: If the number of propensities differs from the number of
+            rewards.
+    """
+    if behavior_propensities is None:
+        behavior_propensities = [
+            float(np.mean([e.propensity for e in log.executions])) if log.executions else 1.0
+            for log in logs
+        ]
+
+    # Work from the propensities alone so that an explicit override does not
+    # silently get truncated to the length of ``logs``.
+    propensities = np.asarray(behavior_propensities, dtype=float)
+    reward_arr = np.asarray(rewards, dtype=float)
+    if propensities.shape != reward_arr.shape:
+        raise ValueError(
+            f"got {propensities.size} propensities for {reward_arr.size} rewards"
+        )
+
+    n_samples = len(reward_arr)
+    if n_samples == 0:
+        # Avoid 0/0: there is nothing to estimate from.
+        return OPEResult(ips_estimate=0.0, snips_estimate=0.0,
+                         n_samples=0, effective_samples=0.0)
+
+    # target propensity would need obs reconstruction - simplified here
+    target_propensity = 1.0  # assume deterministic target
+    weights = target_propensity / (propensities + 1e-8)
+
+    weighted_sum = float(np.sum(weights * reward_arr))
+    weight_sum = float(np.sum(weights))
+
+    # IPS estimate
+    ips = weighted_sum / n_samples
+
+    # SNIPS (self-normalized)
+    snips = weighted_sum / (weight_sum + 1e-8)
+
+    # effective sample size
+    ess = (weight_sum ** 2) / (float(np.sum(weights ** 2)) + 1e-8)
+
+    return OPEResult(ips_estimate=ips, snips_estimate=snips,
+                     n_samples=n_samples, effective_samples=ess)
+
+def compare_policies(platform: Platform, policies: dict[str, Policy],
+                     n_steps: int = 100, n_runs: int = 5, seed: int = 42) -> dict[str, dict]:
+    """Summarise several policies over repeated rollouts.
+
+    Every policy is rolled out ``n_runs`` times on the same ``platform``
+    instance, using the seeds ``seed, seed + 1, ..., seed + n_runs - 1``.
+
+    Args:
+        platform: Simulation platform
+        policies: Dict mapping policy names to policy functions
+        n_steps: Steps per rollout
+        n_runs: Number of rollouts per policy
+        seed: Base random seed
+
+    Returns:
+        Dict mapping each policy name to a dict with the keys
+        ``mean_reward``, ``std_reward``, ``mean_pnl`` and ``mean_conversion``
+    """
+    summary = {}
+    for name, policy in policies.items():
+        runs = [rollout(platform, policy, n_steps, seed=seed + i) for i in range(n_runs)]
+        total_rewards = [r.total_reward for r in runs]
+        summary[name] = {
+            'mean_reward': np.mean(total_rewards),
+            'std_reward': np.std(total_rewards),
+            'mean_pnl': np.mean([r.total_pnl for r in runs]),
+            'mean_conversion': np.mean([r.avg_conversion for r in runs]),
+        }
+    return summary
--- a/lab/outlet/__init__.py
+++ b/lab/outlet/__init__.py
@@ -0,0 +1,17 @@
+from .constants import Side, MechanismType, InstrumentType, OpportunityType, EventType, LogLevel
+from .types import (Instrument, InstrumentSet, Quote, Opportunity, Execution,
+                    StepEvent, StepLogs, StepMetrics, MarketState, HiddenState, Observation, StepResult)
+from .stock import PositionModel, PositionConfig, make_instruments
+from .platform import Platform, PlatformConfig
+from .observation import DefaultObservationBuilder, ObservationConfig
+from .mechanisms import PostedPriceMechanism, TwoSidedMechanism, AuctionMechanism
+
+__all__ = [
+    'Side', 'MechanismType', 'InstrumentType', 'OpportunityType', 'EventType', 'LogLevel',
+    'Instrument', 'InstrumentSet', 'Quote', 'Opportunity', 'Execution',
+    'StepEvent', 'StepLogs', 'StepMetrics', 'MarketState', 'HiddenState', 'Observation', 'StepResult',
+    'PositionModel', 'PositionConfig', 'make_instruments',
+    'Platform', 'PlatformConfig',
+    'DefaultObservationBuilder', 'ObservationConfig',
+    'PostedPriceMechanism', 'TwoSidedMechanism', 'AuctionMechanism',
+]
--- a/lab/outlet/constants.py
+++ b/lab/outlet/constants.py
@@ -0,0 +1,83 @@
+"""
+Constants and enumerations for the Quote-Control simulator.
+
+This module defines the core enums used throughout the platform to ensure
+type safety and consistent semantics across different pricing mechanisms.
+"""
+from enum import Enum, auto
+
+class Side(Enum):
+    """Which side of a transaction the counterparty takes.
+
+    Attributes:
+        BUY: The counterparty buys (customer purchase, market buy order)
+        SELL: The counterparty sells (market sell order, short sale)
+    """
+    BUY = auto()
+    SELL = auto()
+
+class MechanismType(Enum):
+    """Kind of pricing mechanism that turns quotes into executions.
+
+    Attributes:
+        POSTED_PRICE: One posted price per instrument (retail dynamic pricing)
+        TWO_SIDED_QUOTE: Bid and ask quotes (market making, liquidity provision)
+        AUCTION: Reserve price or bid shading (ad auctions, marketplaces)
+    """
+    POSTED_PRICE = auto()
+    TWO_SIDED_QUOTE = auto()
+    AUCTION = auto()
+
+class InstrumentType(Enum):
+    """Category of the instrument that is being priced.
+
+    Attributes:
+        SKU: Retail product subject to inventory constraints
+        ASSET: Financial instrument subject to position limits
+        LOAN: Credit product priced through its interest rate
+        SUBSCRIPTION: Recurring service billed with periodic fees
+    """
+    SKU = auto()
+    ASSET = auto()
+    LOAN = auto()
+    SUBSCRIPTION = auto()
+
+class OpportunityType(Enum):
+    """Category of an arriving opportunity.
+
+    Attributes:
+        SESSION: Retail browsing session that may lead to a purchase
+        MARKET_ORDER: Incoming financial market order (buy or sell)
+        REQUEST: Service or credit request that expects a quote in response
+    """
+    SESSION = auto()
+    MARKET_ORDER = auto()
+    REQUEST = auto()
+
+class EventType(Enum):
+    """Category of an event logged during simulation.
+
+    Attributes:
+        ARRIVAL: A new opportunity entered the system
+        EXPOSURE: A quote was shown to an arrival
+        EXECUTION: A transaction was executed
+        ABANDON: An opportunity left without an execution
+        CANCEL: A pending order was cancelled
+    """
+    ARRIVAL = auto()
+    EXPOSURE = auto()
+    EXECUTION = auto()
+    ABANDON = auto()
+    CANCEL = auto()
+
+class LogLevel(Enum):
+    """How much detail is recorded for each simulation step.
+
+    Attributes:
+        NONE: Nothing is logged (fastest execution)
+        AGG_ONLY: Only per-step aggregate statistics are logged
+        FULL: Event-level logging, including propensities for OPE
+    """
+    NONE = auto()
+    AGG_ONLY = auto()
+    FULL = auto()
--- a/lab/outlet/gym_wrapper.py
+++ b/lab/outlet/gym_wrapper.py
@@ -0,0 +1,86 @@
+"""
+Gymnasium-compatible wrapper for the Quote-Control platform.
+
+Provides a standard Gym interface for RL training:
+- observation_space: Box space with flattened observation
+- action_space: Box space with price multipliers [0.5, 2.0]
+- reset(), step(), render(), close() methods
+
+Example:
+    >>> from lab.config import make_retail_platform
+    >>> from lab.outlet.gym_wrapper import QuoteGymEnv
+    >>> env = QuoteGymEnv(make_retail_platform())
+    >>> obs, info = env.reset()
+    >>> obs, reward, done, truncated, info = env.step(env.action_space.sample())
+"""
+from __future__ import annotations
+from typing import Any
+import numpy as np
+
+try:
+    import gymnasium as gym
+    from gymnasium import spaces
+    HAS_GYM = True
+except ImportError:
+    HAS_GYM = False
+
+from .platform import Platform, PlatformConfig
+from .types import Quote, InstrumentSet, StepResult
+
+class QuoteGymEnv:
+    """Gym-style environment facade over a ``Platform``.
+
+    Actions are per-instrument price multipliers; ``step`` multiplies them by
+    the platform's reference prices before quoting. Observations are the
+    platform observation flattened with ``to_flat()`` and cast to float32.
+    """
+
+    def __init__(self, platform: Platform):
+        """Wrap ``platform``; raises ImportError when gymnasium is missing."""
+        if not HAS_GYM:
+            raise ImportError("gymnasium required for QuoteGymEnv")
+        self.platform = platform
+        self.n = platform.instruments.n
+        self._last_result: StepResult | None = None
+
+        # One multiplier per instrument, bounded to [0.5, 2.0].
+        self.action_space = spaces.Box(low=0.5, high=2.0, shape=(self.n,), dtype=np.float32)
+
+        # Sized as four per-instrument groups (quotes, fills, exposures,
+        # position) plus a fifth for competitor quotes when the platform has a
+        # market model. NOTE(review): assumed to match Observation.to_flat().
+        n_groups = 5 if platform.market else 4
+        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
+                                            shape=(self.n * n_groups,), dtype=np.float32)
+
+    def reset(self, seed: int | None = None, options: dict | None = None) -> tuple[np.ndarray, dict]:
+        """Reset the platform with ``seed``; ``options`` is accepted but unused."""
+        first = self.platform.reset(seed)
+        self._last_result = first
+        flat_obs = first.obs.to_flat().astype(np.float32)
+        return flat_obs, first.info
+
+    def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]:
+        """Quote ``refs * action`` and return (obs, reward, terminated, truncated, info)."""
+        prices = self.platform.instruments.refs * action
+        outcome = self.platform.step(prices)
+        self._last_result = outcome
+        flat_obs = outcome.obs.to_flat().astype(np.float32)
+        return flat_obs, outcome.reward, outcome.terminated, outcome.truncated, outcome.info
+
+    def render(self) -> None:
+        """Print a one-line summary of the last step's metrics, if any."""
+        if not self._last_result:
+            return
+        m = self._last_result.metrics
+        print(f"t={self.platform._t} pnl={m.pnl:.2f} units={m.units_traded:.0f} "
+              f"conv={m.conversion:.3f} vol={m.volatility:.3f}")
+
+    def close(self) -> None:
+        """No resources to release."""
+        pass
+
+def make_env(platform: Platform) -> QuoteGymEnv:
+    """Convenience factory: wrap ``platform`` in a ``QuoteGymEnv``."""
+    env = QuoteGymEnv(platform)
+    return env
+
+if HAS_GYM:
+    # Best-effort registration: a failure here must not break importing this
+    # module. Catching Exception (rather than a bare except) keeps
+    # KeyboardInterrupt and SystemExit propagating.
+    try:
+        # Build the entry point from __name__ so it resolves under whatever
+        # package path this module was imported as (e.g. lab.outlet.gym_wrapper).
+        gym.register(id='QuoteControl-v0', entry_point=f'{__name__}:QuoteGymEnv')
+    except Exception:
+        pass  # already registered or other issue
--- a/lab/outlet/math_util.py
+++ b/lab/outlet/math_util.py
@@ -0,0 +1,57 @@
+"""
+Numerical utilities for stable computation.
+
+This module provides numerically stable implementations of common operations:
+- safe_exp, safe_log: Avoid overflow/underflow
+- softmax: Numerically stable softmax
+- sigmoid, clamp: Standard transformations
+- intensity_decay: Avellaneda-Stoikov fill intensity
+- inventory_penalty: Quadratic inventory risk
+- poisson_arrivals, hawkes_intensity: Arrival process helpers
+
+All functions accept both scalars and numpy arrays.
+"""
+import numpy as np
+
+# Floor used by safe_log and added to the softmax denominator.
+EPS = 1e-8  # small constant to avoid division by zero
+# safe_exp clips its argument to [-MAX_EXP, MAX_EXP] before exponentiating.
+MAX_EXP = 700.0  # maximum safe exponent to avoid overflow
+
+def safe_exp(x: np.ndarray | float) -> np.ndarray | float:
+    """Exponentiate ``x`` after clipping it to ``[-MAX_EXP, MAX_EXP]``."""
+    bounded = np.clip(x, -MAX_EXP, MAX_EXP)
+    return np.exp(bounded)
+
+def safe_log(x: np.ndarray | float) -> np.ndarray | float:
+    """Natural log of ``x`` with inputs below ``EPS`` raised to ``EPS`` first."""
+    floored = np.maximum(x, EPS)
+    return np.log(floored)
+
+def clamp(x: np.ndarray | float, lo: float, hi: float) -> np.ndarray | float:
+    """Limit ``x`` to the interval ``[lo, hi]`` (thin wrapper over ``np.clip``)."""
+    limited = np.clip(x, lo, hi)
+    return limited
+
+def sigmoid(x: np.ndarray | float) -> np.ndarray | float:
+    """Logistic function ``1 / (1 + exp(-x))`` built on ``safe_exp``."""
+    denominator = 1.0 + safe_exp(-x)
+    return 1.0 / denominator
+
+def softmax(x: np.ndarray, axis: int = -1) -> np.ndarray:
+    """Softmax along ``axis``.
+
+    The maximum along ``axis`` is subtracted before exponentiating, and ``EPS``
+    is added to the normalising sum.
+    """
+    shifted = x - np.max(x, axis=axis, keepdims=True)
+    weights = safe_exp(shifted)
+    normaliser = np.sum(weights, axis=axis, keepdims=True) + EPS
+    return weights / normaliser
+
+def geometric_series(base: float, ratio: float, n: int) -> np.ndarray:
+    """Return the ``n`` terms ``base * ratio**k`` for ``k = 0 .. n-1``."""
+    exponents = np.arange(n)
+    return base * (ratio ** exponents)
+
+def ema(old: float, new: float, alpha: float = 0.1) -> float:
+    """Exponential moving average update: weight ``alpha`` on ``new``, the rest on ``old``."""
+    carry = 1 - alpha
+    return alpha * new + carry * old
+
+def intensity_decay(distance: float, kappa: float = 1.0) -> float:
+    """Avellaneda-Stoikov style fill intensity: ``exp(-kappa * distance)``."""
+    exponent = -kappa * distance
+    return safe_exp(exponent)
+
+def inventory_penalty(q: float, gamma: float = 0.1, sigma: float = 1.0) -> float:
+    """Quadratic inventory risk penalty ``gamma * sigma**2 * q**2 / 2``."""
+    variance = sigma**2
+    squared_position = q**2
+    return gamma * variance * squared_position / 2
+
+def poisson_arrivals(rate: float, dt: float, rng: np.random.Generator) -> int:
+    """Draw a Poisson count with mean ``rate * dt`` from ``rng``."""
+    expected_count = rate * dt
+    return rng.poisson(expected_count)
+
+def hawkes_intensity(base: float, history: np.ndarray, alpha: float, beta: float, t: float) -> float:
+    """Self-exciting Hawkes process intensity at time ``t``.
+
+    Returns ``base`` plus ``alpha`` times the sum of ``exp(-beta * (t - s))``
+    over the event times ``s`` in ``history`` that are strictly before ``t``.
+    An empty ``history`` yields ``base``.
+    """
+    if len(history) == 0:
+        return base
+    past_events = history[history < t]
+    decays = safe_exp(-beta * (t - past_events))
+    return base + alpha * np.sum(decays)
--- a/lab/outlet/mechanisms/__init__.py
+++ b/lab/outlet/mechanisms/__init__.py
@@ -0,0 +1,5 @@
+from .posted_price import PostedPriceMechanism
+from .two_sided import TwoSidedMechanism
+from .auction import AuctionMechanism
+
+__all__ = ['PostedPriceMechanism', 'TwoSidedMechanism', 'AuctionMechanism']
--- a/lab/outlet/mechanisms/auction.py
+++ b/lab/outlet/mechanisms/auction.py
@@ -0,0 +1,73 @@
+"""
+Auction mechanism for reserve pricing and bid shading.
+
+In this mechanism, the agent sets reserve prices that affect
+win probability and clearing prices. Used for ad auctions,
+marketplace auctions, and similar settings.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+from ..types import Quote, Opportunity, Execution, InstrumentSet, MarketState
+from ..constants import Side
+from ..math_util import clamp, sigmoid
+
+@dataclass
+class AuctionConfig:
+    """Configuration for auction mechanism.
+
+    Attributes:
+        min_reserve: Minimum reserve price
+        max_reserve: Maximum reserve price
+        base_win_prob: Scale factor on the win probability. ``AuctionMechanism``
+            multiplies it by ``sigmoid(-sensitivity * relative_reserve)``, so a
+            reserve equal to the reference price wins with probability
+            ``0.5 * base_win_prob``, and the win probability approaches
+            ``base_win_prob`` only as the reserve falls far below reference.
+        sensitivity: How strongly higher reserves reduce win probability
+    """
+    min_reserve: float = 0.0
+    max_reserve: float = 100.0
+    base_win_prob: float = 0.3
+    sensitivity: float = 2.0
+
+class AuctionMechanism:
+    """Reserve-price auction mechanism.
+
+    The reserve chosen by the agent drives two things:
+    - Win probability: ``base_win_prob * sigmoid(-sensitivity * (reserve - ref) / ref)``
+    - Clearing price: ``max(reserve, min(max_bid, reserve + random_increment))``
+      where ``max_bid`` and the increment are drawn from exponential
+      distributions scaled by the reference price
+
+    Opportunities that are not BUY-side produce no execution.
+    """
+
+    def __init__(self, cfg: AuctionConfig | None = None):
+        """Store ``cfg``, falling back to a default ``AuctionConfig``."""
+        self.cfg = cfg or AuctionConfig()
+
+    def apply_quote(self, quote: Quote, instruments: InstrumentSet,
+                    rng: np.random.Generator) -> Quote:
+        """Return a copy of ``quote`` with prices clamped to the reserve bounds."""
+        bounded = clamp(quote.prices, self.cfg.min_reserve, self.cfg.max_reserve)
+        return Quote(prices=bounded, propensity=quote.propensity, metadata=quote.metadata)
+
+    def process_opportunity(self, opp: Opportunity, quote: Quote,
+                            instruments: InstrumentSet, market: MarketState | None,
+                            rng: np.random.Generator) -> Execution | None:
+        """Sample whether a BUY opportunity wins and, if so, its clearing price.
+
+        Returns None for non-BUY opportunities and for lost auctions; otherwise
+        an ``Execution`` filled for the full requested size, whose propensity
+        is the quote propensity times the win probability.
+        """
+        if opp.side != Side.BUY:
+            return None
+        idx = int(opp.instrument_id)
+        reserve = float(quote.prices[idx])
+        ref = instruments.refs[idx]
+
+        # A reserve above the reference price lowers the chance of winning.
+        relative_reserve = (reserve - ref) / (ref + 1e-8)
+        win_prob = self.cfg.base_win_prob * sigmoid(-self.cfg.sensitivity * relative_reserve)
+
+        lost = rng.random() > win_prob
+        if lost:
+            return None
+
+        # Simulated highest bid, then a random increment above the reserve;
+        # the clearing price never falls below the reserve.
+        max_bid = ref * (1 + rng.exponential(0.2))
+        increment = rng.exponential(0.1) * ref
+        clearing = max(reserve, min(max_bid, reserve + increment))
+
+        return Execution(
+            opportunity_id=opp.id, instrument_id=opp.instrument_id,
+            side=opp.side, size_requested=opp.size, size_filled=opp.size,
+            price=clearing, propensity=quote.propensity * win_prob, t=opp.t
+        )
--- a/lab/outlet/mechanisms/posted_price.py
+++ b/lab/outlet/mechanisms/posted_price.py
@@ -0,0 +1,84 @@
+"""
+Posted price mechanism for retail dynamic pricing.
+
+In this mechanism, the agent posts a single price per instrument.
+Buyers decide whether to purchase based on the posted price.
+This is the standard e-commerce dynamic pricing model.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+from ..types import Quote, Opportunity, Execution, InstrumentSet, MarketState
+from ..constants import Side
+from ..math_util import clamp
+
+@dataclass
+class PostedPriceConfig:
+    """Settings that constrain posted prices.
+
+    Attributes:
+        min_price: Lowest price that may be posted
+        max_price: Highest price that may be posted
+        max_delta_pct: Largest per-step price move, as a fraction of the
+            previous price
+        min_margin_pct: Smallest allowed markup over the cost basis
+        round_to: Rounding granularity for prices (None disables rounding)
+    """
+    min_price: float = 0.01
+    max_price: float = 1000.0
+    max_delta_pct: float = 0.2
+    min_margin_pct: float = 0.05
+    round_to: float | None = 0.01
+
+class PostedPriceMechanism:
+    """Posted price mechanism for retail dynamic pricing.
+
+    The agent posts a single price per product. ``apply_quote`` applies the
+    following adjustments in order, so a later step can override an earlier
+    one (for example, the per-step change limit can pull a price back below
+    the margin floor):
+
+    1. Raise prices to at least ``cost * (1 + min_margin_pct)``
+    2. Clamp prices to ``[min_price, max_price]``
+    3. If the incoming quote metadata carries ``'prev_prices'``, limit the
+       change to ``max_delta_pct`` of the previous price
+    4. Round to the ``round_to`` granularity (skipped when it is falsy)
+
+    Only BUY-side opportunities are processed (customers purchasing).
+    """
+
+    def __init__(self, cfg: PostedPriceConfig | None = None):
+        """Store ``cfg``, falling back to a default ``PostedPriceConfig``."""
+        self.cfg = cfg or PostedPriceConfig()
+
+    def apply_quote(self, quote: Quote, instruments: InstrumentSet,
+                    rng: np.random.Generator) -> Quote:
+        """Return a new quote whose prices satisfy the configured constraints.
+
+        The input quote is not modified. The returned quote's metadata is the
+        input metadata plus ``'prev_prices'``, set to an independent copy of
+        the adjusted prices.
+        """
+        c = self.cfg
+        prices = quote.prices.copy()
+
+        # enforce min margin
+        min_prices = instruments.costs * (1 + c.min_margin_pct)
+        prices = np.maximum(prices, min_prices)
+
+        # enforce absolute bounds
+        prices = clamp(prices, c.min_price, c.max_price)
+
+        # enforce max delta if we have history
+        if 'prev_prices' in quote.metadata:
+            prev = quote.metadata['prev_prices']
+            max_change = prev * c.max_delta_pct
+            prices = clamp(prices, prev - max_change, prev + max_change)
+
+        # round prices
+        if c.round_to:
+            prices = np.round(prices / c.round_to) * c.round_to
+
+        # Store a copy so that in-place edits of the returned quote's prices
+        # cannot silently rewrite the recorded history.
+        return Quote(prices=prices, propensity=quote.propensity,
+                     metadata={**quote.metadata, 'prev_prices': prices.copy()})
+
+    def process_opportunity(self, opp: Opportunity, quote: Quote,
+                            instruments: InstrumentSet, market: MarketState | None,
+                            rng: np.random.Generator) -> Execution | None:
+        """Fill a BUY opportunity in full at the posted price.
+
+        Returns None for any other side; this method itself applies no
+        acceptance probability.
+        """
+        if opp.side != Side.BUY:
+            return None  # posted price is buy-only
+        idx = int(opp.instrument_id)
+        price = float(quote.prices[idx])
+        return Execution(
+            opportunity_id=opp.id, instrument_id=opp.instrument_id,
+            side=opp.side, size_requested=opp.size, size_filled=opp.size,
+            price=price, propensity=quote.propensity, t=opp.t
+        )
--- a/lab/outlet/mechanisms/two_sided.py
+++ b/lab/outlet/mechanisms/two_sided.py
@@ -0,0 +1,89 @@
+"""
+Two-sided quoting mechanism for market making.
+
+In this mechanism, the agent posts both bid and ask prices.
+Execution depends on the distance from the market mid-price.
+This models liquidity provision in financial markets.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+from ..types import Quote, Opportunity, Execution, InstrumentSet, MarketState
+from ..constants import Side
+from ..math_util import clamp, intensity_decay
+
+@dataclass
+class TwoSidedConfig:
+    """Limits applied by TwoSidedMechanism when it normalises a quote.
+
+    Attributes:
+        min_spread: Lower clamp for the quoted bid-ask spread
+        max_spread: Upper clamp for the quoted bid-ask spread
+        min_price: Lower clamp for the mid price; also the floor applied to bids
+        max_price: Upper clamp for the mid price; also the cap applied to asks
+        fill_kappa: Decay parameter passed to intensity_decay (higher = faster decay with distance)
+    """
+    min_spread: float = 0.01
+    max_spread: float = 0.5
+    min_price: float = 0.01
+    max_price: float = 10000.0
+    fill_kappa: float = 1.5
+
+class TwoSidedMechanism:
+    """Bid/ask quoting mechanism used for market making.
+
+    A quote carries a mid price and a spread per instrument. Fills are
+    random: the fill probability is intensity_decay of the distance between
+    the quoted side and the reference mid (Avellaneda-Stoikov style).
+
+    Every opportunity side is handled:
+    - BUY: customer buys at the agent's ask
+    - otherwise (SELL): customer sells at the agent's bid
+    """
+
+    def __init__(self, cfg: TwoSidedConfig | None = None):
+        self.cfg = cfg or TwoSidedConfig()
+
+    def apply_quote(self, quote: Quote, instruments: InstrumentSet,
+                    rng: np.random.Generator) -> Quote:
+        """Clamp mids and spreads to the config limits and rebuild the quote.
+
+        A missing spread vector defaults to 0.02 everywhere. After clamping,
+        bids are floored at min_price and asks capped at max_price, and the
+        returned mid/spread are recomputed from those sides.
+        """
+        cfg = self.cfg
+        mids = quote.prices.copy()
+        if quote.spreads is None:
+            widths = np.full_like(mids, 0.02)
+        else:
+            widths = quote.spreads.copy()
+        mids = clamp(mids, cfg.min_price, cfg.max_price)
+        widths = clamp(widths, cfg.min_spread, cfg.max_spread)
+        lo = np.maximum(mids - widths / 2, cfg.min_price)
+        hi = np.minimum(mids + widths / 2, cfg.max_price)
+        return Quote(prices=(lo + hi) / 2, spreads=hi - lo,
+                     propensity=quote.propensity, metadata=quote.metadata)
+
+    def process_opportunity(self, opp: Opportunity, quote: Quote,
+                            instruments: InstrumentSet, market: MarketState | None,
+                            rng: np.random.Generator) -> Execution | None:
+        """Fill ``opp`` in full at the quoted side, or return None on a missed draw."""
+        idx = int(opp.instrument_id)
+        if market and market.mid_prices is not None:
+            mid = market.mid_prices[idx]
+        else:
+            mid = quote.prices[idx]  # no market reference: use our own mid
+
+        is_buy = opp.side == Side.BUY
+        side_prices = quote.asks if is_buy else quote.bids
+        price = float(quote.prices[idx] if side_prices is None else side_prices[idx])
+        distance = price - mid if is_buy else mid - price
+
+        # random fill; the logged propensity is scaled by the fill probability
+        fill_prob = intensity_decay(abs(distance), self.cfg.fill_kappa)
+        if rng.random() > fill_prob:
+            return None
+        return Execution(opportunity_id=opp.id, instrument_id=opp.instrument_id,
+                         side=opp.side, size_requested=opp.size, size_filled=opp.size,
+                         price=price, propensity=quote.propensity * fill_prob, t=opp.t)
--- a/lab/outlet/objectives/init.py
+++ b/lab/outlet/objectives/init.py
@@ -0,0 +1,11 @@
+from .base import BaseObjective, CompositeObjective
+from .penalties import (PnLObjective, VolatilityPenalty, HoldingCostPenalty,
+                        LostOpportunityCostPenalty, InventoryRiskPenalty, SpreadCaptureReward)
+from .factory import make_objective, make_composite, retail_objective, market_making_objective
+
+__all__ = [  # names re-exported by this module
+    'BaseObjective', 'CompositeObjective',  # from .base
+    'PnLObjective', 'VolatilityPenalty', 'HoldingCostPenalty',  # from .penalties
+    'LostOpportunityCostPenalty', 'InventoryRiskPenalty', 'SpreadCaptureReward',
+    'make_objective', 'make_composite', 'retail_objective', 'market_making_objective',  # from .factory
+]
--- a/lab/outlet/objectives/base.py
+++ b/lab/outlet/objectives/base.py
@@ -0,0 +1,48 @@
+"""
+Base classes for reward objectives.
+
+Objectives compute scalar rewards from step metrics. The CompositeObjective
+allows combining multiple objectives with weights for multi-objective optimization.
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
+
+class BaseObjective(ABC):
+    """Interface implemented by every reward objective.
+
+    Subclasses must implement reward() and breakdown() methods.
+    """
+
+    @abstractmethod
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: ...  # scalar reward
+
+    @abstractmethod
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: ...  # named components
+
+class CompositeObjective(BaseObjective):
+    """Weighted sum of multiple objectives (e.g. PnL - holding_cost - volatility).
+
+    breakdown() weights each component entry and sums entries that share a key.
+
+    Args:
+        objectives: List of (objective, weight) tuples
+    """
+
+    def __init__(self, objectives: list[tuple[BaseObjective, float]]):
+        self.objectives = objectives
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        return sum(w * obj.reward(quote, instruments, metrics, hidden, obs)
+                   for obj, w in self.objectives)
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        bd: dict[str, float] = {}
+        for obj, w in self.objectives:
+            for k, v in obj.breakdown(quote, instruments, metrics, hidden, obs).items():
+                bd[k] = bd.get(k, 0.0) + w * v  # accumulate: a key may come from several terms
+        return bd
--- a/lab/outlet/objectives/factory.py
+++ b/lab/outlet/objectives/factory.py
@@ -0,0 +1,82 @@
+"""
+Factory functions for creating objectives.
+
+Provides:
+- make_objective: Create single objective by name
+- make_composite: Create weighted combination of objectives
+- retail_objective: Default objective for retail pricing
+- market_making_objective: Default objective for market making
+"""
+from __future__ import annotations
+from .base import BaseObjective, CompositeObjective
+from .penalties import (PnLObjective, VolatilityPenalty, HoldingCostPenalty,
+                        LostOpportunityCostPenalty, InventoryRiskPenalty, SpreadCaptureReward)
+
+REGISTRY: dict[str, type[BaseObjective]] = {  # lookup table used by make_objective
+    'pnl': PnLObjective,
+    'volatility': VolatilityPenalty,
+    'holding_cost': HoldingCostPenalty,
+    'lost_opportunity': LostOpportunityCostPenalty,
+    'inventory_risk': InventoryRiskPenalty,
+    'spread_capture': SpreadCaptureReward,
+}
+
+def make_objective(name: str, **kwargs) -> BaseObjective:
+    """Instantiate the objective registered under ``name``.
+
+    Args:
+        name: Key in REGISTRY, e.g. 'pnl' or 'spread_capture'
+        **kwargs: Forwarded unchanged to the objective's constructor
+
+    Raises:
+        ValueError: If ``name`` is not a registered objective
+    """
+    objective_cls = REGISTRY.get(name)
+    if objective_cls is None:
+        raise ValueError(f"Unknown objective: {name}. Available: {list(REGISTRY.keys())}")
+    return objective_cls(**kwargs)
+
+def make_composite(spec: list[tuple[str, float, dict]] | dict[str, float]) -> CompositeObjective:
+    """Build a CompositeObjective from a declarative specification.
+
+    Args:
+        spec: One of:
+            - dict of {name: weight}; each objective uses default arguments
+            - list of (name, weight, kwargs) tuples; kwargs go to the constructor
+
+    Returns:
+        CompositeObjective holding one (objective, weight) pair per spec entry
+    """
+    if isinstance(spec, dict):
+        # normalise the short form so both forms share one construction path
+        entries = [(name, weight, {}) for name, weight in spec.items()]
+    else:
+        entries = spec
+    return CompositeObjective(
+        [(make_objective(name, **kwargs), weight) for name, weight, kwargs in entries]
+    )
+
+def retail_objective(volatility_weight: float = 0.1, holding_weight: float = 0.5,
+                     stockout_weight: float = 0.3) -> CompositeObjective:
+    """Composite reward used by default for retail dynamic pricing.
+
+    Reward = PnL - volatility_weight*volatility - holding_weight*holding_cost
+             - stockout_weight*lost_opportunity
+
+    Weights are passed as positive numbers; the penalty objectives negate.
+    """
+    weights = dict(pnl=1.0, volatility=volatility_weight,
+                   holding_cost=holding_weight,
+                   lost_opportunity=stockout_weight)
+    return make_composite(weights)
+
+def market_making_objective(gamma: float = 0.1, sigma: float = 1.0) -> CompositeObjective:
+    """Composite reward used by default for market making.
+
+    Reward = PnL + 0.5*spread_capture - inventory_risk(gamma, sigma)
+    """
+    pnl_term = (PnLObjective(), 1.0)
+    spread_term = (SpreadCaptureReward(), 0.5)
+    # weight stays +1: InventoryRiskPenalty.reward() already returns the negated penalty
+    risk_term = (InventoryRiskPenalty(gamma=gamma, sigma=sigma), 1.0)
+    return CompositeObjective([pnl_term, spread_term, risk_term])
--- a/lab/outlet/objectives/penalties.py
+++ b/lab/outlet/objectives/penalties.py
@@ -0,0 +1,101 @@
+"""
+Standard objective components and penalties.
+
+This module provides common reward terms:
+- PnLObjective: Basic profit and loss
+- VolatilityPenalty: Penalize price volatility for UX
+- HoldingCostPenalty: Inventory holding cost
+- LostOpportunityCostPenalty: Stockout/missed fill cost
+- InventoryRiskPenalty: Quadratic inventory risk (market making)
+- SpreadCaptureReward: Bid-ask spread capture (market making)
+"""
+from __future__ import annotations
+import numpy as np
+from .base import BaseObjective
+from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
+from ..math_util import inventory_penalty
+
+class PnLObjective(BaseObjective):
+    """Reward equal to the step's profit and loss (revenue - cost)."""
+
+    def reward(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+               hidden: HiddenState, obs: Observation) -> float:
+        return metrics.pnl
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+                  hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        return dict(pnl=metrics.pnl, revenue=metrics.revenue, cost=metrics.cost)
+
+class VolatilityPenalty(BaseObjective):
+    """Negative reward proportional to ``metrics.volatility`` (price stability for UX)."""
+
+    def __init__(self, scale: float = 1.0):
+        self.scale = scale  # multiplier on the volatility metric
+
+    def reward(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+               hidden: HiddenState, obs: Observation) -> float:
+        return -self.scale * metrics.volatility
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+                  hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        return {'volatility_penalty': self.reward(quote, instruments, metrics, hidden, obs)}
+
+class HoldingCostPenalty(BaseObjective):
+    """Negative reward proportional to ``metrics.position_cost`` (inventory holding cost)."""
+
+    def __init__(self, scale: float = 1.0):
+        self.scale = scale  # multiplier on the holding-cost metric
+
+    def reward(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+               hidden: HiddenState, obs: Observation) -> float:
+        return -self.scale * metrics.position_cost
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+                  hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        return {'holding_cost_penalty': self.reward(quote, instruments, metrics, hidden, obs)}
+
+class LostOpportunityCostPenalty(BaseObjective):
+    """Negative reward proportional to ``metrics.lost_opportunity`` (stockouts / missed fills)."""
+
+    def __init__(self, scale: float = 1.0):
+        self.scale = scale  # multiplier on the lost-opportunity metric
+
+    def reward(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+               hidden: HiddenState, obs: Observation) -> float:
+        return -self.scale * metrics.lost_opportunity
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+                  hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        return {'lost_opportunity_penalty': self.reward(quote, instruments, metrics, hidden, obs)}
+
+class InventoryRiskPenalty(BaseObjective):
+    """Avellaneda-Stoikov style quadratic penalty on net inventory.
+
+    Penalty = gamma * sigma^2 * q^2 / 2, where q is the summed position. The
+    reward is its negation, and 0.0 when the observation carries no position.
+    """
+
+    def __init__(self, gamma: float = 0.1, sigma: float = 1.0):
+        self.gamma, self.sigma = gamma, sigma
+
+    def reward(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+               hidden: HiddenState, obs: Observation) -> float:
+        position = obs.position
+        if position is None:
+            return 0.0
+        return -inventory_penalty(np.sum(position), self.gamma, self.sigma)
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+                  hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        return {'inventory_risk_penalty': self.reward(quote, instruments, metrics, hidden, obs)}
+
+class SpreadCaptureReward(BaseObjective):
+    """Reward equal to ``metrics.spread_capture`` (bid-ask spread earned in market making)."""
+
+    def reward(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+               hidden: HiddenState, obs: Observation) -> float:
+        return metrics.spread_capture
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet, metrics: StepMetrics,
+                  hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        return dict(spread_capture=metrics.spread_capture)
--- a/lab/outlet/observation.py
+++ b/lab/outlet/observation.py
@@ -0,0 +1,92 @@
+"""
+Observation construction with demand censoring.
+
+This module provides the ObservationBuilder that constructs agent observations
+from step data. The key invariant is that observations only contain censored
+data (fills) and never true demand, ensuring proper research conditions.
+
+The ObservationConfig controls what is included in observations:
+- Position visibility
+- Market/competitor visibility
+- Demand proxy method
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+from .types import Quote, InstrumentSet, StepLogs, StepMetrics, MarketState, HiddenState, Observation
+
+@dataclass
+class ObservationConfig:
+    """Switches controlling what DefaultObservationBuilder puts in an observation.
+
+    Attributes:
+        include_position: Include a copy of the current position (when one exists)
+        include_market: Include market/competitor state in observation
+        mask_true_demand: If True, observation excludes true demand (research mode)
+        demand_proxy: Method for demand proxy ('fills', 'exposures', 'weighted')
+        exposure_weights: Weights for weighted demand proxy
+    """
+    include_position: bool = True
+    include_market: bool = True
+    mask_true_demand: bool = True  # NOTE(review): not read by DefaultObservationBuilder.build - confirm usage
+    demand_proxy: str = 'fills'  # NOTE(review): not read by DefaultObservationBuilder.build - confirm usage
+    exposure_weights: dict[str, float] | None = None  # NOTE(review): same, appears unused here
+
+class DefaultObservationBuilder:
+    """Constructs censored observations for the agent.
+
+    Ensures the key research invariant: observations contain only
+    censored fills (realized sales), never true demand. True demand
+    is placed in the info dict for research analysis only.
+    """
+
+    def __init__(self, cfg: ObservationConfig | None = None):
+        self.cfg = cfg or ObservationConfig()
+
+    def build(self, quote: Quote, instruments: InstrumentSet, logs: StepLogs,
+              metrics: StepMetrics, market: MarketState | None,
+              hidden: HiddenState, mask_demand: bool, t: int) -> Observation:
+        """Assemble the agent-visible Observation for step ``t`` (no true demand)."""
+        from .constants import EventType  # function-scope: module header lacks it
+        n = instruments.n
+        cfg = self.cfg
+
+        # always show censored fills
+        fills = logs.censored_fills if logs.censored_fills is not None else np.zeros(n)
+
+        # Count EXPOSURE events only: a full log also carries EXECUTION events, which
+        # would double-count fills and disagree with the aggregate path below.
+        if logs.events:
+            exposures = np.zeros(n)
+            for e in logs.events:
+                if e.type == EventType.EXPOSURE and e.instrument_id is not None:
+                    exposures[e.instrument_id] += 1
+        else:
+            exposures = logs.aggregates.get('exposures', np.zeros(n))
+
+        # position / market are exposed only when configured (and available)
+        show_position = cfg.include_position and instruments.position is not None
+        position = instruments.position.copy() if show_position else None
+        obs_market = market if cfg.include_market else None
+
+        return Observation(
+            quotes=quote.prices.copy(),
+            position=position,
+            fills=fills,
+            exposures=exposures,
+            market=obs_market,
+            t=t
+        )
+
+    def make_space(self, n_instruments: int, include_market: bool = True) -> dict:
+        """Returns dict describing observation space for gym"""
+        space = {
+            'quotes': {'shape': (n_instruments,), 'low': 0, 'high': np.inf},
+            'fills': {'shape': (n_instruments,), 'low': 0, 'high': np.inf},
+            'exposures': {'shape': (n_instruments,), 'low': 0, 'high': np.inf},
+        }
+        if self.cfg.include_position:
+            space['position'] = {'shape': (n_instruments,), 'low': -np.inf, 'high': np.inf}
+        if include_market:
+            space['competitor_quotes'] = {'shape': (n_instruments,), 'low': 0, 'high': np.inf}
+        return space
--- a/lab/outlet/platform.py
+++ b/lab/outlet/platform.py
@@ -0,0 +1,285 @@
+"""
+Main simulation platform orchestrating the Quote-Control loop.
+
+The Platform class is the central coordinator that:
+1. Receives pricing actions (quotes) from the agent
+2. Generates arrivals via the ArrivalModel
+3. Processes executions via Mechanism and ExecutionModel
+4. Applies position censorship via PositionModel
+5. Computes metrics and reward via Objective
+6. Returns censored observations
+
+Example:
+    >>> from lab.config import make_retail_platform
+    >>> platform = make_retail_platform()
+    >>> result = platform.reset(seed=42)
+    >>> result = platform.step(platform.instruments.refs * 1.1)
+    >>> print(f"PnL: {result.metrics.pnl:.2f}")
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any
+import numpy as np
+from .types import (Quote, Opportunity, Execution, InstrumentSet, StepLogs, StepMetrics,
+                    StepEvent, MarketState, HiddenState, Observation, StepResult)
+from .constants import LogLevel, EventType, Side
+from .protocols import Mechanism, ArrivalModel, ExecutionModel, PositionModel, MarketModel, ObservationBuilder, Objective
+from .stock import PositionModel as DefaultPositionModel, PositionConfig
+from .observation import DefaultObservationBuilder, ObservationConfig
+from .objectives.factory import retail_objective
+
+@dataclass
+class PlatformConfig:
+    """Configuration for the simulation platform.
+
+    Attributes:
+        n_instruments: Number of instruments in the simulation
+        max_steps: Step count at which Platform.step() reports terminated=True
+        dt: Time duration per step, passed to the arrival model's sample()
+        log_level: Verbosity of logging (NONE, AGG_ONLY, FULL); FULL keeps events/executions
+        mask_demand: If True, observations exclude true demand (research mode)
+        seed: Random seed for reproducibility (used when reset() gets no seed)
+    """
+    n_instruments: int = 10  # NOTE(review): Platform reads instruments.n, not this field - confirm usage
+    max_steps: int = 1000
+    dt: float = 1.0
+    log_level: LogLevel = LogLevel.AGG_ONLY
+    mask_demand: bool = True  # forwarded to the observation builder's build()
+    seed: int | None = None
+
+class Platform:
+    """Main simulation orchestrator implementing Quote -> Arrival -> Execution -> Position.
+
+    The Platform coordinates all components to simulate a pricing environment:
+    - Mechanism: validates quotes and determines execution logic
+    - ArrivalModel: generates demand opportunities
+    - ExecutionModel: computes acceptance probabilities
+    - PositionModel: manages inventory/position and censorship
+    - MarketModel: updates competitor/market state
+    - ObservationBuilder: constructs censored observations
+    - Objective: computes reward from metrics
+
+    Attributes:
+        instruments: The instrument set being priced
+        mechanism: Quote validation and execution mechanism
+        arrival: Demand arrival generator
+        execution: Acceptance probability model
+        position: Inventory/position manager
+        market: Competitor/market dynamics (optional)
+        obs_builder: Observation constructor
+        objective: Reward function
+        cfg: Platform configuration
+    """
+
+    def __init__(self, instruments: InstrumentSet, mechanism: Mechanism,
+                 arrival: ArrivalModel, execution: ExecutionModel,
+                 position: PositionModel | None = None,
+                 market: MarketModel | None = None,
+                 obs_builder: ObservationBuilder | None = None,
+                 objective: Objective | None = None,
+                 cfg: PlatformConfig | None = None):
+        self.instruments = instruments
+        self.mechanism = mechanism
+        self.arrival = arrival
+        self.execution = execution
+        self.position = position or DefaultPositionModel(PositionConfig())
+        self.market = market
+        self.obs_builder = obs_builder or DefaultObservationBuilder()
+        self.objective = objective or retail_objective()
+        self.cfg = cfg or PlatformConfig(n_instruments=instruments.n)
+
+        self._t: int = 0
+        self._rng: np.random.Generator = np.random.default_rng(self.cfg.seed)
+        self._quote: Quote | None = None
+        self._market_state: MarketState | None = None
+        self._hidden: HiddenState = HiddenState()
+        self._prev_prices: np.ndarray | None = None
+
+    def reset(self, seed: int | None = None) -> StepResult:
+        """Reset the platform to initial state.
+
+        Args:
+            seed: Random seed; overrides the config seed whenever it is not None (0 is valid)
+
+        Returns:
+            Initial StepResult with zeroed metrics and initial observation
+        """
+        self._t = 0
+        self._rng = np.random.default_rng(seed if seed is not None else self.cfg.seed)
+        self._hidden = HiddenState()
+        self._prev_prices = self.instruments.refs.copy()
+
+        # reset position
+        self.position.reset(self.instruments, self._rng)
+        self.instruments.position = self.position.position
+
+        # initial quote at reference prices
+        self._quote = Quote(prices=self.instruments.refs.copy(), propensity=1.0,
+                            metadata={'prev_prices': self._prev_prices})
+        self._quote = self.mechanism.apply_quote(self._quote, self.instruments, self._rng)
+
+        # initial market state
+        if self.market:
+            self._market_state = self.market.step(0, self._quote, self._hidden, self._rng)
+
+        # build initial observation
+        logs = StepLogs(aggregates={'reset': True},
+                        true_demand=np.zeros(self.instruments.n),
+                        censored_fills=np.zeros(self.instruments.n))
+        metrics = StepMetrics()
+        obs = self.obs_builder.build(self._quote, self.instruments, logs, metrics,
+                                     self._market_state, self._hidden, self.cfg.mask_demand, 0)
+
+        return StepResult(obs=obs, reward=0.0, terminated=False, truncated=False,
+                          info={'true_demand': logs.true_demand}, metrics=metrics,
+                          logs=logs, hidden=self._hidden)
+
+    def step(self, action: np.ndarray, propensity: float = 1.0) -> StepResult:
+        """Execute one simulation step with the given pricing action.
+
+        The step proceeds as follows:
+        1. Apply quote constraints via mechanism
+        2. Update market/competitor state
+        3. Generate arrivals
+        4. Process arrivals -> executions (acceptance check, position censorship)
+        5. Update position state
+        6. Compute metrics (PnL, costs, etc.)
+        7. Build logs with propensities
+        8. Construct censored observation
+        9. Compute reward and its breakdown
+        10. Check termination (step counter reached max_steps)
+
+        Args:
+            action: Price vector for all instruments (array-like; converted to a float ndarray)
+            propensity: P(action | behavior policy) for OPE logging
+
+        Returns:
+            StepResult containing observation, reward, metrics, logs, and hidden state
+        """
+        self._t += 1
+        cfg = self.cfg
+
+        # 1. apply quote from action
+        self._quote = Quote(prices=np.asarray(action, dtype=float), propensity=propensity,
+                            metadata={'prev_prices': self._prev_prices})
+        self._quote = self.mechanism.apply_quote(self._quote, self.instruments, self._rng)
+        self._prev_prices = self._quote.prices.copy()
+        self._hidden.quote_history.append(self._quote.prices.copy())
+
+        # 2. update market/competitors
+        if self.market:
+            self._market_state = self.market.step(self._t, self._quote, self._hidden, self._rng)
+            self._hidden.market_history.append(self._market_state)
+
+        # 3. generate arrivals
+        opps = self.arrival.sample(self._t, cfg.dt, self.instruments,
+                                   self._market_state, self._hidden, self._rng)
+
+        # 4. process opportunities -> executions
+        executions: list[Execution] = []
+        events: list[StepEvent] = []
+        true_demand = np.zeros(self.instruments.n)
+
+        for opp in opps:
+            # log exposure
+            if cfg.log_level == LogLevel.FULL:
+                events.append(StepEvent(t=opp.t, type=EventType.EXPOSURE,
+                                        instrument_id=opp.instrument_id,
+                                        opportunity_id=opp.id,
+                                        price=float(self._quote.prices[opp.instrument_id]),
+                                        propensity=self._quote.propensity))
+
+            # check acceptance
+            prob = self.execution.prob(opp, self._quote, self.instruments,
+                                       self._market_state, self._rng)
+            if self._rng.random() < prob:
+                # create execution
+                exe = self.mechanism.process_opportunity(opp, self._quote, self.instruments,
+                                                         self._market_state, self._rng)
+                if exe:
+                    true_demand[exe.instrument_id] += exe.size_requested
+                    # apply position censorship
+                    exe = self.position.apply_execution(exe)
+                    executions.append(exe)
+                    if cfg.log_level == LogLevel.FULL:
+                        events.append(StepEvent(t=exe.t, type=EventType.EXECUTION,
+                                                instrument_id=exe.instrument_id,
+                                                opportunity_id=exe.opportunity_id,
+                                                price=exe.price, size=exe.size_filled,
+                                                propensity=exe.propensity))
+
+        # 5. update position state
+        self.position.step(self._t)
+        self.instruments.position = self.position.position
+
+        # 6. compute metrics
+        censored_fills = np.zeros(self.instruments.n)
+        revenue = 0.0
+        cost = 0.0
+        spread_capture = 0.0
+
+        for exe in executions:
+            censored_fills[exe.instrument_id] += exe.size_filled
+            if exe.side == Side.BUY:
+                revenue += exe.price * exe.size_filled
+                cost += self.instruments.costs[exe.instrument_id] * exe.size_filled
+            else:
+                revenue -= exe.price * exe.size_filled
+                cost -= self.instruments.costs[exe.instrument_id] * exe.size_filled
+            # spread capture for market making
+            if self._quote.spreads is not None and self._market_state and self._market_state.mid_prices is not None:
+                mid = self._market_state.mid_prices[exe.instrument_id]
+                if exe.side == Side.BUY:
+                    spread_capture += (exe.price - mid) * exe.size_filled
+                else:
+                    spread_capture += (mid - exe.price) * exe.size_filled
+
+        pnl = revenue - cost
+        units = float(np.sum(censored_fills))
+        lost = float(np.sum(true_demand - censored_fills))
+
+        # volatility: mean relative price change versus the previous step's quote
+        volatility = 0.0
+        if len(self._hidden.quote_history) > 1:
+            prev = self._hidden.quote_history[-2]
+            volatility = float(np.mean(np.abs(self._quote.prices - prev) / (prev + 1e-8)))
+
+        metrics = StepMetrics(
+            pnl=pnl, revenue=revenue, cost=cost, units_traded=units,
+            position_cost=self.position.holding_cost,
+            lost_opportunity=self.position.shortage_cost + lost * np.mean(self._quote.prices) * 0.1,
+            spread_capture=spread_capture, volatility=volatility,
+            conversion=units / (len(opps) + 1e-8),
+            per_instrument={'fills': censored_fills, 'demand': true_demand}
+        )
+
+        # 7. build logs
+        logs = StepLogs(
+            events=events if cfg.log_level == LogLevel.FULL else None,
+            executions=executions if cfg.log_level == LogLevel.FULL else None,
+            aggregates={'n_arrivals': len(opps), 'n_executions': len(executions),
+                        'exposures': np.bincount([o.instrument_id for o in opps],
+                                                 minlength=self.instruments.n).astype(float)},
+            true_demand=true_demand,
+            censored_fills=censored_fills
+        )
+
+        # 8. build observation
+        obs = self.obs_builder.build(self._quote, self.instruments, logs, metrics,
+                                     self._market_state, self._hidden, cfg.mask_demand, self._t)
+
+        # 9. compute reward
+        reward = self.objective.reward(self._quote, self.instruments, metrics, self._hidden, obs)
+        breakdown = self.objective.breakdown(self._quote, self.instruments, metrics, self._hidden, obs)
+        # breakdown is computed once here and reused in info below
+
+
+        # 10. check termination
+        terminated = self._t >= cfg.max_steps
+        truncated = False
+
+        # true demand is reported through info only, never through the observation
+        info = {'true_demand': true_demand, 'breakdown': breakdown}
+
+        return StepResult(obs=obs, reward=reward, terminated=terminated, truncated=truncated,
+                          info=info, metrics=metrics, logs=logs, hidden=self._hidden)
--- a/lab/outlet/protocols.py
+++ b/lab/outlet/protocols.py
@@ -0,0 +1,297 @@
+"""
+Protocol definitions for pluggable simulator components.
+
+This module defines the interfaces (Protocols) that allow swapping different
+implementations for each stage of the Quote -> Arrival -> Execution -> Position
+pipeline. All protocols use structural subtyping (duck typing).
+
+Protocols:
+    Mechanism: How quotes translate to executions (posted price, two-sided, auction)
+    ArrivalModel: How opportunities arrive (Poisson, Hawkes, sessions)
+    ExecutionModel: Acceptance probability given quote (elasticity, intensity)
+    PositionModel: Inventory/position management and censorship
+    MarketModel: Competitor/market dynamics
+    ObservationBuilder: Constructs agent observations with censoring
+    Objective: Computes reward from metrics
+"""
+from __future__ import annotations
+from typing import Protocol, Any, TYPE_CHECKING
+import numpy as np
+if TYPE_CHECKING:
+    from .types import (Quote, Opportunity, Execution, InstrumentSet, StepLogs,
+                        StepMetrics, HiddenState, Observation, MarketState)
+    from .constants import LogLevel
+
+class Mechanism(Protocol):
+    """Structural interface for how quotes translate into executions.
+
+    The Mechanism is the core abstraction that differentiates pricing domains:
+    - PostedPrice: single price, buyer decides to purchase or not
+    - TwoSided: bid/ask spread, execution depends on distance from mid
+    - Auction: reserve price affects win probability and clearing price
+
+    Methods:
+        apply_quote: Enforce constraints and return a valid Quote
+        process_opportunity: Return an Execution (or None) for one opportunity
+    """
+    def apply_quote(self, quote: Quote, instruments: InstrumentSet,
+                    rng: np.random.Generator) -> Quote:
+        """Apply mechanism-specific constraints to a quote.
+
+        Args:
+            quote: Raw quote from policy
+            instruments: Current instrument set with costs/refs
+            rng: Random generator for stochastic constraints
+
+        Returns:
+            Constrained quote satisfying mechanism rules (min margin, max delta, etc.)
+        """
+        ...
+
+    def process_opportunity(self, opp: Opportunity, quote: Quote,
+                            instruments: InstrumentSet, market: MarketState | None,
+                            rng: np.random.Generator) -> Execution | None:
+        """Process an opportunity against the current quote.
+
+        Args:
+            opp: Incoming opportunity (session, order, request)
+            quote: Current posted quote
+            instruments: Instrument set
+            market: Current market state (competitor prices, mid-prices); may be None
+            rng: Random generator
+
+        Returns:
+            Execution if opportunity converts, None otherwise
+        """
+        ...
+
+class ArrivalModel(Protocol):
+    """Generates opportunities (demand arrivals) for each step.
+
+    Different arrival models capture different demand dynamics:
+    - Poisson: constant rate, memoryless
+    - Hawkes: self-exciting, clustered arrivals
+    - Session: retail browsing with multi-product views
+
+    Methods:
+        sample: Return the list of opportunities for one time interval
+    """
+    def sample(self, t: float, dt: float, instruments: InstrumentSet,
+               market: MarketState | None, hidden: HiddenState,
+               rng: np.random.Generator) -> list[Opportunity]:
+        """Sample opportunities for time interval [t, t+dt).
+
+        Args:
+            t: Current time
+            dt: Time interval length
+            instruments: Available instruments
+            market: Current market state; may be None
+            hidden: Hidden state (true_demand_intensity, contamination, regime)
+            rng: Random generator
+
+        Returns:
+            List of opportunities arriving in this interval
+        """
+        ...
+
+class ExecutionModel(Protocol):
+    """Computes acceptance/execution probability given quote and context.
+
+    Different models capture different demand responses:
+    - Elasticity: price sensitivity with competitor cross-effects
+    - Intensity: distance-based fill probability (market making)
+    - Logit: discrete choice model
+
+    Methods:
+        prob: Compute acceptance probability for one opportunity
+        uncensor: Estimate true demand from censored fills
+    """
+    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
+             market: MarketState | None, rng: np.random.Generator) -> float:
+        """Compute probability that opportunity accepts the quote.
+
+        Args:
+            opp: Opportunity to evaluate
+            quote: Current quote
+            instruments: Instrument set
+            market: Market state (competitor prices affect cross-elasticity); may be None
+            rng: Random generator
+
+        Returns:
+            Probability in [0, 1] that opportunity executes
+        """
+        ...
+
+    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet,
+                 context: dict[str, Any] | None = None) -> np.ndarray:
+        """Estimate true demand from censored fills.
+
+        Used for demand estimation research under inventory censorship.
+
+        Args:
+            fills: Observed (censored) fill counts
+            instruments: Instrument set
+            context: Optional additional context (exposures, prices shown); defaults to None
+
+        Returns:
+            Estimated true demand counts
+        """
+        ...
+
+class PositionModel(Protocol):
+    """Manages inventory (retail) or position (finance).
+
+    Handles:
+    - Position constraints and censorship
+    - Holding costs (retail) or inventory risk (finance)
+    - Replenishment and order receipt
+
+    Methods:
+        reset: Initialize position state
+        available: Query available capacity for a trade
+        apply_execution: Censor execution by available position
+        step: Process time-based updates (replenishment, holding cost)
+
+    Properties:
+        position: Current position vector
+        holding_cost: Cost incurred this step from holding position
+
+    NOTE(review): the simulator step also reads `position.shortage_cost`, which
+    this protocol does not declare - confirm whether it belongs in the interface.
+    """
+    def reset(self, instruments: InstrumentSet, rng: np.random.Generator) -> None:
+        """Initialize position state for new episode."""
+        ...
+
+    def available(self, instrument_id: int, side: Any) -> float:
+        """Query available capacity for a trade.
+
+        Args:
+            instrument_id: Which instrument
+            side: BUY or SELL
+
+        Returns:
+            Maximum tradeable size given current position
+        """
+        ...
+
+    def apply_execution(self, exe: Execution) -> Execution:
+        """Apply position constraints to an execution.
+
+        Args:
+            exe: Proposed execution with size_requested
+
+        Returns:
+            Censored execution with size_filled <= available capacity
+        """
+        ...
+
+    def step(self, t: float) -> None:
+        """Process time-based position updates (replenishment receipt, holding cost, etc.)."""
+        ...
+
+    @property
+    def position(self) -> np.ndarray:
+        """Current position vector (positive=long/inventory, negative=short)."""
+        ...
+
+    @property
+    def holding_cost(self) -> float:
+        """Holding cost incurred this step."""
+        ...
+
+class MarketModel(Protocol):
+    """Models external market dynamics and competitor behavior.
+
+    For retail: competitor price dynamics (static, reactive, stochastic)
+    For finance: mid-price process (GBM, mean-reverting)
+
+    Methods:
+        step: Produce the MarketState for a timestep given the agent's quotes
+    """
+    def step(self, t: float, self_quotes: Quote, hidden: HiddenState,
+             rng: np.random.Generator) -> MarketState:
+        """Update market state for this timestep.
+
+        Args:
+            t: Current time
+            self_quotes: Agent's current quotes (competitors may react)
+            hidden: Hidden state (regime info)
+            rng: Random generator
+
+        Returns:
+            MarketState for this step (competitor_quotes, mid_prices, volatility, regime, t)
+        """
+        ...
+
+class ObservationBuilder(Protocol):
+    """Constructs agent observations with appropriate censoring.
+
+    Critical for research: with mask_demand=True the agent sees only censored
+    fills; true demand is reported separately through the step's info dict.
+
+    Methods:
+        build: Construct observation from step data
+    """
+    def build(self, quote: Quote, instruments: InstrumentSet, logs: StepLogs,
+              metrics: StepMetrics, market: MarketState | None,
+              hidden: HiddenState, mask_demand: bool, t: int) -> Observation:
+        """Build observation for agent.
+
+        Args:
+            quote: Current quote
+            instruments: Instrument set with positions
+            logs: Step logs with true_demand and censored_fills
+            metrics: Computed metrics
+            market: Market state; may be None
+            hidden: Hidden state (not included in obs)
+            mask_demand: If True, exclude true demand from observation
+            t: Current timestep (integer step index)
+
+        Returns:
+            Observation containing only observable quantities
+        """
+        ...
+
+class Objective(Protocol):
+    """Computes reward from step metrics.
+
+    Supports composite objectives with weighted terms:
+    - PnL (profit)
+    - Position costs (holding, inventory risk)
+    - Lost opportunity (stockouts)
+    - Volatility penalty (UX)
+    - Spread capture (market making)
+
+    Methods:
+        reward: Compute scalar reward
+        breakdown: Get per-term contribution for analysis (same arguments as reward)
+    """
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState,
+               obs: Observation) -> float:
+        """Compute scalar reward for this step.
+
+        Args:
+            quote: Current quote
+            instruments: Instrument set
+            metrics: Step metrics (pnl, costs, etc.)
+            hidden: Hidden state
+            obs: Agent observation
+
+        Returns:
+            Scalar reward value
+        """
+        ...
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState,
+                  obs: Observation) -> dict[str, float]:
+        """Get reward breakdown by component.
+
+        Takes the same arguments as reward(); useful for seeing which terms dominate.
+
+        Returns:
+            Dict mapping term names to their contributions
+        """
+        ...
--- a/lab/outlet/stock.py
+++ b/lab/outlet/stock.py
@@ -0,0 +1,151 @@
+"""
+Inventory/position management and instrument factories.
+
+This module provides:
+- PositionConfig: Configuration for position constraints and costs
+- PositionModel: Manages inventory (retail) or position (finance)
+- make_instruments: Factory for creating instrument sets
+
+The PositionModel handles demand censorship by limiting executions
+to available inventory, computing holding costs, and managing replenishment.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+import numpy as np
+from .types import Instrument, InstrumentSet, Execution
+from .constants import Side, InstrumentType
+
+@dataclass
+class PositionConfig:
+    """Configuration for position/inventory management.
+
+    Attributes:
+        initial_position: Starting inventory (None = unlimited, float = same for all, array = per instrument)
+        max_position: Maximum long position per instrument
+        min_position: Maximum short position (negative, for finance)
+        holding_cost_rate: Cost per unit per step for holding inventory
+        shortage_cost_rate: Opportunity cost rate for stockouts
+        lead_time: Steps until replenishment orders arrive
+    """
+    initial_position: np.ndarray | float | None = None
+    max_position: float = 1000.0
+    min_position: float = -1000.0  # NOTE(review): not read by PositionModel in this module - confirm where it is enforced
+    holding_cost_rate: float = 0.001
+    shortage_cost_rate: float = 0.05
+    lead_time: int = 0
+
+@dataclass
+class PositionModel:
+    """Manages inventory (retail) or position (finance) with censorship.
+
+    Key responsibilities:
+    - Track current position per instrument (np.inf marks unlimited capacity)
+    - Censor executions when position is insufficient
+    - Compute holding costs per step
+    - Accumulate shortage/stockout costs (cleared only by reset() in this class)
+    - Handle replenishment orders with lead time
+
+    Retail: position is inventory (positive); finance: long (+) or short (-).
+    """
+    cfg: PositionConfig
+    n: int = 0
+    _position: np.ndarray = field(default_factory=lambda: np.array([]))
+    _pending_orders: list[tuple[int, np.ndarray]] = field(default_factory=list)
+    _step_holding_cost: float = 0.0
+    _step_shortage_cost: float = 0.0
+
+    def reset(self, instruments: InstrumentSet, rng: np.random.Generator) -> None:
+        """Start an episode: size state to `instruments.n`, clear orders and costs (rng unused)."""
+        self.n = instruments.n
+        init = self.cfg.initial_position
+        if init is None:
+            self._position = np.full(self.n, np.inf)  # unlimited
+        else:
+            # scalars (incl. NumPy scalars) broadcast; array-likes must match n or ValueError is raised
+            self._position = np.broadcast_to(np.asarray(init, dtype=np.float64), (self.n,)).copy()
+        self._pending_orders = []
+        self._step_holding_cost = self._step_shortage_cost = 0.0
+
+    def available(self, instrument_id: int, side: Side) -> float:
+        """Return the largest size that can be filled for `side` at the current position."""
+        pos = self._position[instrument_id]
+        if np.isinf(pos): return np.inf
+        if side == Side.BUY:
+            return max(0, pos)  # can sell up to current inventory
+        return max(0, self.cfg.max_position - pos)  # can buy up to max
+
+    def apply_execution(self, exe: Execution) -> Execution:
+        """Fill `exe` up to available capacity, update position, accrue shortage cost."""
+        idx = int(exe.instrument_id)
+        filled = min(exe.size_requested, self.available(idx, exe.side))
+        shortage = exe.size_requested - filled
+
+        if exe.side == Side.BUY:
+            self._position[idx] -= filled  # sold from inventory
+        else:
+            self._position[idx] += filled  # bought into inventory
+        if shortage > 0:  # the unfilled remainder is priced as a stockout cost
+            self._step_shortage_cost += shortage * exe.price * self.cfg.shortage_cost_rate
+
+        return Execution(
+            opportunity_id=exe.opportunity_id, instrument_id=exe.instrument_id,
+            side=exe.side, size_requested=exe.size_requested,
+            size_filled=filled, price=exe.price, propensity=exe.propensity, t=exe.t
+        )
+
+    def order(self, quantity: np.ndarray) -> None:
+        """Queue a replenishment for `lead_time` steps, or apply it now if lead_time <= 0."""
+        if self.cfg.lead_time > 0:
+            self._pending_orders.append((self.cfg.lead_time, quantity.copy()))
+        else:
+            self._position += quantity
+
+    def step(self, t: float) -> None:
+        """Recompute this step's holding cost, then receive the orders that are due."""
+        # unlimited (inf) entries contribute nothing to the holding cost
+        pos = np.where(np.isinf(self._position), 0, self._position)
+        self._step_holding_cost = float(np.sum(np.abs(pos)) * self.cfg.holding_cost_rate)
+
+        still_pending = []
+        for remaining, qty in self._pending_orders:
+            if remaining <= 1:
+                self._position += qty  # lead time elapsed: order is received
+            else:
+                still_pending.append((remaining - 1, qty))
+        self._pending_orders = still_pending
+
+    @property
+    def position(self) -> np.ndarray:
+        return np.where(np.isinf(self._position), -1, self._position)  # unlimited is reported as -1
+
+    @property
+    def holding_cost(self) -> float:
+        return self._step_holding_cost  # value computed by the latest step()
+
+    @property
+    def shortage_cost(self) -> float:
+        return self._step_shortage_cost  # NOTE(review): accumulates until reset(); confirm per-step intent
+
+def make_instruments(n: int, cost_range: tuple[float, float] = (1.0, 10.0),
+                     margin_range: tuple[float, float] = (0.2, 0.5),
+                     inst_type: InstrumentType = InstrumentType.SKU,
+                     rng: np.random.Generator | None = None) -> InstrumentSet:
+    """Build an InstrumentSet of n instruments with randomly drawn economics.
+
+    Args:
+        n: How many instruments to generate (ids run 0..n-1)
+        cost_range: Bounds passed to the uniform draw of each cost basis
+        margin_range: Bounds passed to the uniform draw of each margin
+        inst_type: Instrument type assigned to every generated instrument
+        rng: Generator to draw from; a fresh default generator is used if omitted
+
+    Returns:
+        InstrumentSet whose reference prices equal cost * (1 + margin)
+    """
+    gen = rng if rng else np.random.default_rng()
+    unit_costs = gen.uniform(*cost_range, n)  # costs are drawn before margins
+    ref_prices = unit_costs * (1 + gen.uniform(*margin_range, n))
+    return InstrumentSet(instruments=[
+        Instrument(id=idx, type=inst_type, cost_basis=cost, reference_price=ref)
+        for idx, (cost, ref) in enumerate(zip(unit_costs, ref_prices))])
--- a/lab/outlet/types.py
+++ b/lab/outlet/types.py
@@ -0,0 +1,318 @@
+"""
+Core data types for the Quote-Control simulator.
+
+This module defines the fundamental data structures used throughout the platform:
+- Identifiers (InstrumentId, OpportunityId, AgentId)
+- Domain objects (Instrument, Quote, Opportunity, Execution)
+- Logging structures (StepEvent, StepLogs, StepMetrics)
+- State containers (MarketState, HiddenState, Observation, StepResult)
+
+All dataclasses are designed to be serializable and numpy-compatible.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, NewType
+import numpy as np
+from .constants import Side, InstrumentType, OpportunityType, EventType
+
+InstrumentId = NewType('InstrumentId', int)  # unique instrument index
+OpportunityId = NewType('OpportunityId', str)  # unique opportunity/session ID
+AgentId = NewType('AgentId', str)  # unique agent/actor ID
+
+@dataclass
+class Instrument:
+    """Represents a priceable entity in the simulation.
+
+    An instrument can be a retail SKU, financial asset, loan product, or subscription.
+    The cost_basis represents the fundamental value (marginal cost for retail,
+    mid-price for assets, funding rate for loans).
+
+    Attributes:
+        id: Unique identifier for this instrument
+        type: Category of instrument (SKU, ASSET, LOAN, SUBSCRIPTION)
+        cost_basis: Fundamental cost or value (marginal cost, mid-price, funding rate)
+        reference_price: Base or fair price used for action scaling
+        attrs: Additional attributes (quality score, category, volatility, etc.); empty dict by default
+    """
+    id: InstrumentId
+    type: InstrumentType
+    cost_basis: float
+    reference_price: float
+    attrs: dict[str, Any] = field(default_factory=dict)
+
+@dataclass
+class InstrumentSet:
+    """Ordered collection of instruments plus an optional position vector.
+
+    Attributes:
+        instruments: List of Instrument objects
+        position: Current position per instrument (None = unlimited capacity);
+            may be positive (long/inventory) or negative (short)
+
+    Properties:
+        n: Number of instruments
+        costs / refs: float32 vectors of cost bases / reference prices
+    """
+    instruments: list[Instrument]
+    position: np.ndarray | None = None
+
+    @property
+    def n(self) -> int:
+        return len(self.instruments)
+    @property
+    def costs(self) -> np.ndarray:
+        return np.array([inst.cost_basis for inst in self.instruments], dtype=np.float32)
+    @property
+    def refs(self) -> np.ndarray:
+        return np.array([inst.reference_price for inst in self.instruments], dtype=np.float32)
+
+@dataclass
+class Quote:
+    """The policy's action: a vector of prices, optionally with spreads.
+
+    How the fields are read depends on the quoting mechanism:
+    - Posted price: only `prices` field used
+    - Two-sided: `prices` as mid, `spreads` for bid-ask width
+    - Auction: `prices` as reserve prices
+
+    The propensity field supports off-policy evaluation (OPE).
+
+    Attributes:
+        prices: Posted prices (retail) or mid-quotes (market making)
+        spreads: Bid-ask spread width for two-sided quoting (None for posted price)
+        propensity: P(this quote | behavior policy) for importance sampling
+        metadata: Additional info (prev_prices for delta constraints, etc.)
+
+    Properties:
+        bids: prices - spreads/2, or None when spreads is None
+        asks: prices + spreads/2, or None when spreads is None
+    """
+    prices: np.ndarray
+    spreads: np.ndarray | None = None
+    propensity: float = 1.0
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    @property
+    def bids(self) -> np.ndarray | None:
+        return None if self.spreads is None else self.prices - self.spreads / 2
+    @property
+    def asks(self) -> np.ndarray | None:
+        return None if self.spreads is None else self.prices + self.spreads / 2
+
+@dataclass
+class Opportunity:
+    """An arrival event that may result in a transaction.
+
+    Opportunities are the demand side of the simulation:
+    - Retail: browsing session with purchase intent
+    - Market making: incoming market order
+    - Lending: loan application
+
+    The context dict carries segment/type information used by execution models.
+
+    Attributes:
+        id: Unique identifier for this opportunity
+        type: Category (SESSION, MARKET_ORDER, REQUEST)
+        side: BUY or SELL intent
+        instrument_id: Which instrument the opportunity targets
+        size: Requested transaction size (units, shares, principal); defaults to 1.0
+        t: Arrival timestamp; defaults to 0.0
+        context: Segment info (is_scraper, credit_score, urgency, etc.); empty dict by default
+    """
+    id: OpportunityId
+    type: OpportunityType
+    side: Side
+    instrument_id: InstrumentId
+    size: float = 1.0
+    t: float = 0.0
+    context: dict[str, Any] = field(default_factory=dict)
+
+@dataclass
+class Execution:
+    """A realized transaction after acceptance and position censorship.
+
+    The difference between size_requested and size_filled represents
+    censored demand due to inventory/position constraints.
+
+    Attributes:
+        opportunity_id: Links back to the originating Opportunity
+        instrument_id: Which instrument was traded
+        side: BUY or SELL
+        size_requested: Original requested size (true demand)
+        size_filled: Actual filled size after censorship
+        price: Execution price
+        propensity: Combined propensity for OPE (quote * acceptance); defaults to 1.0
+        t: Execution timestamp; defaults to 0.0
+    """
+    opportunity_id: OpportunityId
+    instrument_id: InstrumentId
+    side: Side
+    size_requested: float
+    size_filled: float
+    price: float
+    propensity: float = 1.0
+    t: float = 0.0
+
+@dataclass
+class StepEvent:
+    """Generic logged event; only t and type are required, the other fields are optional payload."""
+    t: float
+    type: EventType
+    instrument_id: InstrumentId | None = None
+    opportunity_id: OpportunityId | None = None
+    price: float | None = None
+    size: float | None = None
+    propensity: float = 1.0
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+@dataclass
+class StepLogs:
+    """Container for all logging data from a simulation step.
+
+    Supports both detailed event logging (for OPE) and aggregate-only mode
+    (for fast simulation). The true_demand vs censored_fills distinction
+    is critical for research on demand estimation under censorship.
+
+    Attributes:
+        events: Detailed event log (None if LogLevel != FULL)
+        executions: List of executed transactions (None if LogLevel != FULL)
+        aggregates: Always-available aggregates (the engine step stores n_arrivals, n_executions, exposures)
+        true_demand: Oracle demand before censorship (for research, not in obs)
+        censored_fills: Realized fills after position constraints (observable)
+    """
+    events: list[StepEvent] | None = None
+    executions: list[Execution] | None = None
+    aggregates: dict[str, Any] = field(default_factory=dict)
+    true_demand: np.ndarray | None = None
+    censored_fills: np.ndarray | None = None
+
+@dataclass
+class StepMetrics:
+    """Computed metrics for a single simulation step.
+
+    Metrics are domain-aware: retail uses revenue/cost/holding_cost,
+    market making uses spread_capture and inventory risk.
+
+    Attributes:
+        pnl: Profit and loss (revenue - cost for retail, mark-to-market for finance)
+        revenue: Gross revenue from sales/executions
+        cost: Cost of goods sold or position acquisition cost
+        units_traded: Total units/shares transacted
+        position_cost: Holding cost (retail) or inventory risk penalty (finance)
+        lost_opportunity: Cost of stockouts or missed fills
+        spread_capture: Bid-ask spread captured (market making)
+        volatility: Price volatility metric for UX consideration
+        conversion: Units traded per opportunity (the engine step sets units / number of opportunities)
+        per_instrument: Per-instrument breakdowns (the engine step stores 'fills' and 'demand')
+    """
+    pnl: float = 0.0
+    revenue: float = 0.0
+    cost: float = 0.0
+    units_traded: float = 0.0
+    position_cost: float = 0.0
+    lost_opportunity: float = 0.0
+    spread_capture: float = 0.0
+    volatility: float = 0.0
+    conversion: float = 0.0
+    per_instrument: dict[str, np.ndarray] = field(default_factory=dict)
+
+@dataclass
+class MarketState:
+    """External market conditions and competitor state.
+
+    For retail: competitor_quotes drives cross-elasticity effects.
+    For finance: mid_prices and volatility drive execution dynamics.
+
+    Attributes:
+        competitor_quotes: Competitor posted prices (retail); None by default
+        mid_prices: Market mid-prices for assets (finance); None by default
+        volatility: Per-instrument volatility estimate; None by default
+        regime: Market regime identifier (normal, price_war, high_vol, etc.)
+        t: Timestamp of this market state
+    """
+    competitor_quotes: np.ndarray | None = None
+    mid_prices: np.ndarray | None = None
+    volatility: np.ndarray | None = None
+    regime: str = 'normal'
+    t: float = 0.0
+
+@dataclass
+class HiddenState:
+    """Internal simulator state not exposed to the agent.
+
+    Contains oracle information for research analysis and
+    history needed for non-stationary dynamics.
+
+    Attributes:
+        true_demand_intensity: Latent demand multiplier (default 1.0)
+        contamination: Fraction of arrivals that are adversarial/scraper (default 0.0)
+        regime: Current market/competitor regime (default 'normal')
+        quote_history: History of agent quotes for volatility calculation (starts empty)
+        market_history: History of market states for analysis (starts empty)
+    """
+    true_demand_intensity: float = 1.0
+    contamination: float = 0.0
+    regime: str = 'normal'
+    quote_history: list[np.ndarray] = field(default_factory=list)
+    market_history: list[MarketState] = field(default_factory=list)
+
+@dataclass
+class Observation:
+    """Agent-visible state for one step (the censored view).
+
+    Intended invariant: carries censored fills, never true_demand.
+    to_flat() packs quotes, fills, exposures, position, competitor quotes;
+    t and extra are not part of the flat vector.
+
+    Attributes:
+        quotes: Current posted quotes (the agent's last action)
+        position: Current inventory/position state
+        fills: Censored execution counts per instrument
+        exposures: Opportunity exposure counts per instrument
+        market: Observable market state (competitor prices, volatility)
+        t: Current timestep
+        extra: Additional observable features
+    """
+    quotes: np.ndarray
+    position: np.ndarray | None
+    fills: np.ndarray
+    exposures: np.ndarray
+    market: MarketState | None
+    t: int
+    extra: dict[str, Any] = field(default_factory=dict)
+
+    def to_flat(self) -> np.ndarray:
+        """Flatten observation to 1D numpy array for gym environments."""
+        chunks = [self.quotes, self.fills, self.exposures]
+        if self.position is not None:
+            chunks.append(self.position)
+        competitor = self.market.competitor_quotes if self.market else None
+        if competitor is not None:
+            chunks.append(competitor)
+        return np.concatenate([chunk.flatten() for chunk in chunks])
+
+@dataclass
+class StepResult:
+    """Complete result from a simulation step.
+
+    Follows gymnasium convention for obs, reward, terminated, truncated, info.
+    Additionally provides metrics, logs, and hidden state for research.
+
+    Attributes:
+        obs: Observable state (censored)
+        reward: Scalar reward from objective function
+        terminated: Episode ended naturally (max_steps reached)
+        truncated: Episode ended early (bankruptcy, constraint violation); the engine step currently always passes False
+        info: Additional info dict (the engine step stores 'true_demand' and the reward 'breakdown')
+        metrics: Computed metrics for this step
+        logs: Event logs and aggregates
+        hidden: Internal simulator state (oracle info)
+    """
+    obs: Observation
+    reward: float
+    terminated: bool
+    truncated: bool
+    info: dict[str, Any]
+    metrics: StepMetrics
+    logs: StepLogs
+    hidden: HiddenState
--- a/lab/population/init.py
+++ b/lab/population/init.py
@@ -0,0 +1,10 @@
+from .arrivals import PoissonArrivalModel, HawkesArrivalModel, SessionArrivalModel
+from .execution import ElasticityExecutionModel, IntensityExecutionModel, LogitExecutionModel
+from .competitors import (StaticCompetitorModel, ReactiveCompetitorModel,
+                          StochasticCompetitorModel, GBMMarketModel)
+
+__all__ = [
+    'PoissonArrivalModel', 'HawkesArrivalModel', 'SessionArrivalModel',
+    'ElasticityExecutionModel', 'IntensityExecutionModel', 'LogitExecutionModel',
+    'StaticCompetitorModel', 'ReactiveCompetitorModel', 'StochasticCompetitorModel', 'GBMMarketModel',
+]
--- a/lab/population/arrivals.py
+++ b/lab/population/arrivals.py
@@ -0,0 +1,168 @@
+"""
+Arrival models for generating demand opportunities.
+
+This module provides different arrival processes:
+- PoissonArrivalModel: Constant-rate memoryless arrivals
+- HawkesArrivalModel: Self-exciting clustered arrivals (market orders)
+- SessionArrivalModel: Retail browsing sessions with multi-product views
+
+Each model implements the ArrivalModel protocol and generates Opportunity objects
+that flow through the execution pipeline.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Callable
+import numpy as np
+from uuid import uuid4
+from ..outlet.types import Opportunity, InstrumentSet, MarketState, HiddenState
+from ..outlet.constants import Side, OpportunityType
+from ..outlet.math_util import poisson_arrivals, hawkes_intensity
+
+@dataclass
+class PoissonArrivalConfig:
+    """Configuration for Poisson arrival process.
+
+    Attributes:
+        base_rate: Expected arrivals per unit time (scaled by hidden.true_demand_intensity)
+        side_probs: Probability distribution over BUY/SELL sides. ``None``
+            (the default) is replaced by ``{Side.BUY: 1.0}`` in
+            ``__post_init__``, i.e. buy-only flow.
+    """
+    base_rate: float = 10.0
+    side_probs: dict[Side, float] | None = None
+
+    def __post_init__(self) -> None:
+        # None is a sentinel so that every instance gets its own dict.
+        if self.side_probs is None:
+            self.side_probs = {Side.BUY: 1.0}
+
+class PoissonArrivalModel:
+    """Homogeneous Poisson arrival process.
+
+    Generates arrivals at a constant rate (modulated by demand intensity).
+    Suitable for stationary demand or as a baseline model.
+
+    The actual arrival count follows Poisson(rate * dt * intensity).
+    """
+
+    def __init__(self, cfg: PoissonArrivalConfig | None = None):
+        self.cfg = cfg or PoissonArrivalConfig()
+
+    def sample(self, t: float, dt: float, instruments: InstrumentSet,
+               market: MarketState | None, hidden: HiddenState,
+               rng: np.random.Generator) -> list[Opportunity]:
+        """Draw the opportunities arriving during one step.
+
+        Args:
+            t: Time stamped on every generated opportunity.
+            dt: Step length handed to ``poisson_arrivals``.
+            instruments: Only ``instruments.n`` is read; the instrument of
+                each arrival is drawn uniformly from ``[0, n)``.
+            market: Unused by this model.
+            hidden: Supplies ``true_demand_intensity``, which scales the rate.
+            rng: Random generator used for every draw.
+
+        Returns:
+            One unit-size ``Opportunity`` per arrival.
+        """
+        n_arrivals = poisson_arrivals(self.cfg.base_rate * hidden.true_demand_intensity, dt, rng)
+
+        # Built once per call instead of once per arrival.
+        sides = list(self.cfg.side_probs.keys())
+        probs = list(self.cfg.side_probs.values())
+
+        opps = []
+        for _ in range(n_arrivals):
+            # Plain int, matching what SessionArrivalModel emits.
+            inst_id = int(rng.integers(0, instruments.n))
+            # Draw an index rather than the member itself: handing enum
+            # members to rng.choice lets NumPy coerce int/str-based enums to
+            # NumPy scalars, so the result may no longer be a Side member.
+            side = sides[rng.choice(len(sides), p=probs)]
+            opps.append(Opportunity(
+                id=str(uuid4())[:8], type=OpportunityType.SESSION,
+                side=side, instrument_id=inst_id, size=1.0, t=t,
+                context={'segment': 'default'}
+            ))
+        return opps
+
+@dataclass
+class HawkesArrivalConfig:
+    """Configuration for Hawkes self-exciting process.
+
+    Attributes:
+        base_rate: Baseline arrival intensity
+        alpha: Excitation strength (how much each arrival increases intensity)
+        beta: Decay rate (how quickly excitation fades)
+        side_probs: Probability distribution over BUY/SELL sides. ``None``
+            (the default) is replaced by an even BUY/SELL split in
+            ``__post_init__``.
+    """
+    base_rate: float = 5.0
+    alpha: float = 0.5
+    beta: float = 1.0
+    side_probs: dict[Side, float] | None = None
+
+    def __post_init__(self) -> None:
+        # None is a sentinel so that every instance gets its own dict.
+        if self.side_probs is None:
+            self.side_probs = {Side.BUY: 0.5, Side.SELL: 0.5}
+
+class HawkesArrivalModel:
+    """Self-exciting Hawkes point process for clustered arrivals.
+
+    Models order flow where arrivals cluster in time (momentum, herding).
+    Intensity: lambda(t) = base + alpha * sum(exp(-beta * (t - t_i)))
+
+    Used for market making scenarios where orders arrive in bursts.
+    """
+
+    # Arrival times older than this (in the units of ``t``) are forgotten.
+    _HISTORY_WINDOW = 10
+
+    def __init__(self, cfg: HawkesArrivalConfig | None = None):
+        self.cfg = cfg or HawkesArrivalConfig()
+        self._history: np.ndarray = np.array([])
+
+    def sample(self, t: float, dt: float, instruments: InstrumentSet,
+               market: MarketState | None, hidden: HiddenState,
+               rng: np.random.Generator) -> list[Opportunity]:
+        """Draw the market orders arriving in ``[t, t + dt)``.
+
+        The intensity is evaluated once, at ``t``, from the stored arrival
+        history; arrivals generated in this call are added to the history
+        afterwards and therefore only affect later calls.
+
+        Args:
+            t: Start time of the step.
+            dt: Step length; each arrival time is ``t + U(0, dt)``.
+            instruments: Only ``instruments.n`` is read.
+            market: Unused by this model.
+            hidden: Supplies ``true_demand_intensity``, which scales the base rate.
+            rng: Random generator used for every draw.
+
+        Returns:
+            One ``Opportunity`` per arrival, with an Exp(1)-distributed size.
+        """
+        intensity = hawkes_intensity(
+            self.cfg.base_rate * hidden.true_demand_intensity,
+            self._history, self.cfg.alpha, self.cfg.beta, t
+        )
+        n_arrivals = poisson_arrivals(intensity, dt, rng)
+
+        # Built once per call instead of once per arrival.
+        sides = list(self.cfg.side_probs.keys())
+        probs = list(self.cfg.side_probs.values())
+
+        opps = []
+        new_times = []
+        for _ in range(n_arrivals):
+            arr_t = t + rng.uniform(0, dt)
+            new_times.append(arr_t)
+            inst_id = int(rng.integers(0, instruments.n))
+            # Index draw keeps the result a genuine Side member even if NumPy
+            # would coerce the enum values to scalars.
+            side = sides[rng.choice(len(sides), p=probs)]
+            opps.append(Opportunity(
+                id=str(uuid4())[:8], type=OpportunityType.MARKET_ORDER,
+                side=side, instrument_id=inst_id,
+                size=rng.exponential(1.0), t=arr_t,
+                context={'intensity': intensity}
+            ))
+
+        # One concatenation instead of an np.append (full copy) per arrival.
+        if new_times:
+            self._history = np.concatenate([self._history, new_times])
+        # decay old history
+        self._history = self._history[self._history > t - self._HISTORY_WINDOW]
+        return opps
+
+@dataclass
+class SessionArrivalConfig:
+    """Configuration for retail session arrivals.
+
+    Attributes:
+        sessions_per_step: Number of browsing sessions per step (a fixed
+            count; SessionArrivalModel does not scale it by ``dt``)
+        views_per_session: (min, max) product views per session
+        contamination: Fraction of sessions that are scrapers/bots; only
+            used by SessionArrivalModel when no hidden state is supplied,
+            otherwise ``hidden.contamination`` takes precedence
+    """
+    sessions_per_step: int = 20
+    views_per_session: tuple[int, int] = (1, 5)
+    contamination: float = 0.0
+
+class SessionArrivalModel:
+    """Retail browsing session model with multi-product views.
+
+    Each session views multiple products, generating one opportunity per view.
+    Scraper sessions (controlled by contamination) view more products
+    but convert at lower rates (handled by ExecutionModel).
+    """
+
+    def __init__(self, cfg: SessionArrivalConfig | None = None):
+        self.cfg = cfg or SessionArrivalConfig()
+
+    def sample(self, t: float, dt: float, instruments: InstrumentSet,
+               market: MarketState | None, hidden: HiddenState,
+               rng: np.random.Generator) -> list[Opportunity]:
+        """Generate one BUY opportunity per product view for this step.
+
+        Args:
+            t: Time stamped on every generated opportunity.
+            dt: Unused; the session count per step is fixed by the config.
+            instruments: Only ``instruments.n`` is read.
+            market: Unused by this model.
+            hidden: When truthy its ``contamination`` is the scraper
+                probability; otherwise the config value is used.
+            rng: Random generator used for every draw.
+
+        Returns:
+            Opportunities whose ``context`` carries ``session_id``,
+            ``is_scraper`` and ``n_views`` (the number of products the
+            session actually viewed).
+        """
+        n_sessions = self.cfg.sessions_per_step
+        contamination = hidden.contamination if hidden else self.cfg.contamination
+        min_views, max_views = self.cfg.views_per_session
+        opps = []
+
+        for _ in range(n_sessions):
+            is_scraper = bool(rng.random() < contamination)
+            # endpoint=True makes the documented maximum attainable;
+            # Generator.integers excludes the upper bound by default.
+            n_views = int(rng.integers(min_views, max_views, endpoint=True))
+            sid = str(uuid4())[:8]
+
+            # scrapers view more products
+            if is_scraper:
+                n_views *= 3
+            # A session cannot view more distinct products than exist. Clamp
+            # for every session so the recorded n_views matches the views
+            # actually generated.
+            n_views = min(n_views, instruments.n)
+
+            viewed = rng.choice(instruments.n, size=n_views, replace=False)
+            for inst_id in viewed:
+                opps.append(Opportunity(
+                    id=f"{sid}-{inst_id}", type=OpportunityType.SESSION,
+                    side=Side.BUY, instrument_id=int(inst_id), size=1.0, t=t,
+                    context={'session_id': sid, 'is_scraper': is_scraper, 'n_views': n_views}
+                ))
+        return opps
--- a/lab/population/competitors.py
+++ b/lab/population/competitors.py
@@ -0,0 +1,189 @@
+"""
+Market and competitor models for external dynamics.
+
+This module provides models for competitor pricing (retail) and market dynamics (finance):
+- StaticCompetitorModel: Fixed competitor prices
+- ReactiveCompetitorModel: Competitor reacts to agent's prices, can trigger price wars
+- StochasticCompetitorModel: Random walk competitor prices
+- GBMMarketModel: Geometric Brownian Motion for asset mid-prices
+
+Each model implements the MarketModel protocol.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+from ..outlet.types import Quote, MarketState, HiddenState
+from ..outlet.math_util import clamp, ema
+
+@dataclass
+class StaticCompetitorConfig:
+    """Configuration for static competitor.
+
+    Attributes:
+        markup: Fixed markup over reference prices, expressed as a fraction
+            (0.1 means competitor price = reference * 1.1)
+    """
+    markup: float = 0.1
+
+class StaticCompetitorModel:
+    """Competitor that always quotes a constant markup over a reference.
+
+    Competitor prices = reference * (1 + markup), where the reference is the
+    ``refs`` array given at construction or, when none was given, the agent's
+    own current prices. Handy as a baseline without competitor dynamics.
+    """
+
+    def __init__(self, cfg: StaticCompetitorConfig | None = None, refs: np.ndarray | None = None):
+        self.refs = refs
+        self.cfg = cfg or StaticCompetitorConfig()
+
+    def step(self, t: float, self_quotes: Quote, hidden: HiddenState,
+             rng: np.random.Generator) -> MarketState:
+        """Return the competitor quotes for time ``t`` (``hidden``/``rng`` unused)."""
+        base = self_quotes.prices if self.refs is None else self.refs
+        return MarketState(
+            competitor_quotes=base * (1 + self.cfg.markup),
+            regime='static',
+            t=t,
+        )
+
+@dataclass
+class ReactiveCompetitorConfig:
+    """Configuration for reactive competitor.
+
+    Attributes:
+        follow_weight: Smoothing weight for price following (0=ignore, 1=instant);
+            ReactiveCompetitorModel also uses it to scale the small premium
+            (follow_weight * 5%) the competitor targets outside a price war
+        band_pct: Maximum deviation from reference prices; competitor prices
+            are clamped to reference * (1 +/- band_pct)
+        war_threshold: Relative price diff that triggers price war, measured
+            as (agent - competitor) / competitor; negative means the agent
+            undercuts the competitor
+        war_aggression: How much competitor cuts prices during war; the
+            target becomes agent price * (1 - war_aggression)
+    """
+    follow_weight: float = 0.3
+    band_pct: float = 0.1
+    war_threshold: float = -0.15
+    war_aggression: float = 0.2
+
+class ReactiveCompetitorModel:
+    """Competitor that reacts to agent's prices with price war dynamics.
+
+    The competitor follows the agent's prices with smoothing.
+    If the agent undercuts significantly (beyond war_threshold),
+    a price war is triggered where the competitor becomes more aggressive.
+
+    This creates non-stationary dynamics that test policy robustness.
+
+    The model is stateful: it remembers its previous prices and whether a
+    price war is in progress between calls to ``step``.
+    """
+
+    def __init__(self, cfg: ReactiveCompetitorConfig | None = None, refs: np.ndarray | None = None):
+        self.cfg = cfg or ReactiveCompetitorConfig()
+        self.refs = refs
+        # Last competitor prices; initialised lazily on the first step.
+        self._prices: np.ndarray | None = None
+        self._in_war: bool = False
+
+    def step(self, t: float, self_quotes: Quote, hidden: HiddenState,
+             rng: np.random.Generator) -> MarketState:
+        """Advance the competitor one step in response to the agent's quotes.
+
+        Side effects: updates the stored prices and war flag, and overwrites
+        ``hidden.regime`` with 'price_war' or 'normal'. ``rng`` is unused.
+
+        Returns:
+            MarketState carrying the new competitor quotes and the regime.
+        """
+        # Without explicit references the agent's current prices act as the reference.
+        refs = self.refs if self.refs is not None else self_quotes.prices
+        c = self.cfg
+
+        if self._prices is None:
+            self._prices = refs.copy()
+
+        # check for price war trigger
+        # relative_diff < 0 means the agent is priced below the competitor.
+        relative_diff = (self_quotes.prices - self._prices) / (self._prices + 1e-8)
+        if np.any(relative_diff < c.war_threshold):
+            self._in_war = True
+        # NOTE(review): with the default war_threshold of -0.15 this exit
+        # level is +0.075, so a war only ends once every agent price is more
+        # than 7.5% ABOVE the competitor's. If an exit half-way back to parity
+        # was intended, the bound would be `c.war_threshold / 2` - confirm.
+        elif np.all(relative_diff > -c.war_threshold / 2):
+            self._in_war = False
+        # Otherwise the previous war state is kept (hysteresis).
+
+        # update prices
+        if self._in_war:
+            target = self_quotes.prices * (1 - c.war_aggression)
+            hidden.regime = 'price_war'
+        else:
+            target = self_quotes.prices * (1 + c.follow_weight * 0.05)
+            hidden.regime = 'normal'
+
+        # follow with smoothing
+        new_prices = np.array([ema(old, new, c.follow_weight)
+                               for old, new in zip(self._prices, target)])
+
+        # stay within band
+        new_prices = clamp(new_prices, refs * (1 - c.band_pct), refs * (1 + c.band_pct))
+        self._prices = new_prices
+
+        return MarketState(competitor_quotes=new_prices, regime=hidden.regime, t=t)
+
+@dataclass
+class StochasticCompetitorConfig:
+    """Configuration for stochastic competitor.
+
+    Attributes:
+        drift: Price drift per step (added to the price as an absolute amount)
+        volatility: Price volatility (std of random shocks, which are
+            multiplied by the current price)
+        mean_revert: Mean reversion strength toward reference (fraction of
+            the gap ``ref - price`` added back each step)
+    """
+    drift: float = 0.0
+    volatility: float = 0.02
+    mean_revert: float = 0.1
+
+class StochasticCompetitorModel:
+    """Mean-reverting random competitor prices (Ornstein-Uhlenbeck flavour).
+
+    Per step: P <- P + drift + mean_revert*(ref - P) + eps*P with
+    eps ~ N(0, volatility), then floored at half the reference price.
+
+    The path does not depend on the agent's actions (beyond using the
+    agent's quotes as the reference when no ``refs`` were supplied), which
+    makes it useful for testing robustness to market noise.
+    """
+
+    def __init__(self, cfg: StochasticCompetitorConfig | None = None, refs: np.ndarray | None = None):
+        self.refs = refs
+        self.cfg = cfg or StochasticCompetitorConfig()
+        self._prices: np.ndarray | None = None
+
+    def step(self, t: float, self_quotes: Quote, hidden: HiddenState,
+             rng: np.random.Generator) -> MarketState:
+        """Advance the competitor prices by one step and return them."""
+        params = self.cfg
+        anchor = self_quotes.prices if self.refs is None else self.refs
+
+        # First call: start the path at the reference level.
+        if self._prices is None:
+            self._prices = anchor.copy()
+
+        current = self._prices
+        shocks = rng.normal(0, params.volatility, len(current))
+        pull = params.mean_revert * (anchor - current)
+        stepped = current + params.drift + pull + shocks * current
+
+        # Floor: never fall below half of the reference price.
+        self._prices = np.maximum(stepped, anchor * 0.5)
+
+        return MarketState(competitor_quotes=self._prices.copy(), regime='stochastic', t=t)
+
+@dataclass
+class GBMMarketConfig:
+    """Configuration for GBM market model.
+
+    Attributes:
+        mu: Price drift (expected return)
+        sigma: Price volatility; GBMMarketModel also reports this value as
+            the per-instrument volatility of the returned market state
+        dt: Time step size
+    """
+    mu: float = 0.0
+    sigma: float = 0.1
+    dt: float = 1.0
+
+class GBMMarketModel:
+    """Mid-price generator following Geometric Brownian Motion.
+
+    Dynamics: dS = mu*S*dt + sigma*S*dW, advanced with the exact log-normal
+    update S <- S * exp((mu - sigma^2/2)*dt + sigma*sqrt(dt)*Z).
+
+    Intended for market making scenarios in which the agent quotes around a
+    randomly moving mid-price.
+    """
+
+    def __init__(self, cfg: GBMMarketConfig | None = None, initial: np.ndarray | None = None):
+        self._mids = initial
+        self.cfg = cfg or GBMMarketConfig()
+
+    def step(self, t: float, self_quotes: Quote, hidden: HiddenState,
+             rng: np.random.Generator) -> MarketState:
+        """Advance all mid-prices by one time step (``hidden`` is unused)."""
+        # No initial mids given: seed the path from the agent's first quotes.
+        if self._mids is None:
+            self._mids = self_quotes.prices.copy()
+
+        params = self.cfg
+        count = len(self._mids)
+        shocks = rng.standard_normal(count)
+        log_return = (params.mu - 0.5*params.sigma**2)*params.dt + params.sigma*np.sqrt(params.dt)*shocks
+        self._mids = self._mids * np.exp(log_return)
+
+        return MarketState(
+            mid_prices=self._mids.copy(),
+            volatility=np.full(count, params.sigma),
+            regime='gbm',
+            t=t,
+        )
--- a/lab/population/execution.py
+++ b/lab/population/execution.py
@@ -0,0 +1,174 @@
+"""
+Execution models for computing acceptance/fill probabilities.
+
+This module provides different models for how opportunities convert to executions:
+- ElasticityExecutionModel: Price elasticity with competitor cross-effects (retail)
+- IntensityExecutionModel: Distance-based fill intensity (market making)
+- LogitExecutionModel: Discrete choice model
+
+Each model implements the ExecutionModel protocol.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any
+import numpy as np
+from ..outlet.types import Opportunity, Quote, InstrumentSet, MarketState
+from ..outlet.constants import Side
+from ..outlet.math_util import sigmoid, safe_log, intensity_decay, EPS
+
+@dataclass
+class ElasticityConfig:
+    """Configuration for price elasticity execution model.
+
+    Attributes:
+        base_prob: Baseline purchase probability at reference price
+        price_sensitivity: Own-price elasticity coefficient (multiplies the
+            log of price / reference inside a decaying exponential)
+        cross_elasticity: Competitor price cross-elasticity (only applied
+            when the competitor's price is below the quoted price)
+        scraper_conversion: Multiplier for scraper conversion (typically << 1)
+    """
+    base_prob: float = 0.3
+    price_sensitivity: float = 2.0
+    cross_elasticity: float = 0.5
+    scraper_conversion: float = 0.01
+
+class ElasticityExecutionModel:
+    """Price elasticity model for retail dynamic pricing.
+
+    P(buy) = base_prob * exp(-sensitivity * log(price/ref)) * cross_effect * scraper_mult
+
+    Higher prices reduce purchase probability exponentially.
+    Competitor undercutting shifts demand away from the platform.
+    Scrapers convert at a much lower rate (reconnaissance, not purchase).
+    """
+
+    def __init__(self, cfg: ElasticityConfig | None = None):
+        self.cfg = cfg or ElasticityConfig()
+
+    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
+             market: MarketState | None, rng: np.random.Generator) -> float:
+        """Return the purchase probability for one opportunity.
+
+        Args:
+            opp: Opportunity; ``instrument_id`` selects the product and
+                ``context['is_scraper']`` (default False) marks scrapers.
+            quote: Quoted prices, indexed by instrument.
+            instruments: Supplies per-instrument reference prices (``refs``).
+            market: Optional market state; ``competitor_quotes`` are used
+                when present.
+            rng: Unused; the result is deterministic.
+
+        Returns:
+            Probability clipped to [0, 1].
+        """
+        idx = int(opp.instrument_id)
+        price = quote.prices[idx]
+        ref = instruments.refs[idx]
+
+        # base probability adjusted by price ratio
+        log_ratio = safe_log(price / ref)
+        prob = self.cfg.base_prob * np.exp(-self.cfg.price_sensitivity * log_ratio)
+
+        # cross-elasticity: competitor undercutting increases their share
+        if market and market.competitor_quotes is not None:
+            comp_price = market.competitor_quotes[idx]
+            if comp_price < price:
+                prob *= np.exp(-self.cfg.cross_elasticity * (price - comp_price) / ref)
+
+        # scrapers convert at much lower rate
+        if opp.context.get('is_scraper', False):
+            prob *= self.cfg.scraper_conversion
+
+        return float(np.clip(prob, 0, 1))
+
+    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet,
+                 context: dict[str, Any] | None = None) -> np.ndarray:
+        """Impute latent demand from observed fills.
+
+        Simple imputation: assume every fill was converted at the baseline
+        probability and invert it, i.e. ``fills / base_prob``. ``instruments``
+        and ``context`` are accepted for interface compatibility but are not
+        used by this estimate.
+        """
+        return fills / (self.cfg.base_prob + EPS)
+
+@dataclass
+class IntensityConfig:
+    """Configuration for intensity-based execution model.
+
+    Attributes:
+        base_intensity: Baseline fill intensity
+        kappa: Decay rate with distance from mid-price
+        vol_scale: Volatility multiplier for fill intensity (intensity is
+            scaled by ``1 + vol_scale * volatility`` when the market state
+            provides a volatility)
+    """
+    base_intensity: float = 1.0
+    kappa: float = 1.5
+    vol_scale: float = 0.5
+
+class IntensityExecutionModel:
+    """Fill-intensity execution model in the style of Avellaneda-Stoikov.
+
+    P(fill) = base * exp(-kappa * |quote - mid|) * (1 + vol_scale * sigma)
+
+    Quotes nearer the mid-price fill more often, and higher volatility
+    raises the fill probability. The result is clipped to [0, 1].
+    """
+
+    def __init__(self, cfg: IntensityConfig | None = None):
+        self.cfg = cfg or IntensityConfig()
+
+    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
+             market: MarketState | None, rng: np.random.Generator) -> float:
+        """Return the fill probability for one opportunity (``rng`` unused)."""
+        i = int(opp.instrument_id)
+
+        # Mid-price: taken from the market when available, else the quote itself.
+        if market and market.mid_prices is not None:
+            mid = market.mid_prices[i]
+        else:
+            mid = quote.prices[i]
+
+        # A BUY executes against our ask, anything else against our bid;
+        # a missing side falls back to the single quoted price.
+        side_quotes = quote.asks if opp.side == Side.BUY else quote.bids
+        exec_price = quote.prices[i] if side_quotes is None else side_quotes[i]
+
+        # Only the magnitude of the gap to the mid matters.
+        gap = abs(exec_price - mid)
+        fill = self.cfg.base_intensity * intensity_decay(gap, self.cfg.kappa)
+
+        # Volatility boost, when the market reports one.
+        if market and market.volatility is not None:
+            fill *= (1 + self.cfg.vol_scale * market.volatility[i])
+
+        return float(np.clip(fill, 0, 1))
+
+    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet,
+                 context: dict[str, Any] | None = None) -> np.ndarray:
+        """Return ``fills`` unchanged; no censoring correction is applied here."""
+        return fills
+
+@dataclass
+class LogitConfig:
+    """Configuration for logit discrete choice model.
+
+    Attributes:
+        beta_0: Intercept (base utility)
+        beta_price: Price coefficient (typically negative); multiplies the
+            ratio price / reference price
+        beta_quality: Quality attribute coefficient; multiplies the
+            instrument's 'quality' attribute
+    """
+    beta_0: float = 0.5
+    beta_price: float = -1.5
+    beta_quality: float = 0.3
+
+class LogitExecutionModel:
+    """Discrete choice logit model for purchase probability.
+
+    Utility: U = beta_0 + beta_price * (price/ref) + beta_quality * quality
+    P(buy) = sigmoid(U)
+
+    Provides a theoretically grounded demand model from economics literature.
+    """
+
+    # Quality assumed for instruments that do not declare one.
+    _DEFAULT_QUALITY = 0.5
+
+    def __init__(self, cfg: LogitConfig | None = None):
+        self.cfg = cfg or LogitConfig()
+
+    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
+             market: MarketState | None, rng: np.random.Generator) -> float:
+        """Return the purchase probability for one opportunity.
+
+        ``market`` and ``rng`` are unused; the result depends only on the
+        quoted price relative to the reference price and on the instrument's
+        'quality' attribute.
+        """
+        idx = int(opp.instrument_id)
+        price = quote.prices[idx]
+        ref = instruments.refs[idx]
+        quality = instruments.instruments[idx].attrs.get('quality', self._DEFAULT_QUALITY)
+
+        # utility
+        u = self.cfg.beta_0 + self.cfg.beta_price * (price / ref) + self.cfg.beta_quality * quality
+
+        # choice probability via sigmoid
+        return float(sigmoid(u))
+
+    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet,
+                 context: dict[str, Any] | None = None) -> np.ndarray:
+        """Impute latent demand from observed fills.
+
+        Divides fills by the model's purchase probability at the reference
+        price (price/ref = 1) for an instrument of default quality. The
+        intercept ``beta_0`` alone is a utility, not a probability: dividing
+        by it gives the wrong scale and flips sign or blows up for
+        ``beta_0 <= 0``.
+
+        ``instruments`` and ``context`` are not used by this estimate.
+        """
+        base_utility = (self.cfg.beta_0 + self.cfg.beta_price
+                        + self.cfg.beta_quality * self._DEFAULT_QUALITY)
+        base_prob = float(sigmoid(base_utility))
+        return fills / (base_prob + EPS)
--- a/lab/run_example.py
+++ b/lab/run_example.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+"""Example script demonstrating the Quote-Control platform"""
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+import numpy as np
+from lab.config import make_retail_platform, make_market_making_platform
+from lab.experiments.eval import (rollout, compare_policies, fixed_price_policy,
+                                   cost_plus_margin_policy, random_walk_policy)
+
+def demo_retail():
+    """Print a comparison of baseline pricing policies on the retail platform."""
+    banner = "=" * 60
+    print(banner)
+    print("RETAIL DYNAMIC PRICING DEMO")
+    print(banner)
+
+    platform = make_retail_platform()
+    catalog = platform.instruments
+    print(f"Instruments: {catalog.n}")
+    print(f"Reference prices: {catalog.refs[:5].round(2)}...")
+
+    # Candidate policies, registered one at a time.
+    policies = {}
+    policies['fixed'] = fixed_price_policy(platform.instruments.refs)
+    policies['cost_plus_30%'] = cost_plus_margin_policy(platform.instruments.costs, 0.3)
+    policies['cost_plus_50%'] = cost_plus_margin_policy(platform.instruments.costs, 0.5)
+    policies['random_walk'] = random_walk_policy(platform.instruments.refs, 0.03)
+
+    results = compare_policies(platform, policies, n_steps=100, n_runs=3)
+
+    print("\nPolicy Comparison (100 steps, 3 runs):")
+    print("-" * 50)
+
+    # Best mean PnL first.
+    ranked = sorted(results.items(), key=lambda item: -item[1]['mean_pnl'])
+    for label, stats in ranked:
+        pnl_part = f"{label:20s} PnL={stats['mean_pnl']:8.1f} +/- {stats['std_reward']:6.1f}  "
+        conv_part = f"conv={stats['mean_conversion']:.3f}"
+        print(pnl_part + conv_part)
+
+def demo_market_making():
+    """Run one seeded rollout on the market-making platform and print a summary."""
+    banner = "=" * 60
+    print("\n" + banner)
+    print("MARKET MAKING DEMO")
+    print(banner)
+
+    platform = make_market_making_platform()
+    print(f"Instruments: {platform.instruments.n}")
+    print(f"Initial mids: {platform.instruments.refs.round(2)}")
+
+    def mm_policy(obs: np.ndarray, t: int):
+        # Naive policy: ignore the observation and always return the
+        # reference prices together with the constant 1.0.
+        return platform.instruments.refs, 1.0
+
+    result = rollout(platform, mm_policy, n_steps=200, seed=42)
+    total_spread = sum(m.spread_capture for m in result.metrics)
+
+    print("\nRollout (200 steps):")
+    print(f"  Total PnL: {result.total_pnl:.2f}")
+    print(f"  Avg conversion: {result.avg_conversion:.3f}")
+    print(f"  Total spread capture: {total_spread:.2f}")
+
+# Script entry point: run the retail demo first, then the market-making demo.
+if __name__ == '__main__':
+    demo_retail()
+    demo_market_making()
--- a/sim/case/init.py
+++ b/sim/case/init.py
@@ -0,0 +1,2 @@
+"""Case-specific simulations and experiments."""
+
--- a/sim/case/thesis_simplified/init.py
+++ b/sim/case/thesis_simplified/init.py
@@ -0,0 +1,2 @@
+"""Minimal thesis-aligned pricing simulation (self-contained)."""
+
--- a/sim/case/thesis_simplified/coi.py
+++ b/sim/case/thesis_simplified/coi.py
@@ -0,0 +1,125 @@
+"""Cost of Information (COI) computation for thesis pricing system.
+
+Core KPI: COI = E[p_shown] - p_min measures pricing power from information asymmetry.
+Theorem 1 shows COI erodes as agent queries increase: as N->inf, p^(1)->p_min.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Dict, List, TYPE_CHECKING
+import numpy as np
+
+if TYPE_CHECKING:
+    from .simplified import Session
+
+
+@dataclass(frozen=True)
+class COIWindow:
+    """Windowed COI metrics computed from realized price exposures.
+
+    policy: E[p_shown] - cost, the definition-level KPI
+    agent: E[p^(1)] - cost where p^(1) is min price under agent querying
+    leak: max(policy - agent, 0), observable gap from reconnaissance
+    survival_ratio: agent/policy, fraction of pricing power retained
+        (clipped to [0, 1]; 0 when policy is not positive)
+    policy_by_product: per-product policy COI, one entry per cost entry
+    agent_by_product: per-product agent COI, one entry per cost entry
+    demand_weights: per-product weights used to aggregate the two arrays
+        above (all zeros when no demand mapping was supplied)
+    """
+    policy: float
+    agent: float
+    leak: float
+    survival_ratio: float
+    policy_by_product: np.ndarray
+    agent_by_product: np.ndarray
+    demand_weights: np.ndarray
+
+
+def aggregate_prices(sessions: List["Session"], mode: str = "all") -> Dict[int, List[float] | float]:
+    """Unified price aggregation across sessions.
+
+    Every event of every session contributes its ``price_seen`` under its
+    ``product_idx``. Keys are always plain ``int`` and prices plain ``float``,
+    whatever numeric types the events carry.
+
+    mode: "all" returns all prices per product, "min_per_session" returns min price per session per product,
+          "min_across" returns single min price per product.
+          Any other value is treated as "all".
+    """
+    if mode == "min_across":
+        mins: Dict[int, float] = {}
+        for s in sessions:
+            for e in s.events:
+                pidx, price = int(e.product_idx), float(e.price_seen)
+                mins[pidx] = min(mins.get(pidx, price), price)
+        return mins
+
+    if mode == "min_per_session":
+        result: Dict[int, List[float]] = {}
+        for s in sessions:
+            # Cheapest price this one session saw for each product.
+            by_p: Dict[int, float] = {}
+            for e in s.events:
+                pidx, price = int(e.product_idx), float(e.price_seen)
+                by_p[pidx] = min(by_p.get(pidx, price), price)
+            for pidx, pmin in by_p.items():
+                result.setdefault(pidx, []).append(pmin)
+        return result
+
+    # "all" (also the fallback for unrecognised modes)
+    prices: Dict[int, List[float]] = {}
+    for s in sessions:
+        for e in s.events:
+            # int() keeps the key type consistent with the other two modes.
+            prices.setdefault(int(e.product_idx), []).append(float(e.price_seen))
+    return prices
+
+
+def demand_weights_by_product(sessions: List["Session"], demand_mapping: Dict[str, float], n_products: int) -> np.ndarray:
+    """Compute demand-weighted importance per product.
+
+    Each ``demand_mapping`` entry (session id -> demand quantity) is credited
+    to the product of that session's first event. Entries whose session is
+    unknown, has no events, or points outside ``[0, n_products)`` are
+    ignored, matching the bounds check used when computing COI per product.
+
+    Returns:
+        Array of length ``n_products`` that sums to 1, or all zeros when no
+        positive total demand was attributed.
+    """
+    w = np.zeros(n_products, dtype=float)
+    sessions_by_id = {s.sid: s for s in sessions}
+    for sid, q in demand_mapping.items():
+        sess = sessions_by_id.get(sid)
+        if not (sess and sess.events):
+            continue
+        pidx = int(sess.events[0].product_idx)
+        # Out-of-range indices would raise IndexError; negative ones would
+        # silently wrap around to the wrong product.
+        if 0 <= pidx < n_products:
+            w[pidx] += float(q)
+    total = float(np.sum(w))
+    return (w / total) if total > 0 else w
+
+
+def compute_coi_window(sessions: List["Session"], costs: np.ndarray, demand_mapping: Dict[str, float] | None = None) -> COIWindow:
+    """Summarise policy-level and agent-realised COI for a window of sessions.
+
+    Per product, policy COI is the mean shown price minus cost and agent COI
+    is the lowest price any agent session (``actor == "A"``) saw minus cost.
+    Products are then aggregated with demand weights when a demand mapping
+    yields any, otherwise with a plain mean over the products that were shown.
+    """
+    n_products = int(len(costs))
+    shown = aggregate_prices(sessions, mode="all")
+
+    from_agents = [sess for sess in sessions if sess.actor == "A"]
+    agent_floor = {}
+    if from_agents:
+        agent_floor = aggregate_prices(from_agents, mode="min_across")
+
+    policy_by = np.zeros(n_products, dtype=float)
+    agent_by = np.zeros(n_products, dtype=float)
+    was_shown = np.array([(k in shown) for k in range(n_products)], dtype=bool)
+    agent_saw = np.array([(k in agent_floor) for k in range(n_products)], dtype=bool)
+
+    for product, observed in shown.items():
+        if observed and 0 <= product < n_products:
+            policy_by[product] = float(np.mean(observed) - float(costs[product]))
+    for product, lowest in agent_floor.items():
+        if 0 <= product < n_products:
+            agent_by[product] = float(lowest - float(costs[product]))
+
+    # Products no agent looked at keep their full policy COI (no erosion).
+    untouched = was_shown & ~agent_saw
+    agent_by[untouched] = policy_by[untouched]
+
+    if demand_mapping:
+        demand_w = demand_weights_by_product(sessions, demand_mapping, n_products)
+    else:
+        demand_w = np.zeros(n_products, dtype=float)
+
+    if float(np.sum(demand_w)) > 0:
+        policy = float(np.dot(demand_w, policy_by))
+        agent = float(np.dot(demand_w, agent_by))
+    elif np.any(was_shown):
+        policy = float(np.mean(policy_by[was_shown]))
+        agent = float(np.mean(agent_by[was_shown]))
+    else:
+        policy = agent = 0.0
+
+    leak = float(max(policy - agent, 0.0))
+    if policy > 0:
+        survival = float(np.clip(agent / policy, 0.0, 1.0))
+    else:
+        survival = 0.0
+
+    return COIWindow(
+        policy=policy,
+        agent=agent,
+        leak=leak,
+        survival_ratio=survival,
+        policy_by_product=policy_by,
+        agent_by_product=agent_by,
+        demand_weights=demand_w,
+    )
+
+
+def coi_erosion(coi_policy: float, coi_agent: float, eps: float = 1e-9) -> float:
+    """Fraction of pricing power destroyed by agent queries, in [0, 1].
+
+    erosion = 1 - (COI_agent / COI_policy), clipped to the unit interval.
+    As agents find ever lower prices COI_agent -> 0 and erosion -> 1.
+    A policy COI at or below ``eps`` has nothing to erode, so 0.0 is returned.
+    """
+    if coi_policy > eps:
+        retained = coi_agent / (coi_policy + eps)
+        return float(np.clip(1.0 - retained, 0.0, 1.0))
+    return 0.0
--- a/sim/case/thesis_simplified/experiments.py
+++ b/sim/case/thesis_simplified/experiments.py
@@ -0,0 +1,325 @@
+"""COI leakage experiments and policy comparisons.
+
+Demonstrates the core thesis contribution: COI erosion under agent contamination
+and recovery via robust pricing policies.
+
+Generates TensorBoard logs for:
+- COI erosion curves across contamination levels
+- Policy comparison (fixed vs adaptive vs RL)
+- Revenue/margin trade-offs
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, List, Tuple
+import json
+import numpy as np
+
+try:
+    from torch.utils.tensorboard import SummaryWriter
+    HAS_TB = True
+except ImportError:
+    HAS_TB = False
+
+from .simplified_env import PricingEnv, EnvConfig, make_env
+from .simplified import System
+
+
+@dataclass
+class ExperimentResult:
+    """Summary statistics for one named policy at one contamination level alpha."""
+    name: str
+    alpha: float
+    reward_mean: float
+    reward_std: float
+    coi_erosion: float
+    alpha_error: float
+    revenue: float
+    margin: float
+
+    def to_dict(self) -> dict:
+        return dict((key, getattr(self, key)) for key in self.__dataclass_fields__)
+
+
+def theoretical_coi_erosion_curve(alphas: np.ndarray, n_sessions: int = 1000) -> np.ndarray:
+    """Closed-form erosion 1 - 2/(N+1) for each contamination level (order-statistic model).
+
+    N = max(1, int(alpha * n_sessions)) is the agent count implied by each alpha.
+    For N i.i.d. uniform queries on [p_min, p_max]:
+    E[p^(1)] = p_min + (p_max - p_min)/(N+1), so erosion = 1 - 2/(N+1)
+    """
+    # truncate the implied agent count toward zero, but never let it drop below one
+    agent_counts = [max(1, int(level * n_sessions)) for level in alphas]
+    curve = [1.0 - 2.0 / (count + 1) for count in agent_counts]
+    return np.array(curve)
+
+
+def run_policy_episode(
+    env: PricingEnv,
+    policy_fn,
+    n_episodes: int = 10
+) -> Tuple[List[float], List[float], List[float], List[float]]:
+    """Roll out policy_fn for n_episodes; pool per-step (rewards, coi_erosions, alpha_errors, revenues)."""
+    step_rewards: List[float] = []
+    erosion_log: List[float] = []
+    alpha_gap_log: List[float] = []
+    revenue_log: List[float] = []
+    for _episode in range(n_episodes):
+        obs, info = env.reset()
+        finished = False
+        while not finished:
+            obs, reward, terminated, truncated, info = env.step(policy_fn(obs, env.n))
+            finished = terminated or truncated
+            step_rewards.append(reward)
+            if 'coi_erosion' in info:  # optional metrics are only logged when the env reports them
+                erosion_log.append(info['coi_erosion'])
+            if 'alpha_true' in info and 'alpha_est' in info:
+                alpha_gap_log.append(abs(info['alpha_true'] - info['alpha_est']))
+            if 'revenue' in info:
+                revenue_log.append(info['revenue'])
+    return step_rewards, erosion_log, alpha_gap_log, revenue_log
+
+
+class PolicyRegistry:
+    """Stateless baseline policies; each maps (obs, n) to an array of n price multipliers."""
+
+    @staticmethod
+    def fixed(obs: np.ndarray, n: int, margin: float = 0.15) -> np.ndarray:
+        return (1.0 + margin) * np.ones(n, dtype=np.float32)  # constant multiplier, observation ignored
+
+    @staticmethod
+    def random(obs: np.ndarray, n: int, rng: np.random.Generator = None) -> np.ndarray:
+        source = rng or np.random.default_rng()  # unseeded generator unless the caller supplies one
+        return source.uniform(0.7, 1.3, n).astype(np.float32)
+
+    @staticmethod
+    def adaptive(obs: np.ndarray, n: int, base_margin: float = 0.15) -> np.ndarray:
+        """Shrink the base margin by a factor (1 - 0.4 * alpha estimate); obs[2n] holds the estimate."""
+        alpha_hat = 0.2 if len(obs) <= 2 * n else obs[2 * n]
+        multiplier = 1.0 + base_margin * (1.0 - 0.4 * alpha_hat)
+        return np.ones(n, dtype=np.float32) * multiplier
+
+    @staticmethod
+    def aggressive(obs: np.ndarray, n: int) -> np.ndarray:
+        """Constant 1.4 multiplier; contamination is ignored."""
+        return np.full(n, 1.4, dtype=np.float32)
+
+    @staticmethod
+    def defensive(obs: np.ndarray, n: int) -> np.ndarray:
+        """Constant 1.05 multiplier; always cautious."""
+        return np.full(n, 1.05, dtype=np.float32)
+
+    @staticmethod
+    def alpha_proportional(obs: np.ndarray, n: int, max_margin: float = 0.3) -> np.ndarray:
+        """Margin is max_margin * (1 - alpha estimate); obs[2n] holds the estimate."""
+        alpha_hat = 0.2 if len(obs) <= 2 * n else obs[2 * n]
+        markup = max_margin * (1.0 - alpha_hat)
+        return (1.0 + markup) * np.ones(n, dtype=np.float32)
+
+
+def run_contamination_sweep(
+    alphas: List[float],
+    policies: Dict[str, callable],
+    n_products: int = 10,
+    max_steps: int = 200,
+    n_episodes: int = 10,
+    seed: int = 42,
+    log_dir: str = None
+) -> Dict[str, List[ExperimentResult]]:
+    """Evaluate every policy at every alpha; returns {policy name: [ExperimentResult per alpha]}."""
+
+    results = {name: [] for name in policies}
+    writer = SummaryWriter(Path(log_dir) / "sweep") if log_dir and HAS_TB else None  # TensorBoard is optional
+
+    for alpha in alphas:
+        print(f"  alpha={alpha:.2f}", end=" ")
+        env_cfg = EnvConfig(
+            n_products=n_products, max_steps=max_steps,
+            alpha_true=alpha, reward_mode="robust", seed=seed)
+        env = make_env(env_cfg)  # one env per alpha, reused by every policy
+
+        for name, policy_fn in policies.items():
+            rewards, coi_vals, alpha_errs, revenues = run_policy_episode(env, policy_fn, n_episodes)
+
+            result = ExperimentResult(
+                name=name, alpha=alpha,
+                reward_mean=float(np.mean(rewards)),
+                reward_std=float(np.std(rewards)),
+                coi_erosion=float(np.mean(coi_vals)) if coi_vals else 0.0,
+                alpha_error=float(np.mean(alpha_errs)) if alpha_errs else 0.0,
+                revenue=float(np.mean(revenues)) if revenues else 0.0,
+                margin=float(np.mean([policy_fn(np.zeros(3 * n_products + 3), n_products)]) - 1.0))
+
+            results[name].append(result)
+
+            if writer:
+                step = int(round(alpha * 100))  # round: int() alone maps 0.29 * 100 (28.999...) to 28
+                writer.add_scalar(f'{name}/reward', result.reward_mean, step)
+                writer.add_scalar(f'{name}/coi_erosion', result.coi_erosion, step)
+                writer.add_scalar(f'{name}/alpha_error', result.alpha_error, step)
+                writer.add_scalar(f'{name}/revenue', result.revenue, step)
+
+        print("done")
+
+    # overlay the closed-form curve, logged at the same rounded step values
+    if writer:
+        theo = theoretical_coi_erosion_curve(np.array(alphas))
+        for a, e in zip(alphas, theo):
+            writer.add_scalar('theoretical/coi_erosion', e, int(round(a * 100)))
+        writer.close()
+
+    return results
+
+
+def run_coi_demonstration(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
+    """Main COI demo: print/log the theory curve, sweep baseline policies, write coi_demo_results.json."""
+    print("=== COI Leakage Demonstration ===\n")
+
+    Path(log_dir).mkdir(parents=True, exist_ok=True)
+    writer = SummaryWriter(Path(log_dir) / "coi_demo") if HAS_TB else None
+
+    # theoretical erosion curve on a 0.05-spaced alpha grid
+    print("1. Theoretical COI erosion (Theorem 1)")
+    alphas = np.linspace(0.0, 0.6, 13)
+    theo_erosion = theoretical_coi_erosion_curve(alphas, n_sessions=1000)
+
+    for a, e in zip(alphas, theo_erosion):
+        print(f"   alpha={a:.2f} -> erosion={e:.3f}")
+        if writer:
+            writer.add_scalar('theory/coi_erosion', e, int(round(a * 100)))  # round, not truncate, the float product
+
+    # policy comparison
+    print("\n2. Policy comparison across contamination levels")
+    policies = {
+        'fixed': PolicyRegistry.fixed,
+        'aggressive': PolicyRegistry.aggressive,
+        'defensive': PolicyRegistry.defensive,
+        'adaptive': PolicyRegistry.adaptive,
+        'alpha_proportional': PolicyRegistry.alpha_proportional,
+    }
+
+    sweep_alphas = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
+    results = run_contamination_sweep(
+        sweep_alphas, policies, n_products=10, max_steps=100,
+        n_episodes=5, seed=seed, log_dir=log_dir)
+
+    # summarize: average each policy's metrics over the swept alphas
+    print("\n3. Summary by policy")
+    for name, res_list in results.items():
+        avg_reward = np.mean([r.reward_mean for r in res_list])
+        avg_coi = np.mean([r.coi_erosion for r in res_list])
+        print(f"   {name:20s}: avg_reward={avg_reward:.2f}, avg_coi={avg_coi:.3f}")
+
+    # save results (numpy arrays converted to lists so json can serialize them)
+    output = {
+        'theoretical': {'alphas': alphas.tolist(), 'erosion': theo_erosion.tolist()},
+        'empirical': {name: [r.to_dict() for r in res_list] for name, res_list in results.items()}}
+
+    with open(Path(log_dir) / "coi_demo_results.json", 'w') as f:
+        json.dump(output, f, indent=2)
+
+    if writer:
+        writer.close()
+
+    print(f"\nResults saved to {log_dir}/coi_demo_results.json")
+    print(f"TensorBoard: tensorboard --logdir {log_dir}")
+
+    return output
+
+
+def run_reward_mode_comparison(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
+    """Run the adaptive baseline under every reward mode at alpha=0.3 and dump a JSON summary."""
+    print("=== Reward Mode Comparison ===\n")
+
+    out_dir = Path(log_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    writer = SummaryWriter(out_dir / "reward_modes") if HAS_TB else None
+
+    contamination = 0.3  # moderate contamination, held fixed across modes
+
+    results = {}
+    for mode in ("revenue", "profit", "robust", "coi_aware"):
+        print(f"  mode={mode}", end=" ")
+        env = make_env(EnvConfig(
+            n_products=10, max_steps=200, alpha_true=contamination,
+            reward_mode=mode, seed=seed))
+
+        rewards, coi_vals, _, revenues = run_policy_episode(
+            env, PolicyRegistry.adaptive, n_episodes=10)
+
+        summary = {
+            'reward_mean': float(np.mean(rewards)),
+            'reward_std': float(np.std(rewards)),
+            'coi_erosion': float(np.mean(coi_vals)) if coi_vals else 0.0,
+            'revenue': float(np.mean(revenues)) if revenues else 0.0}
+        results[mode] = summary
+
+        if writer:
+            for metric, value in summary.items():
+                writer.add_scalar(f'{mode}/{metric}', value, 0)
+
+        print(f"reward={summary['reward_mean']:.2f}, coi={summary['coi_erosion']:.3f}")
+
+    if writer:
+        writer.close()
+
+    with open(out_dir / "reward_mode_results.json", 'w') as f:
+        json.dump(results, f, indent=2)
+
+    return results
+
+
+def run_alpha_drift_experiment(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
+    """Run the adaptive baseline under several alpha_drift settings; returns {'drift_<rate>': metrics}."""
+    print("=== Alpha Drift Experiment ===\n")
+
+    out_dir = Path(log_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    writer = SummaryWriter(out_dir / "alpha_drift") if HAS_TB else None
+
+    results = {}
+    for drift in (0.0, 0.01, 0.02, 0.05):
+        print(f"  drift={drift:.2f}", end=" ")
+        tag = f'drift_{drift}'
+        env = make_env(EnvConfig(
+            n_products=10, max_steps=200, alpha_true=0.2,
+            alpha_drift=drift, reward_mode="robust", seed=seed))
+
+        rewards, coi_vals, alpha_errs, _ = run_policy_episode(
+            env, PolicyRegistry.adaptive, n_episodes=10)
+
+        summary = {
+            'reward_mean': float(np.mean(rewards)),
+            'coi_erosion': float(np.mean(coi_vals)) if coi_vals else 0.0,
+            'alpha_tracking_error': float(np.mean(alpha_errs)) if alpha_errs else 0.0}
+        results[tag] = summary
+
+        if writer:
+            for metric, value in summary.items():
+                writer.add_scalar(f'{tag}/{metric}', value, 0)
+
+        print(f"reward={summary['reward_mean']:.2f}, "
+              f"alpha_err={summary['alpha_tracking_error']:.3f}")
+
+    if writer:
+        writer.close()
+
+    return results
+
+
+if __name__ == "__main__":
+    import argparse
+    cli = argparse.ArgumentParser(description="Run COI experiments")
+    cli.add_argument("--exp", type=str, default="coi", choices=["coi", "reward", "drift", "all"])
+    cli.add_argument("--log-dir", type=str, default="sim/case/thesis_simplified/runs")
+    cli.add_argument("--seed", type=int, default=42)
+    opts = cli.parse_args()
+
+    if opts.exp in ("coi", "all"):
+        run_coi_demonstration(opts.log_dir, opts.seed)
+
+    if opts.exp in ("reward", "all"):
+        run_reward_mode_comparison(opts.log_dir, opts.seed)
+
+    if opts.exp in ("drift", "all"):
+        run_alpha_drift_experiment(opts.log_dir, opts.seed)
--- a/sim/case/thesis_simplified/separability.py
+++ b/sim/case/thesis_simplified/separability.py
@@ -0,0 +1,72 @@
+"""Behavioral separability for human/agent detection.
+
+Computes divergence signals delta_H, delta_A from session trajectories using
+transition kernel estimation and KL divergence to prototype behavioral profiles.
+"""
+from __future__ import annotations
+from typing import Dict, List, Tuple, TYPE_CHECKING
+import numpy as np
+
+if TYPE_CHECKING:
+    from .simplified import Event, Session
+
+
+# prototype behavioral kernels: state -> {next state: probability}; every row sums to 1
+TRANS_H = {  # human prototype
+    "start": {"view": 0.85, "end": 0.15},
+    "view": {"detail": 0.4, "cart": 0.3, "view": 0.2, "end": 0.1},
+    "detail": {"cart": 0.5, "view": 0.3, "end": 0.2},
+    "cart": {"purchase": 0.6, "view": 0.25, "end": 0.15},
+    "purchase": {"end": 1.0},  # a purchase always closes the session
+}
+
+TRANS_A = {  # agent prototype
+    "start": {"view": 0.95, "end": 0.05},
+    "view": {"detail": 0.6, "view": 0.25, "cart": 0.1, "end": 0.05},
+    "detail": {"view": 0.5, "cart": 0.15, "detail": 0.3, "end": 0.05},  # "detail" can repeat here, unlike in TRANS_H
+    "cart": {"view": 0.4, "purchase": 0.2, "end": 0.4},  # carts reach purchase less often than in TRANS_H (0.2 vs 0.6)
+    "purchase": {"end": 1.0},
+}
+
+
+def kl_div(p: Dict[str, float], q: Dict[str, float], eps: float = 1e-10) -> float:
+    """Smoothed D_KL(p || q) over the union of keys; an entry missing from either side counts as eps."""
+    paired = ((p.get(k, eps), q.get(k, eps)) for k in set(p.keys()) | set(q.keys()))
+    return sum(pk * np.log((pk + eps) / (qk + eps)) for pk, qk in paired)
+
+
+def build_kernel(events: List["Event"]) -> Dict[str, Dict[str, float]]:
+    """Estimate the empirical kernel T' by counting consecutive actions, starting from "start"."""
+    counts: Dict[str, Dict[str, int]] = {}
+    src = "start"
+    for ev in events:
+        row = counts.setdefault(src, {})
+        row[ev.action] = row.get(ev.action, 0) + 1
+        src = ev.action
+    totals = {s: sum(row.values()) for s, row in counts.items()}  # outgoing transitions per state
+    return {s: {d: c / totals[s] for d, c in row.items()} for s, row in counts.items() if totals[s] > 0}
+
+
+def compute_divergence(session: "Session") -> Tuple[float, float]:
+    """Return (delta_H, delta_A): state-averaged KL of the session kernel T' to each prototype.
+
+    delta_H averages KL(T'[s] || TRANS_H[s]) over the states s observed in the session;
+    delta_A does the same against TRANS_A. A session with no events yields (0.5, 0.5).
+    """
+    observed = build_kernel(session.events)
+    if len(observed) == 0:
+        return 0.5, 0.5
+    to_human = [kl_div(row, TRANS_H.get(state, {})) for state, row in observed.items()]
+    to_agent = [kl_div(row, TRANS_A.get(state, {})) for state, row in observed.items()]
+    return sum(to_human) / len(observed), sum(to_agent) / len(observed)
+
+
+def estimate_alpha(session: "Session", beta: float = 2.0) -> float:
+    """Logistic score sigma(beta * (delta_H - delta_A)), read as P(session is agent-generated).
+
+    Falls back to 0.5 when the two divergences sum to zero or less.
+    """
+    delta_h, delta_a = compute_divergence(session)
+    if (delta_h + delta_a) <= 0:
+        return 0.5
+    return 1.0 / (1.0 + np.exp(-beta * (delta_h - delta_a)))
--- a/sim/case/thesis_simplified/simplified.py
+++ b/sim/case/thesis_simplified/simplified.py
@@ -0,0 +1,219 @@
+"""Minimal implementation of thesis pricing system.
+
+Implements the core loop: prices -> sessions -> demand -> prices
+with behavioral separability and robust pricing objective.
+
+Objects:
+- Session trajectories tau_s from mixture of H/A behavioral profiles
+- Demand proxy q_hat via weighted action aggregation
+- COI leakage penalty for agent reconnaissance
+- Limbo: alternating price/demand history for trajectory analysis
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Dict, List, Tuple
+import numpy as np
+
+from .coi import COIWindow, compute_coi_window
+from .separability import TRANS_H, TRANS_A, kl_div, build_kernel, compute_divergence, estimate_alpha
+
+ACTION_WEIGHTS = {"add_to_cart": 0.8, "checkout": 0.9, "purchase": 1.0, "view": 0.15, "detail": 0.25, "hover": 0.3, "start": 0.05, "end": 0.0, "cart": 0.8}  # "cart" is the name the TRANS_* kernels emit; weighted like add_to_cart instead of the 0.1 fallback
+
+
+@dataclass
+class Event:  # one recorded action inside a session trajectory
+    action: str  # action name, e.g. "view", "detail", "cart", "purchase"
+    product_idx: int  # index of the product in the prices/costs arrays
+    price_seen: float  # price shown when the action was taken
+    ts: float  # session-relative timestamp (starts at 0.0)
+
+
+@dataclass
+class Session:  # one simulated visit: an event list plus its ground-truth actor label
+    sid: str  # session id, e.g. "s0007" as assigned by put_prices_to_market
+    events: List[Event]  # trajectory in the order it was sampled
+    actor: str  # H or A (ground truth label)
+    theta: Dict[str, float] = field(default_factory=dict)  # behavioral params ("price_sens", "base_conv")
+
+
+def compute_demand(session: Session) -> float:
+    """Demand proxy q_hat: summed ACTION_WEIGHTS of the session's actions (0.1 for unlisted ones)."""
+    return sum(ACTION_WEIGHTS.get(action, 0.1) for action in (ev.action for ev in session.events))
+
+
+def sample_trajectory(rng: np.random.Generator, trans: Dict, prices: np.ndarray, costs: np.ndarray, theta: Dict[str, float],
+                      is_agent: bool, session_noise: float = 0.02, surge: float = 0.08, max_mult: float = 1.8) -> Tuple[List[Event], int]:
+    """Sample one session from kernel `trans` for a single random product; returns (events, product index)."""
+    pidx = int(rng.integers(0, len(prices)))  # one uniformly drawn product for the whole session
+    cost, base = float(costs[pidx]), float(prices[pidx]) * (1.0 + rng.normal(0.0, session_noise))  # listed price with multiplicative noise
+    base = float(np.clip(base, cost * 1.01, float(prices[pidx]) * 2.0))  # bounds: 1.01 * cost and 2 * listed price
+    price, signal, state, t = base, 0.0, "start", 0.0
+    events = []
+
+    while state != "end" and len(events) < 30:  # hard cap of 30 recorded events
+        probs = trans.get(state, {"end": 1.0})  # states missing from the kernel terminate the session
+        nxt = rng.choice(list(probs.keys()), p=list(probs.values()))
+        if nxt == "purchase":  # a purchase succeeds with probability p_buy, otherwise the session ends
+            rel = max((price - cost) / (cost + 1e-6), 0.0)  # relative markup over cost, floored at 0
+            p_buy = float(np.clip(theta.get("base_conv", 0.2) * np.exp(-theta.get("price_sens", 2.0) * rel), 0.0, 1.0))
+            if rng.random() > p_buy:
+                nxt = "end"
+        state = nxt
+        if state not in {"start", "end"}:
+            events.append(Event(action=state, product_idx=pidx, price_seen=float(price), ts=t))  # records the price before this action's surge
+            signal += float(ACTION_WEIGHTS.get(state, 0.1))  # accumulated engagement signal
+            price = float(np.clip(base * (1.0 + surge * signal), cost * 1.01, base * max_mult))  # surge: price rises with signal, capped at max_mult * base
+        t += max(0.2, rng.gamma(1.5, 0.8) if is_agent else rng.gamma(2.0, 1.2))  # gap to next action, at least 0.2; agents use a smaller-mean gamma
+    return events, pidx
+
+
+def put_prices_to_market(prices: np.ndarray, costs: np.ndarray, alpha: float = 0.2, n_sessions: int = 50,
+                         seed: int | None = None) -> Tuple[List[Session], Dict[str, float]]:
+    """Simulate n_sessions visits, each agent-driven with probability alpha; returns (sessions, sid -> q_hat)."""
+    rng = np.random.default_rng(seed)
+    sessions: List[Session] = []
+    # per-session draw order: actor coin flip, then theta, then the trajectory
+    for idx in range(n_sessions):
+        is_agent = rng.random() < alpha
+        if is_agent:
+            theta = {"price_sens": rng.uniform(0.05, 0.2), "base_conv": 0.01}
+        else:
+            theta = {"price_sens": rng.uniform(1.5, 4.0), "base_conv": rng.uniform(0.2, 0.5)}
+        events, _ = sample_trajectory(rng, TRANS_A if is_agent else TRANS_H, prices,
+                                      costs=costs, theta=theta, is_agent=is_agent)
+        sessions.append(Session(sid=f"s{idx:04d}", events=events, actor="A" if is_agent else "H", theta=theta))
+    return sessions, {s.sid: compute_demand(s) for s in sessions}
+
+
+@dataclass
+class LimboUpdate:  # one entry in the Limbo history
+    utype: str  # "prices" or "demand"
+    data: np.ndarray | Dict[str, float]  # price vector for "prices", sid -> q_hat mapping for "demand"
+    t: int  # position in the history, assigned by Limbo.add_update
+
+
+class Limbo:
+    """Append-only log of price and demand updates, each stamped with a running counter."""
+
+    def __init__(self):
+        self._t = 0
+        self.history: List[LimboUpdate] = []
+
+    def add_update(self, utype: str, data: np.ndarray | Dict[str, float]) -> Dict:
+        self.history.append(LimboUpdate(utype=utype, data=data, t=self._t))
+        self._t += 1
+        return {"action": "set_prices" if utype != "prices" else "observe_demand"}
+
+    def get_prices_history(self) -> List[np.ndarray]:
+        return [entry.data for entry in self.history if entry.utype == "prices"]
+
+    def get_demand_history(self) -> List[Dict[str, float]]:
+        return [entry.data for entry in self.history if entry.utype == "demand"]
+
+
+class System:
+    """Main pricing system implementing robust Stackelberg objective.
+
+    Alternating loop: set prices p_t -> observe demand Q_hat(p_t) -> estimate alpha_hat -> next prices.
+    """
+
+    def __init__(self, n_products: int = 10, costs: np.ndarray | None = None, lambda_coi: float = 0.5, seed: int | None = 42):
+        self.n = n_products
+        self.rng = np.random.default_rng(seed)
+        self.costs = costs if costs is not None else self.rng.uniform(10, 50, n_products)
+        self.refs = self.costs * (1 + self.rng.uniform(0.2, 0.5, n_products))  # reference prices: cost plus a 20-50% markup
+        self.lambda_coi = lambda_coi
+        self.limbo = Limbo()
+        self._alpha_est = 0.2  # value reported until sessions have been observed
+        self._sessions: List[Session] = []  # every session seen so far
+        self._last_sessions: List[Session] = []  # sessions from the most recent observe_demand call
+        self._last_coi: COIWindow | None = None
+
+    @property
+    def alpha(self) -> float:
+        return self._alpha_est
+
+    def _estimate_alpha_from_sessions(self) -> float:
+        if not self._sessions:
+            return self._alpha_est
+        return float(np.mean([estimate_alpha(s) for s in self._sessions[-50:]]))  # mean over the 50 newest sessions
+
+    def _revenue_under_demand(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
+        newest = {s.sid: s for s in self._sessions}  # sids restart at s0000 every step; later sessions override earlier ones
+        agg = np.zeros(self.n)
+        for sid, q in demand.items():
+            sess = newest.get(sid)
+            if sess and sess.events:
+                agg[sess.events[0].product_idx] += q  # demand is credited to the session's first event's product
+        return float(np.dot(prices, agg))
+
+    def _compute_coi_window(self, demand: Dict[str, float]) -> COIWindow:
+        if not self._last_sessions:
+            zeros = np.zeros(self.n, dtype=float)
+            return COIWindow(policy=0.0, agent=0.0, leak=0.0, survival_ratio=0.0,
+                             policy_by_product=zeros, agent_by_product=zeros, demand_weights=zeros)
+        return compute_coi_window(self._last_sessions, self.costs, demand_mapping=demand)
+
+    def _objective(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
+        """Robust objective: R(p,d) - lambda * COI_leak."""
+        profit = self._revenue_under_demand(prices, demand) - float(np.sum(self.costs))  # NOTE(review): unit costs are subtracted once each, independent of demand - confirm
+        self._last_coi = self._compute_coi_window(demand)
+        return profit - self.lambda_coi * self._last_coi.leak
+
+    def compute_prices(self, demand: Dict[str, float] | None = None) -> np.ndarray:
+        """Compute next prices via heuristic margin adjustment based on alpha estimate."""
+        self._alpha_est = self._estimate_alpha_from_sessions()
+        margin_scale = 1.0 - 0.5 * self._alpha_est  # defensive pricing under high contamination
+        margins = (self.refs - self.costs) * margin_scale
+        noise = self.rng.normal(0, 0.02, self.n) * self.costs
+        prices = np.clip(self.costs + margins + noise, self.costs * 1.02, self.refs * 1.3)
+        self.limbo.add_update("prices", prices)
+        return prices
+
+    def observe_demand(self, prices: np.ndarray, alpha_true: float = 0.2, n_sessions: int = 50) -> Dict[str, float]:
+        sessions, demand_map = put_prices_to_market(prices, costs=self.costs, alpha=alpha_true,
+                                                    n_sessions=n_sessions, seed=int(self.rng.integers(0, 10000)))
+        self._last_sessions = sessions
+        self._sessions.extend(sessions)
+        self.limbo.add_update("demand", demand_map)
+        return demand_map
+
+    def step(self, alpha_true: float = 0.2, n_sessions: int = 50) -> Tuple[np.ndarray, Dict[str, float], float, COIWindow]:
+        demand_hist = self.limbo.get_demand_history()
+        prices = self.compute_prices(demand_hist[-1] if demand_hist else None)
+        demand = self.observe_demand(prices, alpha_true, n_sessions)
+        reward = self._objective(prices, demand)
+        return prices, demand, reward, self._last_coi or self._compute_coi_window(demand)
+
+    def run(self, n_steps: int = 100, alpha_true: float = 0.2) -> Dict:
+        traj = {"prices": [], "demand": [], "rewards": [], "alpha_est": [], "alpha_true": alpha_true,
+                "coi_policy": [], "coi_agent": [], "coi_leak": [], "coi_survival": []}
+        for _ in range(n_steps):
+            p, d, r, coi = self.step(alpha_true)
+            traj["prices"].append(p); traj["demand"].append(d); traj["rewards"].append(r)
+            traj["alpha_est"].append(self._alpha_est)
+            traj["coi_policy"].append(coi.policy); traj["coi_agent"].append(coi.agent)
+            traj["coi_leak"].append(coi.leak); traj["coi_survival"].append(coi.survival_ratio)
+        return traj
+
+
+if __name__ == "__main__":
+    system = System(n_products=5, seed=42)
+    traj = system.run(n_steps=20, alpha_true=0.25)
+    print(f"avg reward: {np.mean(traj['rewards']):.2f}, final alpha_hat: {traj['alpha_est'][-1]:.3f}, "
+          f"COI_policy: {np.mean(traj['coi_policy']):.3f}, COI_agent: {np.mean(traj['coi_agent']):.3f}, leak: {np.mean(traj['coi_leak']):.3f}")
+
+    prices = np.array([20.0, 35.0, 50.0, 25.0, 40.0])
+    costs = np.array([15.0, 28.0, 40.0, 18.0, 30.0])
+    sessions, demand = put_prices_to_market(prices, costs=costs, alpha=0.3, n_sessions=20, seed=123)
+    print(f'sessions: {len(sessions)}, agents: {sum(1 for s in sessions if s.actor=="A")}')
+
+    for n in (1, 5, 10, 50, 100):
+        # closed-form erosion 1 - 2/(N+1) from the uniform order-statistic model
+        print(f'N={n:3d} agents -> COI erosion: {1.0 - 2.0/(n+1):.3f}')
+
+    human_like = [Event(action=a, product_idx=0, price_seen=20.0, ts=ts) for a, ts in (('view', 0.1), ('detail', 0.5), ('cart', 1.0), ('purchase', 2.0))]
+    print(f'human-like session alpha_hat: {estimate_alpha(Session(sid="test", events=human_like, actor="H")):.3f}')
+
+    agent_like = [Event(action=a, product_idx=0, price_seen=20.0, ts=ts) for a, ts in (('view', 0.1), ('detail', 0.2), ('view', 0.3), ('detail', 0.4))]
+    print(f'agent-like session alpha_hat: {estimate_alpha(Session(sid="test2", events=agent_like, actor="A")):.3f}')
--- a/sim/case/thesis_simplified/simplified_env.py
+++ b/sim/case/thesis_simplified/simplified_env.py
@@ -0,0 +1,249 @@
+"""Gymnasium-compatible RL environment for thesis pricing system.
+
+Wraps simplified.System with standard Gym interface for training pricing policies.
+Supports multiple reward modes and contamination scenarios.
+
+Action: price multipliers [0.5, 1.5] applied to reference prices
+Observation: [prices/refs, demand share, alpha_est, alpha_true, margins, t/max_steps]
+Reward: configurable objective (revenue, profit, robust, coi-aware)
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Dict, Tuple
+import numpy as np
+
+try:
+    import gymnasium as gym
+    from gymnasium import spaces
+    HAS_GYM = True
+except ImportError:
+    HAS_GYM = False
+
+from .simplified import System, Session, Event, Limbo, put_prices_to_market, compute_demand, estimate_alpha
+from .coi import COIWindow, compute_coi_window, coi_erosion
+
+
+@dataclass
+class EnvConfig:  # settings for PricingEnv
+    n_products: int = 5  # number of products priced each step
+    max_steps: int = 200  # episode terminates once this many steps have been taken
+    sessions_per_step: int = 30  # sessions simulated per env step
+    alpha_true: float = 0.2  # true agent share used when generating sessions
+    alpha_drift: float = 0.0  # presumably the per-step change in the true alpha - usage not visible here, confirm
+    alpha_bounds: Tuple[float, float] = (0.0, 0.6)  # presumably the range the true alpha is kept in - confirm
+    lambda_coi: float = 0.5  # weight of the COI leak term in the robust / coi_aware rewards
+    lambda_vol: float = 0.1  # weight of the step-to-step price-change penalty
+    reward_mode: str = "robust"  # revenue | profit | robust | coi_aware
+    normalize_reward: bool = True  # divide the reward by the sum of reference prices
+    seed: int | None = 42  # seed used by reset() when none is passed
+
+
+def aggregate_purchases(sessions: list[Session], n_products: int, costs: np.ndarray) -> Tuple[np.ndarray, float, float]:
+    """Tally purchase events: per-product counts, revenue at price_seen, and summed unit cost."""
+    counts = np.zeros(n_products, dtype=float)
+    total_revenue = total_cost = 0.0
+    # only "purchase" events with an in-range product index are counted
+    bought = (ev for s in sessions for ev in s.events if ev.action == "purchase" and 0 <= ev.product_idx < n_products)
+    for ev in bought:
+        counts[ev.product_idx] += 1.0
+        total_revenue += float(ev.price_seen)
+        total_cost += float(costs[ev.product_idx])
+    return counts, total_revenue, total_cost
+
+
+class PricingEnv(gym.Env if HAS_GYM else object):
+    """RL environment for dynamic pricing under agent contamination.
+
+    Platform sets prices p_t, market responds with mixture demand Q(p) = (1-alpha)*D_H + alpha*D_A.
+    Agent estimates contamination alpha_hat from behavioral signals.
+    Reward balances profit vs COI leakage.
+    """
+    metadata = {"render_modes": ["human", "ansi"]}
+
+    def __init__(self, cfg: EnvConfig | None = None):
+        if not HAS_GYM:
+            raise ImportError("gymnasium required")
+        self.cfg = cfg or EnvConfig()
+        self.n = self.cfg.n_products
+        self._sys: System | None = None
+        self._t = 0
+        self._alpha = self.cfg.alpha_true
+        self._last_prices: np.ndarray | None = None
+        self._last_demand: Dict[str, float] | None = None
+        self._episode_rewards: list[float] = []
+        self._demand_agg = np.zeros(self.n)
+
+        self.action_space = spaces.Box(low=0.5, high=1.5, shape=(self.n,), dtype=np.float32)
+        obs_dim = self.n + self.n + 1 + 1 + self.n + 1  # prices + demand + alpha_hat + alpha + margins + t
+        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)
+
+    def _build_obs(self) -> np.ndarray:
+        if self._sys is None:
+            return np.zeros(self.observation_space.shape[0], dtype=np.float32)
+        prices = self._last_prices if self._last_prices is not None else self._sys.refs
+        return np.concatenate([
+            prices / (self._sys.refs + 1e-6),
+            self._demand_agg / (np.sum(self._demand_agg) + 1e-6),
+            [self._sys.alpha, self._alpha],
+            (prices - self._sys.costs) / (self._sys.costs + 1e-6),
+            [self._t / self.cfg.max_steps],
+        ]).astype(np.float32)
+
+    def _compute_reward(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
+        cfg, sys = self.cfg, self._sys
+        if sys is None:
+            return 0.0
+
+        # aggregate demand per product
+        agg = np.zeros(self.n)
+        for sid, q in demand.items():
+            sess = next((s for s in sys._sessions if s.sid == sid), None)
+            if sess and sess.events:
+                agg[sess.events[0].product_idx] += q
+        self._demand_agg = agg
+
+        _, revenue, cost = aggregate_purchases(sys._last_sessions, self.n, sys.costs)
+        profit = revenue - cost
+
+        vol_penalty = 0.0
+        if self._last_prices is not None:
+            vol_penalty = cfg.lambda_vol * float(np.mean(np.abs(prices - self._last_prices) / (sys.refs + 1e-6)))
+
+        coi = compute_coi_window(sys._last_sessions, sys.costs, demand_mapping=demand)
+        leak = float(coi.leak)
+
+        reward_fns = {
+            "revenue": lambda: revenue,
+            "profit": lambda: profit,
+            "robust": lambda: profit - cfg.lambda_coi * leak - vol_penalty,
+            "coi_aware": lambda: profit - cfg.lambda_coi * (1 + 2 * sys.alpha) * leak - vol_penalty,
+        }
+        r = reward_fns.get(cfg.reward_mode, lambda: profit)()
+        return float(r / (float(np.sum(sys.refs)) + 1e-6)) if cfg.normalize_reward else float(r)
+
+    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
+        seed = seed if seed is not None else self.cfg.seed
+        self._sys = System(n_products=self.n, lambda_coi=self.cfg.lambda_coi, seed=seed)
+        self._t, self._alpha = 0, self.cfg.alpha_true
+        self._last_prices, self._last_demand = None, None
+        self._episode_rewards, self._demand_agg = [], np.zeros(self.n)
+        return self._build_obs(), {"alpha_true": self._alpha, "alpha_est": self._sys.alpha,
+                                   "costs": self._sys.costs.copy(), "refs": self._sys.refs.copy()}
+
+    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, dict]:
+        if self._sys is None:
+            raise RuntimeError("call reset() first")
+        # action holds per-product multipliers on the reference prices, limited to [0.5, 1.5]
+        action = np.clip(action, 0.5, 1.5)
+        prices = np.clip(self._sys.refs * action.astype(np.float64), self._sys.costs * 1.01, self._sys.refs * 2.0)  # floor at 1.01x cost, cap at 2x reference
+        demand = self._sys.observe_demand(prices, alpha_true=self._alpha, n_sessions=self.cfg.sessions_per_step)
+        self._sys.limbo.add_update("prices", prices)
+        self._sys._alpha_est = self._sys._estimate_alpha_from_sessions()
+        # score the step while self._last_prices still holds the previous step's prices
+        reward = self._compute_reward(prices, demand)
+        self._episode_rewards.append(reward)
+        self._last_prices, self._last_demand = prices.copy(), demand
+        self._t += 1
+
+        # compute info metrics using shared helper
+        purchases, revenue, cost = aggregate_purchases(self._sys._last_sessions, self.n, self._sys.costs)
+        n_agents = int(self._alpha * self.cfg.sessions_per_step)  # expected agent sessions implied by alpha_true, not a measured count
+        coi = compute_coi_window(self._sys._last_sessions, self._sys.costs, demand_mapping=demand)
+
+        info = {
+            "alpha_true": self._alpha, "alpha_est": self._sys.alpha,
+            "alpha_error": abs(self._alpha - self._sys.alpha),
+            "revenue": float(revenue), "profit": float(revenue - cost), "cost": float(cost),
+            "n_purchases": int(np.sum(purchases)),
+            "avg_margin": float(np.mean((prices - self._sys.costs) / self._sys.costs)),
+            "n_sessions": len(demand), "n_agents": n_agents, "price_std": float(np.std(prices)),
+            "coi_erosion": coi_erosion(coi.policy, coi.agent),
+            "coi_policy": float(coi.policy), "coi_agent": float(coi.agent),
+            "coi_leakage": float(coi.leak), "coi_survival": float(coi.survival_ratio),
+            "cumulative_reward": sum(self._episode_rewards), "step": self._t,
+        }
+        return self._build_obs(), reward, self._t >= self.cfg.max_steps, False, info  # terminated once max_steps is reached; truncated is always False
+
+    def render(self, mode: str = "human") -> str | None:
+        """Build a one-line status summary and print it when mode is "human"; None before the first step."""
+        if self._sys is None or self._last_prices is None:
+            return None
+        summary = (f"t={self._t}/{self.cfg.max_steps} | alpha_true={self._alpha:.3f} alpha_hat={self._sys.alpha:.3f} | prices: {self._last_prices.round(1)} | "
+                   f"demand: {self._demand_agg.round(2)} | reward: {self._episode_rewards[-1] if self._episode_rewards else 0:.3f}")
+        if mode == "human":
+            print(summary)
+        return summary
+
+    def close(self) -> None:
+        pass  # no-op: this method releases nothing
+
+
+class ContaminationSweepEnv(PricingEnv):
+    """PricingEnv whose true contamination level follows a cyclic schedule across resets."""
+
+    def __init__(self, cfg: EnvConfig | None = None, alpha_schedule: list[float] | None = None):
+        super().__init__(cfg)
+        # a missing or empty schedule falls back to the default 0.1 .. 0.5 sweep
+        self._schedule, self._schedule_idx = alpha_schedule or [0.1, 0.2, 0.3, 0.4, 0.5], 0
+
+    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
+        hop = 1 if options and options.get("advance_schedule", False) else 0  # move on only when asked to
+        self._schedule_idx = (self._schedule_idx + hop) % len(self._schedule)
+        self.cfg.alpha_true = self._schedule[self._schedule_idx]
+        return super().reset(seed, options)
+
+
+class AdversarialEnv(PricingEnv):
+    """Environment with adversarial contamination dynamics.
+
+    After more than 10 recorded steps, the true alpha is nudged upward faster the less recent prices vary (agents exploit).
+    """
+
+    def __init__(self, cfg: EnvConfig | None = None, exploitation_rate: float = 0.02):
+        super().__init__(cfg)
+        self._exploit_rate = exploitation_rate  # scales the per-step alpha increment
+        self._price_history: list[np.ndarray] = []  # prices of every step of the current episode
+
+    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, dict]:
+        obs, reward, term, trunc, info = super().step(action)
+        if self._last_prices is not None:
+            self._price_history.append(self._last_prices.copy())
+        predictability = 0.0  # reported as 0 until more than 10 steps are recorded
+        if len(self._price_history) > 10:
+            predictability = 1.0 / (float(np.std(self._price_history[-10:])) + 0.1)  # NOTE(review): std spans all products and the last 10 steps jointly, so cross-product price spread also lowers it — confirm intended
+            self._alpha = np.clip(self._alpha + self._exploit_rate * predictability * self._sys.rng.random(), *self.cfg.alpha_bounds)  # takes effect from the next step; obs/info above were built with the old alpha
+        info["predictability"] = predictability
+        return obs, reward, term, trunc, info
+
+    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
+        self._price_history = []  # history never carries over between episodes
+        return super().reset(seed, options)
+
+
+def make_env(cfg: EnvConfig | None = None, env_type: str = "standard") -> PricingEnv:
+    return {"sweep": ContaminationSweepEnv, "adversarial": AdversarialEnv}.get(env_type, PricingEnv)(cfg)  # any other env_type, including the default "standard", yields the base PricingEnv
+
+
+# baseline policies: each returns a float32 vector of per-product price multipliers
+fixed_price_policy = lambda refs, margin=0.0: np.ones(len(refs), dtype=np.float32) * (1.0 + margin)  # constant 1 + margin; refs is used only for its length
+random_policy = lambda n, rng=None: (rng or np.random.default_rng()).uniform(0.7, 1.3, n).astype(np.float32)  # uniform draws between 0.7 and 1.3; unseeded generator unless rng is given
+adaptive_policy = lambda obs, n, base=0.1: np.ones(n, dtype=np.float32) * (1.0 + base * (1.0 - 0.4 * obs[2 * n]))  # markup shrinks as obs[2 * n] grows; presumably that slot is the alpha estimate — TODO confirm against _build_obs
+
+
+if __name__ == "__main__":  # manual smoke test of the standard environment
+    cfg = EnvConfig(n_products=100, max_steps=100, alpha_true=0.25, reward_mode="robust")
+    env = make_env(cfg)
+    obs, info = env.reset()
+    print(f"initial: alpha={info['alpha_true']:.2f}")
+    # roll the adaptive baseline through one episode, rendering every 10th step
+    total_reward = 0.0
+    for t in range(cfg.max_steps):
+        action = adaptive_policy(obs, cfg.n_products)
+        obs, reward, done, _, info = env.step(action)
+        total_reward += reward
+        if t % 10 == 0:
+            env.render()
+        if done:
+            break
+
+    print(f"\ntotal reward: {total_reward:.2f}, final alpha_hat: {info['alpha_est']:.3f}")
--- a/sim/case/thesis_simplified/summarize.py
+++ b/sim/case/thesis_simplified/summarize.py
@@ -0,0 +1,168 @@
+"""Summarize TensorBoard logs into comparison tables."""
+from __future__ import annotations
+import json
+import re
+from pathlib import Path
+from collections import defaultdict
+from dataclasses import dataclass
+import pandas as pd
+
+try:
+    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
+    HAS_TB = True
+except ImportError:
+    HAS_TB = False
+
+
+@dataclass
+class RunInfo:
+    algo: str  # algorithm or baseline name taken from the run directory name
+    alpha: float  # contamination level encoded after "_a"
+    reward_mode: str  # trailing name component; 'robust' when the name carries none
+    path: Path  # run directory; the parser leaves a placeholder that discover_runs overwrites
+
+
+def parse_run_name(name: str) -> RunInfo | None:
+    """Extract algo, alpha, reward_mode from run directory name; None when the name does not fit."""
+    # patterns: ppo_a0.20_robust, cmp_fixed_a0.20, sac_a0.90_robust
+    m = re.match(r'(?:cmp_)?(\w+)_a(\d+(?:\.\d+)?)_?(\w+)?', name)  # alpha must be a plain decimal so float() cannot fail on stray dots
+    if not m:
+        return None
+    algo, alpha, mode = m.groups()
+    return RunInfo(algo=algo, alpha=float(alpha), reward_mode=mode or 'robust', path=Path())
+
+
+def load_tb_scalars(log_dir: Path, tags: list[str], reduce: str = 'last') -> dict[str, float]:
+    """Load one reduced value per requested scalar tag from TensorBoard event files.
+
+    Missing or empty tags and unknown ``reduce`` names are left out; {} without tensorboard.
+    """
+    if not HAS_TB:
+        return {}
+    reducers = {
+        'last': lambda xs: xs[-1],
+        'mean': lambda xs: sum(xs) / len(xs),
+        'max': max,
+        'min': min,
+    }
+    pick = reducers.get(reduce)
+    ea = EventAccumulator(str(log_dir))
+    ea.Reload()
+    out = {}
+    for tag in tags:
+        events = ea.Scalars(tag) if tag in ea.Tags().get('scalars', []) else []
+        if events and pick is not None:
+            out[tag] = pick([e.value for e in events])
+    return out
+
+
+def load_json_results(log_dir: Path) -> dict[str, float]:
+    """Return the parsed ``results.json`` of a run directory, or {} when the file is absent."""
+    results_file = log_dir / 'results.json'
+    if not results_file.exists():
+        return {}
+    with open(results_file) as fh:
+        return json.load(fh)
+
+
+def discover_runs(base_dir: Path) -> list[RunInfo]:
+    """Find all experiment runs in base directory; a missing directory yields an empty list."""
+    if not base_dir.is_dir():
+        return []
+    runs = []
+    for d in sorted(base_dir.iterdir()):  # sorted so duplicate (alpha, algo) runs resolve deterministically
+        info = parse_run_name(d.name) if d.is_dir() else None
+        if info:
+            info.path = d
+            runs.append(info)
+    return runs
+
+
+def build_tables(runs: list[RunInfo], metrics: list[str], reduce: str = 'last') -> dict[str, dict[str, pd.DataFrame]]:
+    """Build pivot tables: reward_mode -> metric -> DataFrame[alpha x algo].
+
+    A value comes from the run's results.json (final evaluation mean) when the metric is
+    present there, otherwise from the TensorBoard scalar reduced by ``reduce``.
+    """
+    # collect data: {reward_mode: {metric: {(alpha, algo): value}}}
+    data = defaultdict(lambda: defaultdict(dict))
+
+    tb_tags = [f'economics/{m}' if m in ['revenue', 'profit', 'margin'] else f'coi/{m}' if m in ['erosion', 'leakage'] else f'alpha/{m}' for m in metrics]
+    tag_map = dict(zip(tb_tags, metrics))
+    # results.json (written by train.py) stores some metrics under longer names, e.g. coi_erosion_mean
+    json_names = {'margin': 'avg_margin', 'erosion': 'coi_erosion', 'leakage': 'coi_leakage', 'estimation_error': 'alpha_error'}
+
+    for run in runs:
+        # try json first (final eval metrics)
+        jm = load_json_results(run.path)
+        tb = load_tb_scalars(run.path, tb_tags, reduce)
+
+        for tag, metric in tag_map.items():
+            json_keys = (f'{metric}_mean', f'{json_names.get(metric, metric)}_mean')
+            val = next((jm[k] for k in json_keys if k in jm), tb.get(tag))
+            if val is not None:
+                data[run.reward_mode][metric][(run.alpha, run.algo)] = val
+
+    # convert to DataFrames
+    tables = {}
+    for mode, metrics_data in data.items():
+        tables[mode] = {}
+        for metric, vals in metrics_data.items():
+            alphas = sorted({a for a, _ in vals})
+            algos = sorted({al for _, al in vals})
+            df = pd.DataFrame(index=alphas, columns=algos, dtype=float)
+            for (a, al), v in vals.items():
+                df.loc[a, al] = v
+            df.index.name = 'alpha'
+            tables[mode][metric] = df
+    return tables
+
+
+def format_table(df: pd.DataFrame, fmt: str = '.3f') -> str:
+    """Format DataFrame as a markdown table; ``fmt`` is the float format (to_markdown needs pandas' optional ``tabulate`` dependency)."""
+    return df.to_markdown(floatfmt=fmt)
+
+
+def summarize(base_dir: str = 'sim/case/thesis_simplified/runs',
+              metrics: list[str] | None = None,
+              reduce: str = 'last',
+              output: str | None = None) -> dict:
+    """Print (and optionally save) markdown comparison tables for all runs under base_dir.
+
+    Returns the nested tables dict from build_tables, or {} when no run is found.
+    """
+    wanted = metrics or ['revenue', 'profit', 'margin', 'erosion', 'leakage']
+    root = Path(base_dir)
+    runs = discover_runs(root)
+    if not runs:
+        print(f"No runs found in {root}")
+        return {}
+
+    print(f"Found {len(runs)} runs")
+    tables = build_tables(runs, wanted, reduce)
+
+    # one top-level heading per reward mode, then a sub-heading plus table per metric
+    parts = []
+    for mode in sorted(tables):
+        parts.append(f"\n# Reward Mode: {mode}\n")
+        for metric in sorted(tables[mode]):
+            parts.extend([f"\n## {metric}\n", format_table(tables[mode][metric]), ""])
+
+    report = '\n'.join(parts)
+    print(report)
+    if output:
+        Path(output).write_text(report)
+        print(f"\nSaved to {output}")
+
+    return tables
+
+
+if __name__ == '__main__':  # CLI entry point: forwards the parsed flags to summarize()
+    import argparse
+    p = argparse.ArgumentParser()
+    p.add_argument('--dir', default='sim/case/thesis_simplified/runs')
+    p.add_argument('--metrics', nargs='+', default=['revenue', 'profit', 'margin', 'erosion', 'leakage'])
+    p.add_argument('--reduce', default='last', choices=['last', 'mean', 'max', 'min'])  # applied to TensorBoard scalars only
+    p.add_argument('--output', '-o', help='save markdown to file')
+    args = p.parse_args()
+    summarize(args.dir, args.metrics, args.reduce, args.output)
--- a/sim/case/thesis_simplified/train.py
+++ b/sim/case/thesis_simplified/train.py
@@ -0,0 +1,336 @@
+"""RL training for thesis pricing system with thesis-aligned metrics.
+
+Trains pricing policies using stable-baselines3 with TensorBoard logging.
+Tracks COI erosion, alpha estimation error, and economic KPIs per thesis formulation.
+"""
+from __future__ import annotations
+import argparse
+import json
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from dataclasses import dataclass, asdict, field
+from pathlib import Path
+from typing import Dict, List, Callable, Any
+import numpy as np
+
+try:
+    from stable_baselines3 import PPO, SAC, A2C
+    from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
+    from stable_baselines3.common.vec_env import DummyVecEnv
+    from stable_baselines3.common.monitor import Monitor
+    HAS_SB3 = True
+except ImportError:
+    HAS_SB3 = False
+
+try:
+    from torch.utils.tensorboard import SummaryWriter
+    HAS_TB = True
+except ImportError:
+    HAS_TB = False
+
+from .simplified_env import PricingEnv, EnvConfig, make_env, adaptive_policy, fixed_price_policy, random_policy
+
+
+@dataclass
+class EpisodeMetrics:
+    """Running sums of per-step metrics for one episode; normalized() converts them to per-step means."""
+    reward: float = 0.0
+    revenue: float = 0.0
+    profit: float = 0.0
+    coi_erosion: float = 0.0
+    coi_leakage: float = 0.0
+    alpha_error: float = 0.0
+    avg_margin: float = 0.0
+    n_agents: int = 0
+    steps: int = 0
+
+    def accumulate(self, info: Dict[str, Any]) -> None:
+        """Add one step's info dict to the sums; keys absent from info count as 0."""
+        self.steps += 1
+        for key in ('reward', 'revenue', 'profit', 'coi_erosion', 'coi_leakage', 'avg_margin', 'n_agents'):
+            setattr(self, key, getattr(self, key) + info.get(key, 0))
+        # the alpha error is recomputed from the step's true and estimated alpha
+        self.alpha_error += abs(info.get('alpha_true', 0) - info.get('alpha_est', 0))
+
+    def normalized(self) -> Dict[str, float]:
+        """Return the per-step mean of every summed metric except reward (steps is floored at 1)."""
+        divisor = max(self.steps, 1)
+        names = ('revenue', 'profit', 'coi_erosion', 'coi_leakage', 'alpha_error', 'avg_margin', 'n_agents')
+        return {name: getattr(self, name) / divisor for name in names}
+
+
+@dataclass
+class ExperimentConfig:
+    algo: str = "ppo"  # learner ("ppo", "sac", "a2c") or baseline ("fixed", "adaptive", "random", "myopic")
+    total_timesteps: int = 100_000
+    n_envs: int = 4  # number of training envs placed in the DummyVecEnv
+    eval_freq: int = 5000  # handed to EvalCallback unchanged
+    n_eval_episodes: int = 10
+    log_dir: str = "sim/case/thesis_simplified/runs"
+    seed: int = 42
+    n_products: int = 10
+    max_steps: int = 200  # episode length handed to EnvConfig
+    alpha_true: float = 0.2
+    reward_mode: str = "robust"
+    experiment_name: str | None = None  # run sub-directory; derived in __post_init__ when left as None
+
+    def __post_init__(self):
+        if self.experiment_name is None:
+            self.experiment_name = f"{self.algo}_a{self.alpha_true:.2f}_{self.reward_mode}"  # e.g. "ppo_a0.20_robust"
+
+
+class Policy:
+    """Unified policy interface for baselines and trained models: predict() returns an (action, None) pair."""
+
+    def __init__(self, policy_fn: Callable[[np.ndarray, int], np.ndarray], name: str):
+        self._fn, self.name = policy_fn, name
+
+    def predict(self, obs: np.ndarray, deterministic: bool = True) -> tuple[np.ndarray, None]:
+        return self._fn(obs, (len(obs) - 3) // 3), None  # deterministic is ignored; n assumes obs has 3 * n + 3 entries — TODO confirm against _build_obs
+
+    @staticmethod
+    def fixed(margin: float = 0.15) -> "Policy":
+        return Policy(lambda obs, n: fixed_price_policy(np.ones(n), margin), f"fixed_{margin:.2f}")  # the ones vector only conveys the product count
+
+    @staticmethod
+    def adaptive(base_margin: float = 0.15) -> "Policy":
+        return Policy(lambda obs, n: adaptive_policy(obs, n, base_margin), f"adaptive_{base_margin:.2f}")
+
+    @staticmethod
+    def random() -> "Policy":
+        return Policy(lambda obs, n: random_policy(n), "random")  # no rng is passed, so draws are unseeded
+
+    @staticmethod
+    def myopic(greed: float = 0.3) -> "Policy":
+        def _fn(obs: np.ndarray, n: int) -> np.ndarray:
+            demand_norm = obs[n:2*n] if len(obs) > 2*n else np.ones(n) * 0.5  # presumably the normalised-demand slice of obs — TODO confirm against _build_obs
+            return np.ones(n, dtype=np.float32) * np.clip(1.0 + greed * (1 + np.mean(demand_norm)), 0.5, 1.5)  # one shared multiplier, kept within [0.5, 1.5]
+        return Policy(_fn, f"myopic_{greed:.1f}")
+
+
+def log_metrics(writer: SummaryWriter | None, metrics: Dict[str, float], prefix: str, step: int) -> None:
+    """Write each metric as scalar ``prefix/name`` at ``step``; does nothing without a writer."""
+    if writer is not None:
+        for name, value in metrics.items():
+            writer.add_scalar(f'{prefix}/{name}', value, step)
+
+
+class MetricsCallback(BaseCallback):
+    """Copy economics, COI and alpha metrics from every env's step info into TensorBoard."""
+
+    def __init__(self, writer: SummaryWriter | None, verbose: int = 0):
+        super().__init__(verbose)
+        self._writer = writer
+
+    def _on_step(self) -> bool:
+        if self._writer is not None:
+            # TensorBoard tag -> key read from the env info dict
+            plain = {'economics/revenue': 'revenue', 'economics/profit': 'profit', 'economics/margin': 'avg_margin',
+                     'coi/erosion': 'coi_erosion', 'coi/leakage': 'coi_leakage'}
+            for info in self.locals.get('infos', []):
+                for tag, key in plain.items():
+                    self._writer.add_scalar(tag, info.get(key, 0), self.num_timesteps)
+                self._writer.add_scalar('alpha/estimation_error', abs(info.get('alpha_true', 0) - info.get('alpha_est', 0)), self.num_timesteps)
+                self._writer.add_scalar('agents/count', info.get('n_agents', 0), self.num_timesteps)
+        return True
+
+
+def make_vec_env(cfg: ExperimentConfig, n_envs: int = 1) -> DummyVecEnv:
+    def _make():  # factory for one Monitor-wrapped env built from the experiment settings
+        return Monitor(make_env(EnvConfig(n_products=cfg.n_products, max_steps=cfg.max_steps,
+                                          alpha_true=cfg.alpha_true, reward_mode=cfg.reward_mode, seed=cfg.seed)))  # NOTE(review): all n_envs copies share cfg.seed — confirm identical seeding is intended
+    return DummyVecEnv([_make for _ in range(n_envs)])
+
+
+def run_episodes(policy: Policy | Any, env: PricingEnv, n_episodes: int) -> List[EpisodeMetrics]:
+    """Roll the policy through n_episodes full episodes and return one EpisodeMetrics per episode."""
+    collected = []
+    for _episode in range(n_episodes):
+        obs, _reset_info = env.reset()
+        tally, finished = EpisodeMetrics(), False
+        while not finished:
+            action, _state = policy.predict(obs, deterministic=True)
+            obs, reward, terminated, truncated, info = env.step(action)
+            finished = terminated or truncated
+            tally.accumulate(info)
+            tally.reward += reward  # the step reward is not part of info, so it is added separately
+        collected.append(tally)
+    return collected
+
+
+def evaluate_policy(policy: Policy | Any, cfg: ExperimentConfig, n_episodes: int = 20) -> Dict[str, float]:
+    """Evaluate on a fresh env seeded with cfg.seed + 999; return reward mean/std and per-step metric means."""
+    env = make_env(EnvConfig(n_products=cfg.n_products, max_steps=cfg.max_steps,
+                             alpha_true=cfg.alpha_true, reward_mode=cfg.reward_mode, seed=cfg.seed + 999))
+    episodes = run_episodes(policy, env, n_episodes)
+    rewards, per_step = [ep.reward for ep in episodes], [ep.normalized() for ep in episodes]
+    summary = {'reward_mean': np.mean(rewards), 'reward_std': np.std(rewards)}
+    summary.update({f'{k}_mean': np.mean([row[k] for row in per_step]) for k in ['revenue', 'profit', 'coi_erosion', 'coi_leakage', 'alpha_error', 'avg_margin']})
+    return summary
+
+
+def run_baseline(policy: Policy, vec_env: DummyVecEnv, total_steps: int, writer: SummaryWriter | None):
+    """Roll a non-learning policy through vec_env for about total_steps env steps, logging step scalars."""
+    obs, n_envs = vec_env.reset(), vec_env.num_envs
+    ep_rewards = np.zeros(n_envs)
+    # TensorBoard tag -> key read from each env's info dict
+    plain = {'economics/revenue': 'revenue', 'economics/profit': 'profit', 'economics/margin': 'avg_margin',
+             'coi/erosion': 'coi_erosion', 'coi/leakage': 'coi_leakage'}
+    for step in range(0, total_steps, n_envs):
+        actions = np.array([policy.predict(obs[idx])[0] for idx in range(n_envs)])
+        obs, rewards, dones, infos = vec_env.step(actions)
+        ep_rewards += rewards
+        for i, info in enumerate(infos):
+            if writer:
+                for tag, key in plain.items():
+                    writer.add_scalar(tag, info.get(key, 0), step)
+                writer.add_scalar('alpha/estimation_error', abs(info.get('alpha_true', 0) - info.get('alpha_est', 0)), step)
+                writer.add_scalar('agents/count', info.get('n_agents', 0), step)
+            if dones[i]:
+                if writer:
+                    writer.add_scalar('rollout/ep_reward', ep_rewards[i], step)
+                ep_rewards[i] = 0
+
+
+def train(cfg: ExperimentConfig) -> Dict[str, Any]:
+    """Train (or, for baselines, roll out) one configuration and evaluate the resulting policy.
+
+    Writes config.json, TensorBoard scalars and results.json to the run directory; returns its path and metrics.
+    """
+    algo = cfg.algo.lower()
+    is_baseline = algo in ["fixed", "adaptive", "random", "myopic"]
+    if not HAS_SB3 and not is_baseline:
+        raise ImportError("stable-baselines3 required: pip install stable-baselines3[extra]")
+    log_path = Path(cfg.log_dir) / cfg.experiment_name
+    log_path.mkdir(parents=True, exist_ok=True)
+    with open(log_path / "config.json", "w") as f:
+        json.dump(asdict(cfg), f, indent=2)
+
+    writer = SummaryWriter(log_path) if HAS_TB else None
+    train_env, eval_env = make_vec_env(cfg, cfg.n_envs), make_vec_env(cfg, 1)
+    try:  # the writer and both envs are released even when training or evaluation raises
+        if is_baseline:
+            policy = {"fixed": Policy.fixed, "adaptive": Policy.adaptive, "random": Policy.random, "myopic": Policy.myopic}[algo]()
+            run_baseline(policy, train_env, cfg.total_timesteps, writer)
+        else:
+            common = dict(verbose=1, seed=cfg.seed, tensorboard_log=str(log_path), device="auto")
+            policy = {
+                "ppo": lambda: PPO("MlpPolicy", train_env, learning_rate=3e-4, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2, ent_coef=0.01, **common),
+                "sac": lambda: SAC("MlpPolicy", train_env, learning_rate=1e-4, buffer_size=50_000, batch_size=512, tau=0.02, gamma=0.99, learning_starts=1000, ent_coef="auto_0.1", train_freq=4, **common),
+                "a2c": lambda: A2C("MlpPolicy", train_env, learning_rate=7e-4, n_steps=5, gamma=0.99, **common),
+            }[algo]()
+            eval_cb = EvalCallback(eval_env, best_model_save_path=str(log_path / "best"), log_path=str(log_path),  # eval_freq counts callback calls (one per vectorised step)
+                                   eval_freq=cfg.eval_freq, n_eval_episodes=cfg.n_eval_episodes, deterministic=True)
+            policy.learn(cfg.total_timesteps, callback=[MetricsCallback(writer), eval_cb], progress_bar=True)
+            policy.save(log_path / "final_model")
+        final_metrics = evaluate_policy(policy, cfg)
+        if writer:
+            log_metrics(writer, final_metrics, 'final', cfg.total_timesteps)
+    finally:
+        if writer:
+            writer.close()
+        train_env.close()
+        eval_env.close()
+    with open(log_path / "results.json", "w") as f:
+        json.dump(final_metrics, f, indent=2)
+    return {"path": str(log_path), "metrics": final_metrics}
+
+
+def _train_alpha(args: tuple) -> tuple[str, Dict]:
+    """Sweep worker: train one contamination level (kept top-level so it can be pickled)."""
+    cfg_dict, alpha = args
+    tag = f"{alpha:.2f}"
+    cfg_dict.update(alpha_true=alpha, experiment_name=f"{cfg_dict['algo']}_a{tag}_{cfg_dict['reward_mode']}")
+    sweep_cfg = ExperimentConfig(**cfg_dict)
+    print(f"[alpha={tag}] starting")
+    metrics = train(sweep_cfg)["metrics"]
+    print(f"[alpha={tag}] done")
+    return f"alpha_{tag}", metrics
+
+
+def run_sweep(cfg: ExperimentConfig, alphas: List[float] | None = None, max_workers: int | None = None) -> Dict[str, Dict]:
+    """Train one run per contamination level and save every run's final metrics to sweep_<algo>_<mode>.json."""
+    levels = alphas or [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+    cfg_dict = asdict(cfg)
+    jobs = [(cfg_dict.copy(), a) for a in levels]  # each worker gets its own copy to mutate
+    results = {}
+    if max_workers == 1:  # sequential fallback
+        results.update(_train_alpha(job) for job in jobs)
+    else:
+        with ProcessPoolExecutor(max_workers=max_workers) as pool:
+            pending = [pool.submit(_train_alpha, job) for job in jobs]
+            for fut in as_completed(pending):
+                results.update([fut.result()])
+
+    summary_path = Path(cfg.log_dir) / f"sweep_{cfg.algo}_{cfg.reward_mode}.json"
+    with open(summary_path, "w") as f:
+        json.dump(results, f, indent=2)
+    print(f"\nSweep results saved to {summary_path}")
+    return results
+
+
+def _train_policy(args: tuple) -> tuple[str, Dict]:
+    """Comparison worker: train/evaluate one policy (top-level so it can be submitted to a process pool)."""
+    cfg_dict, algo = args
+    # comparison runs live in their own "cmp_"-prefixed directories
+    cfg_dict.update(algo=algo, experiment_name=f"cmp_{algo}_a{cfg_dict['alpha_true']:.2f}")
+    cmp_cfg = ExperimentConfig(**cfg_dict)
+    print(f"[{algo}] starting")
+    metrics = train(cmp_cfg)["metrics"]
+    print(f"[{algo}] done")
+    return algo, metrics
+
+
+def compare_policies(cfg: ExperimentConfig, policies: List[str] | None = None, max_workers: int | None = None) -> Dict[str, Dict]:
+    """Run each named policy on the same config, save compare_a<alpha>.json and print a short summary."""
+    names = policies or ["fixed", "adaptive", "myopic", "random"]
+    cfg_dict = asdict(cfg)
+    jobs = [(cfg_dict.copy(), p) for p in names]  # each worker gets its own copy to mutate
+    results = {}
+    if max_workers == 1:
+        results.update(_train_policy(job) for job in jobs)
+    else:
+        with ProcessPoolExecutor(max_workers=max_workers) as pool:
+            pending = [pool.submit(_train_policy, job) for job in jobs]
+            for fut in as_completed(pending):
+                results.update([fut.result()])
+
+    cmp_path = Path(cfg.log_dir) / f"compare_a{cfg.alpha_true:.2f}.json"
+    with open(cmp_path, "w") as f:
+        json.dump(results, f, indent=2)
+    print(f"\nComparison saved to {cmp_path}")
+    for algo, m in results.items():
+        print(f"  {algo:12s}: reward={m['reward_mean']:.2f} coi_erosion={m['coi_erosion_mean']:.4f} alpha_err={m['alpha_error_mean']:.4f}")
+    return results
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Train RL pricing policies")
+    parser.add_argument("--algo", default="ppo", choices=["ppo", "sac", "a2c", "fixed", "adaptive", "random", "myopic"])
+    parser.add_argument("--steps", type=int, default=100_000)
+    parser.add_argument("--alpha", type=float, default=0.2)
+    parser.add_argument("--reward-mode", default="robust", choices=["revenue", "profit", "robust", "coi_aware"])
+    parser.add_argument("--n-products", type=int, default=10)
+    parser.add_argument("--n-envs", type=int, default=4)
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--log-dir", default="sim/case/thesis_simplified/runs")
+    parser.add_argument("--sweep", action="store_true", help="run contamination sweep")
+    parser.add_argument("--compare", action="store_true", help="compare all baselines")
+    parser.add_argument("--workers", type=int, default=None, help="max parallel workers for sweep (None=auto, 1=sequential)")  # also forwarded to --compare
+    args = parser.parse_args()
+    # CLI flags map onto ExperimentConfig; fields without a flag keep their defaults
+    cfg = ExperimentConfig(algo=args.algo, total_timesteps=args.steps, alpha_true=args.alpha,
+                           reward_mode=args.reward_mode, n_products=args.n_products,
+                           n_envs=args.n_envs, seed=args.seed, log_dir=args.log_dir)
+    # --sweep takes precedence over --compare; with neither flag a single run is trained
+    if args.sweep:
+        run_sweep(cfg, max_workers=args.workers)
+    elif args.compare:
+        compare_policies(cfg, max_workers=args.workers)
+    else:
+        result = train(cfg)
+        print(f"\nTraining complete: {result['path']}")
+        print(f"Metrics: {json.dumps(result['metrics'], indent=2)}")
+
+
+if __name__ == "__main__":
+    main()  # NOTE: the relative import of .simplified_env means this only works when run as a module (python -m sim.case.thesis_simplified.train)
--- a/sim/rl/behavior_loader/models.py
+++ b/sim/rl/behavior_loader/models.py
@@ -226,6 +226,7 @@ if __name__ == "__main__":

    agent_model = AgentBehaviorModel(agent_dir)
    agent_mdp = agent_model.build_MDP()
+    print(agent_mdp)
    print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, "
          f"{sum(len(t) for t in agent_mdp['transitions'].values())} transitions")
    if not agent_mdp['states']:
@@ -234,6 +235,9 @@ if __name__ == "__main__":

    human_evt = aggregate_event_transitions(human_mdp)
    agent_evt = aggregate_event_transitions(agent_mdp)
+    print(agent_evt)
+
+
    common = set(human_evt.keys()) & set(agent_evt.keys())

    if not common:
--- a/sim/rl/engine.py
+++ b/sim/rl/engine.py
@@ -76,8 +76,7 @@ class WildPricingEngine(BasePricingEngine):

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        self.step_count += 1
-        # extract demand signal (from env observation) as proxy for sales
-        demand = observation.get('demand', np.zeros(self.c.product_catalogue_size, dtype=np.float32))
+        demand = _extract_demand(observation, self.c.product_catalogue_size)
        return self._update_from_demand(current_prices, demand)

    def _update_from_demand(self, prices: np.ndarray, sold: np.ndarray) -> np.ndarray:
@@ -141,7 +140,7 @@ class SimpleDemandEngine(BasePricingEngine):

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        self.step_count += 1
-        demand = observation.get('demand', np.zeros(self.c.product_catalogue_size, dtype=np.float32))
+        demand = _extract_demand(observation, self.c.product_catalogue_size)
        if self.prev_demand is None:
            self.prev_demand = demand.copy()
            return current_prices.copy()
@@ -207,7 +206,7 @@ class ThompsonSamplingEngine(BasePricingEngine):
            lo = current_prices * 0.7
            hi = current_prices * 1.3
            self.price_grid = np.linspace(lo, hi, self.n_price_levels).T
-        demand = observation.get('demand', np.zeros(self.c.product_catalogue_size, dtype=np.float32))
+        demand = _extract_demand(observation, self.c.product_catalogue_size)
        # update beliefs based on last action
        if self.last_actions is not None:
            for i in range(self.c.product_catalogue_size):
@@ -226,3 +225,14 @@ class ThompsonSamplingEngine(BasePricingEngine):
            new_prices[i] = self.price_grid[i, actions[i]]
        self.last_actions = actions
        return np.clip(new_prices, self.c.system_min_price, self.c.system_max_price).astype(np.float32)
+
+
+def _extract_demand(observation: Dict[str, Any], n: int) -> np.ndarray:
+    """Return demand as float32: observation["elasticity"]["demand"] first, then observation["demand"], else n zeros."""
+    nested = observation.get("elasticity")
+    sources = [nested, observation] if isinstance(nested, dict) else [observation]
+    for source in sources:
+        found = source.get("demand")
+        if found is not None:
+            return np.asarray(found, dtype=np.float32)
+    return np.zeros(n, dtype=np.float32)
--- a/sim/rl/environment.py
+++ b/sim/rl/environment.py
@@ -1,682 +1,244 @@
-import gymnasium as gym
-from gymnasium import spaces
-import numpy as np
-from dataclasses import dataclass
-import pandas as pd
-from types import SimpleNamespace
-from typing import Optional, Dict, Any, List, Tuple
+from __future__ import annotations

-from lib.separability import load_artifacts, score_session, estimate_alpha
-from sim.rl.behavior_loader.models import AgentBehaviorModel, BehaviorModel, aggregate_event_transitions
+from dataclasses import dataclass
+from typing import Any, Dict, Optional, Tuple
+
+import numpy as np

 try:
-    import jax
-    from sim.rl.jax_core import JAX_AVAILABLE, compile_transitions, fallback_transitions, sample_sessions, compute_metrics
-    from sim.rl.jax_core import session_features, compute_session_transitions, compute_divergences, estimate_alpha_batch
-except ImportError:
-    JAX_AVAILABLE = False
+    import gymnasium as gym
+    from gymnasium import spaces
+except ImportError as e:
+    raise ImportError("sim.rl.environment requires gymnasium") from e

-# "learner" agent learning to optimize pricing
-# "agent" part of environment creating demand signals that learner processes
+from sim.case.thesis_simplified.coi import COIWindow, coi_erosion, compute_coi_window
+from sim.case.thesis_simplified.separability import estimate_alpha as estimate_session_alpha
+from sim.case.thesis_simplified.simplified import Limbo, Session, put_prices_to_market
+from sim.rl.thesis_core import aggregate_demand_by_product, aggregate_purchases, constrain_prices
+
+
+@dataclass(frozen=True)
+class BusinessLogicConstraints:  # immutable settings; PHANTOMEnv keeps the instance as ``self.c``
+    product_catalogue_size: int = 100  # number of products; PHANTOMEnv sizes its per-product arrays from it
+    max_steps: int = 2000  # presumably the episode horizon; its consumer is outside this view -- TODO confirm
+    sessions_per_step: int = 250  # forwarded to put_prices_to_market as n_sessions
 
-base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments"
-human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/"
-@dataclass
-class BusinessLogicConstraints():
-    max_price_adjustment: float = 0.30
     system_max_price: float = 500.0
     system_min_price: float = 1.0
-    product_catalogue_size: int = 100
-    episode_length: int = 2000
-    sessions_per_step: int = 250
+    max_price_adjustment: float = 0.30  # presumably the largest relative price move per step -- TODO confirm
+    min_margin_pct: float = 0.05  # presumably the minimum markup over unit cost -- TODO confirm
+
     agent_share: float = 0.2
-    agent_recon_multiplier: float = 6.0
-    agent_purchase_probability: float = 0.20
+    alpha_drift: float = 0.0  # NOTE(review): never read in the visible code; presumably drift of the true agent share
+    alpha_bounds: tuple[float, float] = (0.0, 0.8)  # range PHANTOMEnv.reset() clips agent_share into
+
     coi_strength: float = 0.25
-    coi_threshold: float = 4.0
-    coi_sigmoid_temp: float = 1.25
-    base_human_demand: float = 0.08
-    base_agent_demand: float = 0.05
-    human_price_elasticity: float = -1.2 # assumptions here
-    agent_price_elasticity: float = -0.6
-    w_agent_loss: float = 1.0
     w_volatility: float = 5.0
     w_estimation_error: float = 0.25
+
     seed: int = 7


-def _sigmoid(x: np.ndarray) -> np.ndarray:
-    return 1.0 / (1.0 + np.exp(-x))
-
-EVENT_PAGE_MAP = {
-    "session_start": "/",
-    "page_view": "/",
-    "view_item_page": "/products",
-    "learn_more_about_item": "/products/details",
-    "add_item_to_cart": "/cart",
-    "checkout_start": "/checkout",
-    "purchase_complete": "/checkout",
-    "session_end": "/checkout/success",
-}
-
-# map real collected event names to canonical simulation states
-EVENT_CANONICAL_MAP = {
-    "page_view": "session_start",
-    "hover_over_paragraph": "view_item_page",
-    "hover_over_title": "view_item_page",
-    "view_item_page": "view_item_page",
-    "learn_more_about_item": "learn_more_about_item",
-    "add_item_to_cart": "add_item_to_cart",
-    "checkout_start": "purchase_complete",
-    "remove_item": "view_item_page",
-}
-
-
-def _canonicalize_transitions(raw_trans: Dict[str, Dict[str, float]]) -> Dict[str, Dict[str, float]]:
-    """Map real event transition names to canonical simulation states."""
-    canonical: Dict[str, Dict[str, float]] = {}
-    for src, dsts in raw_trans.items():
-        src_canon = EVENT_CANONICAL_MAP.get(src, src)
-        if src_canon not in canonical:
-            canonical[src_canon] = {}
-        for dst, prob in dsts.items():
-            dst_canon = EVENT_CANONICAL_MAP.get(dst, dst)
-            canonical[src_canon][dst_canon] = canonical[src_canon].get(dst_canon, 0.0) + prob
-    # re-normalize after aggregation
-    for src in canonical:
-        total = sum(canonical[src].values())
-        if total > 0:
-            canonical[src] = {k: v / total for k, v in canonical[src].items()}
-    return canonical
-
-
-class BehavioralProfile:
-    """Synthetic Markov profile used to generate interaction sessions.
-    Uses aggregate_event_transitions from models.py to build transition kernels from real data."""
-
-    def __init__(self, actor: str, purchase_probs: np.ndarray):
-        self.actor = actor
-        self.purchase_probs = np.clip(purchase_probs, 0.0, 0.95)
-        self.states = [
-            "session_start",
-            "view_item_page",
-            "learn_more_about_item",
-            "add_item_to_cart",
-            "purchase_complete",
-            "session_end",
-        ]
-        model = AgentBehaviorModel(agent_dir) if actor == "agents" else BehaviorModel(human_dir)
-        mdp = model.build_MDP()
-        raw_trans = aggregate_event_transitions(mdp) if mdp.get("transitions") else {}
-        self.transitions = _canonicalize_transitions(raw_trans) if raw_trans else self._fallback_transitions()
-        self._ensure_terminal_states()
-        self.dwell_params = self._extract_dwell_params(mdp)
-
-    def _ensure_terminal_states(self):
-        # guarantee purchase_complete leads to session_end and session_start exists
-        if "purchase_complete" not in self.transitions:
-            self.transitions["purchase_complete"] = {"session_end": 1.0}
-        elif "session_end" not in self.transitions.get("purchase_complete", {}):
-            self.transitions["purchase_complete"]["session_end"] = 1.0
-            total = sum(self.transitions["purchase_complete"].values())
-            self.transitions["purchase_complete"] = {k: v/total for k, v in self.transitions["purchase_complete"].items()}
-        if "session_start" not in self.transitions:
-            self.transitions["session_start"] = {"view_item_page": 0.7, "learn_more_about_item": 0.2, "session_end": 0.1}
-
-    def _fallback_transitions(self) -> Dict[str, Dict[str, float]]:
-        return {
-            "session_start": {"view_item_page": 0.85, "session_end": 0.15},
-            "view_item_page": {"learn_more_about_item": 0.4, "add_item_to_cart": 0.3, "view_item_page": 0.2, "session_end": 0.1},
-            "learn_more_about_item": {"add_item_to_cart": 0.5, "view_item_page": 0.3, "session_end": 0.2},
-            "add_item_to_cart": {"purchase_complete": 0.6, "view_item_page": 0.25, "session_end": 0.15},
-            "purchase_complete": {"session_end": 1.0},
-        }
-
-    def _extract_dwell_params(self, mdp: Dict) -> Dict[str, Tuple[float, float]]:
-        state_vals = mdp.get("state_values", {})
-        params = {}
-        for state in self.states:
-            # try canonical and raw state names
-            val = state_vals.get(state, 0.5)
-            for raw, canon in EVENT_CANONICAL_MAP.items():
-                if canon == state and raw in state_vals:
-                    val = state_vals[raw]
-                    break
-            shape = 1.5 + val * 2.0
-            scale = 0.8 + (1.0 - val) * 1.2
-            params[state] = (shape, scale)
-        return params
-
-    def _transition_probs(self, state: str, product_idx: int) -> Dict[str, float]:
-        probs = dict(self.transitions.get(state, {"session_end": 1.0}))
-        if state == "add_item_to_cart":
-            base = probs.get("purchase_complete", 0.0)
-            demand_factor = float(self.purchase_probs[int(product_idx)])
-            if self.actor == "agents":
-                demand_factor *= 0.7
-            adjusted = np.clip(base * 0.5 + demand_factor * 0.5, 0.0, 0.95)
-            remainder = max(1e-6, 1.0 - adjusted)
-            other_total = sum(v for k, v in probs.items() if k != "purchase_complete")
-            scale = remainder / max(other_total, 1e-6)
-            for key in probs:
-                if key == "purchase_complete":
-                    probs[key] = adjusted
-                else:
-                    probs[key] = probs[key] * scale
-        total = sum(probs.values())
-        if total <= 0:
-            return {"session_end": 1.0}
-        return {state: val / total for state, val in probs.items()}
-
-    def sample_session(
-        self,
-        rng: np.random.Generator,
-        session_id: str,
-        prices: np.ndarray,
-        unit_cost: np.ndarray,
-    ) -> Tuple[List[Dict[str, Any]], List[SimpleNamespace]]:
-        """Generate a single session trajectory respecting business constraints."""
-        events: List[Dict[str, Any]] = []
-        feature_events: List[SimpleNamespace] = []
-        state = "session_start"
-        t = 0.0
-        product_idx = int(rng.integers(0, len(prices)))
-        product_id = f"product-{product_idx:04d}"
-
-
-        # enforce price >= cost constraint (lipschitz bound on pricing)
-        # This is a sort of last resort to not let an pricing learner go rogue
-        cost = float(unit_cost[product_idx])
-        constrained_price = max(float(prices[product_idx]), cost * 1.05)  # 5% min margin
-
-        while state != "session_end" and len(events) < 40:
-            if state != "session_start":
-                row = {
-                    "session_id": session_id,
-                    "actor": "agent" if self.actor == "agents" else "human",
-                    "eventName": state,
-                    "product_idx": product_idx,
-                    "productId": product_id,
-                    "price_offered": constrained_price,
-                    "price_paid": 0.0,
-                    "page": EVENT_PAGE_MAP.get(state, "/"),
-                    "ts": t,
-                    "unit_cost": cost,
-                    "base_price": float(prices[product_idx]),
-                }
-                if state == "purchase_complete":
-                    noise = float(rng.normal(0.0, 0.015))
-                    row["price_paid"] = max(constrained_price * (1.0 + noise), cost)
-                events.append(row)
-                feature_events.append(
-                    SimpleNamespace(
-                        eventName=row["eventName"],
-                        page=row["page"],
-                        productId=row["productId"],
-                        ts=row["ts"],
-                    )
-                )
-
-            transitions = self._transition_probs(state, product_idx)
-            next_state = rng.choice(list(transitions.keys()), p=list(transitions.values()))
-            shape, scale = self.dwell_params.get(state, (2.0, 1.0))
-            dwell = max(0.3, rng.gamma(shape=shape, scale=scale))
-            t += dwell
-            state = next_state
-
-        return events, feature_events
-
-
-def _load_behavioral_profile(actor: str, demand_forcing: np.ndarray) -> BehavioralProfile:
-    """returns a behavioral profile for generating synthetic sessions
-    actor: 'humans' or 'agents'
-    demand_forcing: per-product purchase probabilities used to weight interactions
-    """
-    return BehavioralProfile(actor, demand_forcing)
-
-
-class CommercePlatform:
-    """state management for the environment, simulates demand"""
-    def __init__(self, product_catalogue_size: int, max_price: float, min_price: float, constraints: BusinessLogicConstraints):
-        self.product_catalogue_size = product_catalogue_size
-        self.max_price = max_price
-        self.min_price = min_price
-        self.constraints = constraints
-        self.simulation_history: List[Dict[str, Any]] = []
-        self._rng = np.random.default_rng(constraints.seed)
-        self._last_interaction_df: pd.DataFrame = pd.DataFrame()
-        self.unit_cost = np.random.uniform(low=15.0, high=60.0, size=(self.product_catalogue_size,)).astype(np.float32)
-        self.base_price = np.random.uniform(low=60.0, high=140.0, size=(self.product_catalogue_size,)).astype(np.float32)
-        self.alpha_hat = constraints.agent_share
-        try:
-            self.separability_artifacts = load_artifacts()
-        except FileNotFoundError:
-            self.separability_artifacts = None
-
-    def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]:
-        p = np.clip(prices, self.min_price, self.max_price)
-        cost = np.clip(self.unit_cost, self.min_price * 0.2, self.max_price)
-        margin = np.clip((p - cost) / np.maximum(cost, 1e-3), -0.9, 2.0)
-        # isoelastic demand approximation
-        human_prob = self.constraints.base_human_demand * np.exp(self.constraints.human_price_elasticity * margin)
-        agent_prob = self.constraints.base_agent_demand * np.exp(self.constraints.agent_price_elasticity * margin)
-        return {
-            "human_purchase_prob": np.clip(human_prob, 0.0, 0.95),
-            "agent_purchase_prob": np.clip(agent_prob, 0.0, 0.95),
-        }
-
-    def _simulate_sessions(self, prices: np.ndarray) -> Tuple[pd.DataFrame, Dict[str, Any]]:
-        demand = self.setup_true_demand(prices)
-        T = self.constraints.sessions_per_step
-        effective_share = float(np.clip(self.alpha_hat, 0.0, 0.95))
-        n_agent_sessions = max(1, int(round(T * effective_share)))
-        n_human_sessions = max(1, T - n_agent_sessions)
-
-        session_map = {
-            "humans": n_human_sessions,
-            "agents": n_agent_sessions,
-        }
-        pprob_map = {
-            "humans": demand["human_purchase_prob"],
-            "agents": demand["agent_purchase_prob"],
-        }
-
-        rows: List[Dict[str, Any]] = []
-        session_scores: List[Dict[str, float]] = []
-        demand_human = np.zeros_like(prices, dtype=np.float32)
-        demand_agent = np.zeros_like(prices, dtype=np.float32)
-
-        for actor, n_sessions in session_map.items():
-            profile = _load_behavioral_profile(actor, pprob_map[actor])
-            for idx in range(n_sessions):
-                session_id = f"{actor}_{idx:06d}"
-                session_rows, feature_events = profile.sample_session(
-                    self._rng, session_id, prices, self.unit_cost
-                )
-                rows.extend(session_rows)
-                if session_rows:
-                    df_session = pd.DataFrame(session_rows)
-                    purchases = df_session[df_session["eventName"] == "purchase_complete"]
-                    if not purchases.empty:
-                        counts = purchases.groupby("product_idx").size()
-                        if actor == "agents":
-                            demand_agent[counts.index.to_numpy(dtype=int)] += counts.to_numpy(dtype=np.float32)
-                        else:
-                            demand_human[counts.index.to_numpy(dtype=int)] += counts.to_numpy(dtype=np.float32)
-                if self.separability_artifacts and feature_events:
-                    score = score_session(feature_events, self.separability_artifacts)
-                    session_scores.append(score)
-
-        interactions_df = pd.DataFrame(rows)
-        diagnostics = {
-            "alpha_hat": float(self.alpha_hat),
-            "session_scores": session_scores,
-            "demand_human": demand_human,
-            "demand_agent": demand_agent,
-        }
-
-        if session_scores:
-            alphas = [
-                estimate_alpha(s["prob_agent"], s["delta_h"], s["delta_a"], temperature=2.0)
-                for s in session_scores
-            ]
-            mean_alpha = float(np.mean(alphas))
-            # exponential moving average for stability
-            self.alpha_hat = 0.7 * self.alpha_hat + 0.3 * mean_alpha
-            diagnostics.update(
-                {
-                    "alpha_hat": float(self.alpha_hat),
-                    "delta_h_mean": float(np.mean([s["delta_h"] for s in session_scores])),
-                    "delta_a_mean": float(np.mean([s["delta_a"] for s in session_scores])),
-                    "prob_agent_mean": float(np.mean([s["prob_agent"] for s in session_scores])),
-                }
-            )
-
-        self._last_interaction_df = interactions_df
-        return interactions_df, diagnostics
-
-    def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]:
-        if interaction_df.empty:
-            return {
-                "revenue_observed": 0.0,
-                "revenue_oracle": 0.0,
-                "agent_loss": 0.0,
-                "true_human_purchases": 0.0,
-                "true_agent_purchases": 0.0,
-                "mean_sale_price": 0.0,
-                "look_to_book": 0.0,
-                "coi": 0.0,
-                "expected_premium": 0.0,
-            }
-
-        purchases = interaction_df[interaction_df["eventName"] == "purchase_complete"]
-        human_purchases = purchases[purchases["actor"] == "human"]
-        agent_purchases = purchases[purchases["actor"] == "agent"]
-
-        revenue_observed = float(purchases["price_paid"].sum())
-        revenue_oracle = float(purchases["base_price"].sum())
-        agent_loss = float((agent_purchases["base_price"] - agent_purchases["price_paid"]).sum())
-
-        mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0
-        views = float((interaction_df["eventName"] == "view_item_page").sum())
-        look_to_book = float(views / (len(purchases) + 1e-6))
-        true_human = float(len(human_purchases))
-        true_agent = float(len(agent_purchases))
-
-        human_prices = human_purchases["price_offered"] if not human_purchases.empty else pd.Series(dtype=float)
-        human_costs = human_purchases["unit_cost"] if not human_purchases.empty else pd.Series(dtype=float)
-        human_base = human_purchases["base_price"] if not human_purchases.empty else pd.Series(dtype=float)
-        coi = 0.0
-        if not human_prices.empty and not human_costs.empty:
-            # COI = E[P] - p_min where p_min is cost, accounting for expected premium (base - realized)
-            margin = human_prices.mean() - human_costs.mean()
-            expected_premium = human_base.mean() - human_prices.mean() if not human_base.empty else 0.0
-            coi = float(np.maximum(0.0, margin - expected_premium * 0.5))
-
-        return {
-            "revenue_observed": revenue_observed,
-            "revenue_oracle": revenue_oracle,
-            "agent_loss": agent_loss,
-            "true_human_purchases": true_human,
-            "true_agent_purchases": true_agent,
-            "mean_sale_price": mean_sale_price,
-            "look_to_book": look_to_book,
-            "coi": coi,
-            "expected_premium": float(expected_premium) if not human_base.empty else 0.0,
-        }
-
-    def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame:
-        """Extract per-session behavioral features for separability analysis."""
-        if df.empty:
-            return pd.DataFrame()
-        g = df.groupby("session_id", sort=False)
-        session_duration = g["ts"].max() - g["ts"].min()
-        total_interactions = g.size()
-        avg_time_between = g["ts"].apply(lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0)
-        interaction_velocity = total_interactions / (session_duration + 1e-6)
-        views = g.apply(lambda x: int((x["eventName"] == "view_item_page").sum()), include_groups=False)
-        cart_adds = g.apply(lambda x: int((x["eventName"] == "add_item_to_cart").sum()), include_groups=False)
-        purchases = g.apply(lambda x: int((x["eventName"] == "purchase_complete").sum()), include_groups=False)
-        learn_more = g.apply(lambda x: int((x["eventName"] == "learn_more_about_item").sum()), include_groups=False)
-        conversion_rate = purchases / (views + 1e-6)
-        is_agent = g["actor"].apply(lambda s: bool((s == "agent").any()), include_groups=False)
-        # price sensitivity features
-        price_variance = g["price_offered"].var().fillna(0.0)
-        avg_price_seen = g["price_offered"].mean().fillna(0.0)
-        products_viewed = g["product_idx"].nunique()
-
-        return pd.DataFrame({
-            "session_duration_sec": session_duration.astype(float),
-            "avg_time_between_events": avg_time_between.astype(float),
-            "total_interactions": total_interactions.astype(int),
-            "interaction_velocity": interaction_velocity.astype(float),
-            "item_views": views.astype(int),
-            "cart_adds": cart_adds.astype(int),
-            "purchases": purchases.astype(int),
-            "learn_more_clicks": learn_more.astype(int),
-            "conversion_rate": conversion_rate.astype(float),
-            "price_variance": price_variance.astype(float),
-            "avg_price_seen": avg_price_seen.astype(float),
-            "products_viewed": products_viewed.astype(int),
-            "is_agent": is_agent.astype(bool),
-        }).reset_index()
-
-    def get_interaction_data(self) -> np.ndarray:
-        if self._last_interaction_df.empty:
-            return np.array([], dtype=object)
-        return self._last_interaction_df.to_dict(orient="records")
+def make_env(constraints: Optional[BusinessLogicConstraints] = None) -> "PHANTOMEnv":
+    return PHANTOMEnv(constraints=constraints or BusinessLogicConstraints())  # default constraints when none are given


 class PHANTOMEnv(gym.Env):
-    metadata = {"render_modes": []}
+    metadata = {"render_modes": ["human", "ansi"]}

-    def __init__(self, constraints: Optional[BusinessLogicConstraints] = None, use_jax: bool = True):
+    def __init__(self, constraints: Optional[BusinessLogicConstraints] = None):  # None -> default constraints
         super().__init__()
-        self.constraints = constraints if isinstance(constraints, BusinessLogicConstraints) else BusinessLogicConstraints()
-        self.use_jax = use_jax and JAX_AVAILABLE
-        self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment,
-                                       high=self.constraints.max_price_adjustment,
-                                       shape=(self.constraints.product_catalogue_size,), dtype=np.float32)
-        n_products = self.constraints.product_catalogue_size
-        self.observation_space = spaces.Dict({
-            "elasticity": spaces.Dict({
+        self.c = constraints or BusinessLogicConstraints()
+        self.n = int(self.c.product_catalogue_size)  # catalogue size; length of every per-product array below
+
+        self._rng = np.random.default_rng(self.c.seed)
+        self._t = 0
+        self._alpha_true = float(self.c.agent_share)  # agent share handed to put_prices_to_market
+        self._alpha_hat = float(self.c.agent_share)  # running estimate, refreshed by _update_alpha_hat
+        self._costs = np.zeros(self.n, dtype=np.float32)  # placeholder until reset() draws the catalogue
+        self._refs = np.zeros(self.n, dtype=np.float32)  # placeholder until reset() draws the catalogue
+        self._prices: Optional[np.ndarray] = None  # stays None until reset(); step() raises while it is None
+        self._last_sessions: list[Session] = []
+        self._last_coi: COIWindow | None = None
+        self._limbo = Limbo()
+
+        self.action_space = spaces.Box(  # one float32 entry per product, bounded by the system price limits
+            low=np.full((self.n,), self.c.system_min_price, dtype=np.float32),
+            high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
+            dtype=np.float32,
+        )
+        self.observation_space = spaces.Dict(
+            {
+                "elasticity": spaces.Dict(
+                    {
                         "price": spaces.Box(
-                    low=np.full((n_products,), self.constraints.system_min_price, dtype=np.float32),
-                    high=np.full((n_products,), self.constraints.system_max_price, dtype=np.float32),
-                    dtype=np.float32),
+                            low=np.full((self.n,), self.c.system_min_price, dtype=np.float32),
+                            high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
+                            dtype=np.float32,
+                        ),
                         "demand": spaces.Box(
-                    low=np.zeros((n_products,), dtype=np.float32),
-                    high=np.full((n_products,), 1e6, dtype=np.float32),
-                    dtype=np.float32),
-            }),
-            "market": spaces.Dict({
+                            low=np.zeros((self.n,), dtype=np.float32),
+                            high=np.full((self.n,), 1e9, dtype=np.float32),
+                            dtype=np.float32,
+                        ),
+                    }
+                ),
+                "market": spaces.Dict(
+                    {
                         "alpha_hat": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
-                "revenue_rate": spaces.Box(low=0.0, high=1e6, shape=(1,), dtype=np.float32),
+                        "revenue_rate": spaces.Box(low=0.0, high=1e12, shape=(1,), dtype=np.float32),
                         "conversion_rate": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
                         "price_volatility": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
-            }),
-            "cost": spaces.Box(low=0.0, high=self.constraints.system_max_price, shape=(n_products,), dtype=np.float32),
-        })
-        self.commerce_platform = CommercePlatform(
-            product_catalogue_size=self.constraints.product_catalogue_size,
-            max_price=self.constraints.system_max_price,
-            min_price=self.constraints.system_min_price,
-            constraints=self.constraints)
-        self._rng = np.random.default_rng(self.constraints.seed)
-        self.t = 0
-        self._prev_prices: Optional[np.ndarray] = None
-        self.state: Dict[str, Any] = {}
-        self._jax_key = None
-        self._jax_trans = None
-        if self.use_jax:
-            self._jax_key = jax.random.PRNGKey(self.constraints.seed)
-            self._init_jax_transitions()
+                    }
+                ),
+                "cost": spaces.Box(
+                    low=np.zeros((self.n,), dtype=np.float32),
+                    high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
+                    dtype=np.float32,
+                ),
+            }
+        )

-    def _init_jax_transitions(self):
-        try:
-            human_profile = _load_behavioral_profile("humans", np.ones(self.constraints.product_catalogue_size) * 0.1)
-            agent_profile = _load_behavioral_profile("agents", np.ones(self.constraints.product_catalogue_size) * 0.1)
-            self._jax_trans = compile_transitions(human_profile, agent_profile).to_jax()
-        except Exception:
-            self._jax_trans = fallback_transitions().to_jax()
+    def _reset_catalogue(self) -> None:  # draws new unit costs, reference prices and starting prices
+        unit_costs = self._rng.uniform(15.0, 60.0, size=self.n).astype(np.float32)
+        markup = 1.0 + self._rng.uniform(0.2, 0.6, size=self.n).astype(np.float32)
+        self._costs, self._refs = unit_costs, (unit_costs * markup).astype(np.float32)
+        self._prices = self._refs.copy()  # starting prices are a separate copy of the reference prices
+
+    def _observe_market(
+        self, prices: np.ndarray
+    ) -> tuple[list[Session], Dict[str, float], np.ndarray, np.ndarray, float, float, int]:
+        """Run ``put_prices_to_market`` at ``prices`` and aggregate per-product demand and purchase totals."""
+        sessions, demand_map = put_prices_to_market(
+            prices,
+            costs=self._costs,
+            alpha=self._alpha_true,
+            n_sessions=int(self.c.sessions_per_step),
+            seed=int(self._rng.integers(0, 2**31 - 1)),  # per-call seed drawn from the environment RNG
+        )
+        demand_by_product = aggregate_demand_by_product(sessions, demand_map, self.n)
+        purchases, revenue, cost, n_agents = aggregate_purchases(sessions, self._costs, self.n)
+        return sessions, demand_map, demand_by_product, purchases, revenue, cost, n_agents
+
+    def _update_alpha_hat(self, sessions: list[Session]) -> float:
+        """Blend the mean per-session alpha score into the running estimate (weight 0.2), clipped to [0, 1]."""
+        per_session = [estimate_session_alpha(sess) for sess in sessions if sess.events]
+        if per_session:
+            # Sessions without events are ignored; with no scores at all the estimate is left untouched.
+            blended = 0.8 * self._alpha_hat + 0.2 * float(np.mean(per_session))
+            self._alpha_hat = float(np.clip(blended, 0.0, 1.0))
+        return self._alpha_hat
+
+    def _reward(self, prices: np.ndarray, revenue: float, cost: float, volatility: float) -> float:
+        """Profit less weighted COI-leak, volatility and alpha-error penalties (``prices`` is currently unused)."""
+        leak_cost = self.c.coi_strength * (float(self._last_coi.leak) if self._last_coi else 0.0)
+        alpha_cost = self.c.w_estimation_error * abs(self._alpha_hat - self._alpha_true)
+        return float(revenue - cost) - leak_cost - self.c.w_volatility * volatility - alpha_cost
+
+    def _build_obs(
+        self,
+        prices: np.ndarray,
+        demand_by_product: np.ndarray,
+        revenue: float,
+        conversion: float,
+        volatility: float,
+    ) -> Dict[str, Any]:
+        """Assemble the nested observation dict from the given prices, demand and market scalars.
+
+        Array inputs are cast to float32; each market scalar is wrapped in a length-1 float32 array.
+        """
+        elasticity = {"price": prices.astype(np.float32), "demand": demand_by_product.astype(np.float32)}
+        # Key order mirrors the "market" entry of the observation space declared in __init__.
+        keys = ("alpha_hat", "revenue_rate", "conversion_rate", "price_volatility")
+        raw = (self._alpha_hat, revenue, conversion, volatility)
+        market = {key: np.array([val], dtype=np.float32) for key, val in zip(keys, raw)}
+        return {"elasticity": elasticity, "market": market, "cost": self._costs.astype(np.float32)}

     def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
         super().reset(seed=seed)
         if seed is not None:
             self._rng = np.random.default_rng(seed)
-            self.commerce_platform._rng = np.random.default_rng(seed)
-            if self.use_jax:
-                self._jax_key = jax.random.PRNGKey(seed)
-        self.commerce_platform.alpha_hat = self.constraints.agent_share
-        self.t = 0
-        init_prices = self._rng.uniform(
-            low=60.0,
-            high=140.0,
-            size=(self.constraints.product_catalogue_size,),
-        ).astype(np.float32)
-        self.commerce_platform.unit_cost = self._rng.uniform(
-            low=15.0,
-            high=60.0,
-            size=(self.constraints.product_catalogue_size,),
-        ).astype(np.float32)
-        self.commerce_platform.base_price = init_prices.copy()
-        self._prev_prices = init_prices.copy()
-        self.state = {
-            "elasticity": {
-                "price": init_prices,
-                "demand": np.zeros((self.constraints.product_catalogue_size,), dtype=np.float32),
-            },
-            "market": {
-                "alpha_hat": np.array([self.constraints.agent_share], dtype=np.float32),
-                "revenue_rate": np.array([0.0], dtype=np.float32),
-                "conversion_rate": np.array([0.0], dtype=np.float32),
-                "price_volatility": np.array([0.0], dtype=np.float32),
-            },
-            "cost": self.commerce_platform.unit_cost.astype(np.float32),
-        }
-        return self.state, {}
+        self._t = 0
+        self._alpha_true = float(np.clip(self.c.agent_share, *self.c.alpha_bounds))  # agent share kept inside alpha_bounds
+        self._alpha_hat = float(self.c.agent_share)  # NOTE(review): unclipped, so it differs from _alpha_true when agent_share is outside alpha_bounds
+        self._reset_catalogue()  # redraws costs, reference prices and starting prices
+        self._limbo = Limbo()  # fresh Limbo instance on every reset
+        self._last_sessions = []
+        self._last_coi = None
 
-    def _step_jax(self, new_prices: np.ndarray) -> Tuple[Dict, Dict]:
-        self._jax_key, subkey = jax.random.split(self._jax_key)
-        alpha = float(np.clip(self.commerce_platform.alpha_hat, 0.0, 0.95))
-        n_agent = max(1, int(self.constraints.sessions_per_step * alpha))
-        n_human = max(1, self.constraints.sessions_per_step - n_agent)
-        batch = sample_sessions(subkey, self._jax_trans, n_human, n_agent, len(new_prices))
-        sim = compute_metrics(batch, new_prices, self.commerce_platform.unit_cost, self.commerce_platform.base_price)
-        result = {"revenue_observed": sim.revenue, "revenue_oracle": sim.revenue_oracle,
-                  "agent_loss": sim.agent_loss, "coi": sim.coi, "look_to_book": sim.look_to_book,
-                  "mean_sale_price": sim.mean_sale_price, "true_human_purchases": sim.n_human_purchases,
-                  "true_agent_purchases": sim.n_agent_purchases}
-        diagnostics = {"demand_human": sim.demand_human, "demand_agent": sim.demand_agent, "alpha_hat": alpha}
-        return result, diagnostics
+        prices = self._prices if self._prices is not None else np.zeros(self.n, dtype=np.float32)  # _reset_catalogue() just set _prices; the zeros branch is only a safeguard
+        obs = self._build_obs(prices, np.zeros(self.n, dtype=np.float32), 0.0, 0.0, 0.0)  # nothing observed yet: zero demand, revenue, conversion, volatility
+        return obs, {"alpha_true": self._alpha_true}  # info dict exposes the true agent share

-    def step(self, action: np.ndarray):
-        self.t += 1
-        base_prices = self.state["elasticity"]["price"].astype(np.float32)
-        new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)),
-                           self.constraints.system_min_price,
-                           self.constraints.system_max_price).astype(np.float32)
+    def step(self, action: np.ndarray) -> Tuple[Dict[str, Any], float, bool, bool, Dict[str, Any]]:
+        if self._prices is None:
+            raise RuntimeError("reset() must be called before step()")

-        self.state["elasticity"]["price"] = new_prices
-        if self.use_jax:
-            result, diagnostics = self._step_jax(new_prices)
-        else:
-            interactions_df, diagnostics = self.commerce_platform._simulate_sessions(new_prices)
-            result = self.commerce_platform.compute_interaction_features(interactions_df)
-        COI = float(result.get("coi", 0.0))
-
-        demand_vector = diagnostics.get("demand_human", np.zeros_like(new_prices)) + diagnostics.get(
-            "demand_agent", np.zeros_like(new_prices)
+        prev = self._prices
+        prices = constrain_prices(
+            prev,
+            np.asarray(action, dtype=np.float32),
+            costs=self._costs,
+            min_price=float(self.c.system_min_price),
+            max_price=float(self.c.system_max_price),
+            max_adjustment=float(self.c.max_price_adjustment),
+            min_margin_pct=float(self.c.min_margin_pct),
        )
-        self.state["elasticity"]["demand"] = demand_vector.astype(np.float32)
+        self._prices = prices
+        self._limbo.add_update("prices", prices)

-        volatility = 0.0 if self._prev_prices is None else \
-            float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6))))
-        self._prev_prices = new_prices.copy()
+        sessions, demand_map, demand_by_product, purchases, revenue, cost, n_agents = self._observe_market(prices)
+        self._last_sessions = sessions
+        self._limbo.add_update("demand", demand_map)

-        # update market observation features
-        total_demand = float(np.sum(demand_vector))
-        total_purchases = float(result.get("true_human_purchases", 0.0) + result.get("true_agent_purchases", 0.0))
-        conv_rate = total_purchases / max(total_demand, 1.0)
-        self.state["market"] = {
-            "alpha_hat": np.array([float(diagnostics.get("alpha_hat", self.commerce_platform.alpha_hat))], dtype=np.float32),
-            "revenue_rate": np.array([float(result.get("revenue_observed", 0.0))], dtype=np.float32),
-            "conversion_rate": np.array([float(np.clip(conv_rate, 0.0, 1.0))], dtype=np.float32),
-            "price_volatility": np.array([float(volatility)], dtype=np.float32),
-        }
-        self.state["cost"] = self.commerce_platform.unit_cost.astype(np.float32)
+        self._update_alpha_hat(self._last_sessions)
+        self._last_coi = compute_coi_window(self._last_sessions, self._costs, demand_mapping=demand_map)

-        # extract metrics with safe defaults for incomplete simulation
-        revenue_observed = float(result.get("revenue_observed", 0.0))
-        agent_loss = float(result.get("agent_loss", 0.0))
+        self._alpha_true = float(np.clip(self._alpha_true + self.c.alpha_drift, *self.c.alpha_bounds))
+        volatility = float(np.std((prices - prev) / (prev + 1e-6)))
+        reward = float(self._reward(prices, revenue, cost, volatility))
+        conversion = float(np.sum(purchases) / max(len(self._last_sessions), 1))

-        reward = (revenue_observed
-                  - COI
-                  - self.constraints.w_agent_loss * agent_loss
-                  - self.constraints.w_volatility * volatility
-                  - self.constraints.w_estimation_error)
+        self._t += 1
+        terminated = self._t >= int(self.c.max_steps)

-        terminated = self.t >= self.constraints.episode_length
+        obs = self._build_obs(prices, demand_by_product, revenue, conversion, min(volatility, 1.0))
        info = {
-            "t": self.t,
-            "revenue_observed": revenue_observed,
-            "revenue_oracle": float(result.get("revenue_oracle", revenue_observed)),
-            "agent_loss": agent_loss,
-            "ux_volatility": volatility,
-            "look_to_book": float(result.get("look_to_book", 0.0)),
-            "mean_sale_price": float(result.get("mean_sale_price", 0.0)),
-            "true_human_purchases_total": float(result.get("true_human_purchases", 0.0)),
-            "true_agent_purchases_total": float(result.get("true_agent_purchases", 0.0)),
-            "coi": COI,
-            "alpha_hat": diagnostics.get("alpha_hat", self.commerce_platform.alpha_hat),
-            "mean_human_demand": float(np.mean(diagnostics.get("demand_human", np.zeros_like(new_prices)))),
-            "mean_agent_demand": float(np.mean(diagnostics.get("demand_agent", np.zeros_like(new_prices)))),
+            "step": self._t,
+            "reward": reward,
+            "revenue": float(revenue),
+            "profit": float(revenue - cost),
+            "n_sessions": int(self.c.sessions_per_step),
+            "n_agents": int(n_agents),
+            "alpha_true": float(self._alpha_true),
+            "alpha_hat": float(self._alpha_hat),
+            "alpha_error": float(abs(self._alpha_hat - self._alpha_true)),
+            "price_std": float(np.std(prices)),
+            "price_volatility": float(volatility),
        }
-        if "delta_h_mean" in diagnostics:
+        if self._last_coi is not None:
            info.update(
                {
-                    "delta_h_mean": diagnostics["delta_h_mean"],
-                    "delta_a_mean": diagnostics["delta_a_mean"],
-                    "prob_agent_mean": diagnostics["prob_agent_mean"],
+                    "coi_policy": float(self._last_coi.policy),
+                    "coi_agent": float(self._last_coi.agent),
+                    "coi_leakage": float(self._last_coi.leak),
+                    "coi_survival": float(self._last_coi.survival_ratio),
+                    "coi_erosion": float(coi_erosion(self._last_coi.policy, self._last_coi.agent)),
                }
            )
-        return self.state, float(reward), terminated, False, info
+        return obs, reward, terminated, False, info

+    def render(self, mode: str = "human") -> str | None:
+        if self._prices is None:
+            return None
+        out = (
+            f"t={self._t}/{self.c.max_steps} "
+            f"alpha_true={self._alpha_true:.3f} alpha_hat={self._alpha_hat:.3f} "
+            f"price_std={float(np.std(self._prices)):.2f}"
+        )
+        if mode == "human":
+            print(out)
+        return out

-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-    from collections import defaultdict
-
-    env = PHANTOMEnv(constraints=BusinessLogicConstraints())
-    obs, _ = env.reset(seed=42)
-    metrics = defaultdict(list)
-    total_reward = 0.0
-    done = False
-
-    while not done:
-        action = env.action_space.sample()
-        obs, reward, done, _, info = env.step(action)
-        total_reward += reward
-        p_mean = float(np.mean(obs["elasticity"]["price"]))
-        q_mean = float(np.mean(obs["elasticity"]["demand"]))
-        p_std = float(np.std(obs["elasticity"]["price"]))
-
-        metrics['t'].append(info['t'])
-        metrics['price_mean'].append(p_mean)
-        metrics['price_std'].append(p_std)
-        metrics['demand_mean'].append(q_mean)
-        metrics['revenue_observed'].append(info['revenue_observed'])
-        metrics['revenue_oracle'].append(info['revenue_oracle'])
-        metrics['agent_loss'].append(info['agent_loss'])
-        metrics['ux_volatility'].append(info['ux_volatility'])
-        metrics['look_to_book'].append(info['look_to_book'])
-        metrics['reward'].append(reward)
-        metrics['human_purchases'].append(info['true_human_purchases_total'])
-        metrics['agent_purchases'].append(info['true_agent_purchases_total'])
-        metrics['coi'].append(info.get('coi', 0.0))
-        metrics['alpha_hat'].append(info.get('alpha_hat', env.commerce_platform.alpha_hat))
-        metrics['mean_human_demand'].append(info.get('mean_human_demand', 0.0))
-        metrics['mean_agent_demand'].append(info.get('mean_agent_demand', 0.0))
-        metrics['delta_h_mean'].append(info.get('delta_h_mean', 0.0))
-        metrics['delta_a_mean'].append(info.get('delta_a_mean', 0.0))
-        metrics['prob_agent_mean'].append(info.get('prob_agent_mean', 0.0))
-
-        if info['t'] % 20 == 0 or done:
-            print(f"t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} q={q_mean:6.2f} "
-                  f"rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
-                  f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
-                  f"coi={info.get('coi', 0.0):6.2f} alpha={info.get('alpha_hat', 0.0):4.2f} "
-                  f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")
-
-    print(f"total_reward={total_reward:.2f}")
-
-    fig, axes = plt.subplots(3, 4, figsize=(18, 12))
-    fig.suptitle('PHANTOM Environment Run', fontsize=14, fontweight='bold')
-
-    plot_configs = [
-        ('price_mean', 'Mean Price', 'Price'),
-        ('demand_mean', 'Mean Demand (All)', 'Demand'),
-        ('mean_human_demand', 'Mean Human Demand', 'Count'),
-        ('mean_agent_demand', 'Mean Agent Demand', 'Count'),
-        ('revenue_observed', 'Revenue (Observed)', 'Revenue'),
-        ('agent_loss', 'Agent Loss (Oracle - Observed)', 'Loss'),
-        ('coi', 'Cost of Information', 'COI'),
-        ('alpha_hat', 'Estimated α̂', 'alpha'),
-        ('ux_volatility', 'UX Volatility (Price Change)', 'Volatility'),
-        ('look_to_book', 'Look-to-Book Ratio', 'Ratio'),
-        ('reward', 'Step Reward', 'Reward'),
-        ('prob_agent_mean', 'Avg Agent Probability', 'Probability'),
-    ]
-
-    for idx, (key, title, ylabel) in enumerate(plot_configs):
-        ax = axes[idx // 4, idx % 4]
-        ax.plot(metrics['t'], metrics[key], color='blue', alpha=0.7, linewidth=1.5)
-        ax.set_xlabel('Step')
-        ax.set_ylabel(ylabel)
-        ax.set_title(title, fontsize=10, fontweight='bold')
-        ax.grid(True, alpha=0.3)
-
-    plt.tight_layout()
-    plt.savefig('phantom_env_comparison.png', dpi=150, bbox_inches='tight')
-    print("Plot saved to phantom_env_comparison.png")
-    plt.show()
+    def close(self) -> None:
+        return
				`@@ -0,0 +1,2 @@`
				`"""Case-specific simulations and experiments."""`
				`@@ -0,0 +1,2 @@`
				`"""Minimal thesis-aligned pricing simulation (self-contained)."""`