shock: defining new lab environment and formulation

This commit is contained in:
2026-01-23 10:37:32 +01:00
parent a033e77697
commit 4e2e41d943
41 changed files with 4175 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
"""
Thesis-specific implementation of the PHANTOM pricing defense framework.
This module implements the mathematical models from the thesis:
- ContaminatedArrivalModel: Mixture demand Q(p) = (1-α)d_H + αd_A (Eq 3)
- HybridExecutionModel: Divergent H/A behavior with separability (Section 2.1)
- RobustStackelbergObjective: Maximin objective with COI penalty (Eq 23)
- COIMetrics: Cost of Information tracking (Definition 1)
The platform configuration creates a research environment that directly
maps to the thesis mathematical framework for DR-RL experiments.
"""
from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig
from .execution import HybridExecutionModel, HybridExecutionConfig
from .objectives import RobustStackelbergObjective, COIObjective
from .platform import make_thesis_platform, ThesisConfig
from .metrics import COIMetrics, compute_coi, compute_separability
__all__ = [
'ContaminatedArrivalModel', 'ContaminatedArrivalConfig',
'HybridExecutionModel', 'HybridExecutionConfig',
'RobustStackelbergObjective', 'COIObjective',
'make_thesis_platform', 'ThesisConfig',
'COIMetrics', 'compute_coi', 'compute_separability',
]

327
lab/case/thesis/arrivals.py Normal file
View File

@@ -0,0 +1,327 @@
"""Contaminated arrivals using learned MDP kernels from behavior_loader.
Implements thesis demand model (Section 3.1):
- Aggregate demand Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t (Eq 3)
- Demand proxy q̂_{t,i} = Σ_s Σ_k ω(a_{s,k}) · 1[i_{s,k} = i] (Eq 2)
- Per-session separability via KL divergence Δ_H, Δ_A (Eq 20-21)
The arrival model samples sessions from a mixture of human/agent behavioral profiles,
each session produces a trajectory τ_s and associated demand computation q(τ').
"""
from __future__ import annotations
from dataclasses import dataclass, field
from types import SimpleNamespace
from typing import Dict, List, Tuple, Optional
import numpy as np
from ...outlet.types import Opportunity, InstrumentSet, MarketState, HiddenState
from ...outlet.constants import Side, OpportunityType
from ...outlet.math_util import poisson_arrivals
try:
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from sim.rl.behavior_loader.models import (
BehaviorModel, AgentBehaviorModel, aggregate_event_transitions, kl_divergence
)
REAL_MDP = True
except ImportError:
REAL_MDP = False
kl_divergence = None
EVENT_PAGE = {"session_start": "/", "view_item_page": "/products", "learn_more_about_item": "/products/details",
"add_item_to_cart": "/cart", "purchase_complete": "/checkout", "session_end": "/checkout/success"}
EVENT_CANON = {"page_view": "session_start", "hover_over_paragraph": "view_item_page", "hover_over_title": "view_item_page",
"view_item_page": "view_item_page", "learn_more_about_item": "learn_more_about_item",
"add_item_to_cart": "add_item_to_cart", "checkout_start": "purchase_complete", "remove_item": "view_item_page"}
# action space partition A = A_nav A_cart A_filter A_dwell with signal weights ω (Table 1)
ACTION_WEIGHTS: Dict[str, float] = {
"add_item_to_cart": 0.8, "remove_item": 0.6, "checkout_start": 0.9, "purchase_complete": 1.0, # A_cart
"hover_over_title": 0.3, "hover_over_paragraph": 0.35, "hover_over_link": 0.25, # A_dwell
"page_view": 0.1, "session_start": 0.05, "view_item_page": 0.15, "learn_more_about_item": 0.2, # A_nav
"search": 0.05, "filter_date": 0.05, "filter_price": 0.08, "sort": 0.03, "session_end": 0.0, # A_filter
}
@dataclass
class SessionDemand:
"""Per-session demand computation per thesis formulation (Section 3.1).
Each session s ∈ S produces trajectory τ_s and demand proxy q̂. The platform uses
divergence signals Δ_H, Δ_A to estimate per-session contamination α̂(τ').
"""
session_id: str
q: Dict[int, float] # q̂_i demand proxy per product (Eq 2)
trajectory: List[Dict] # τ_s = (e_{s,1}, ..., e_{s,L_s})
delta_h: float = 0.0 # D_KL(T̂' || T̄_H) (Eq 20)
delta_a: float = 0.0 # D_KL(T̂' || T̄_A) (Eq 21)
alpha_hat: float = 0.0 # per-session contamination estimate
actor_class: str = "H" # ground truth Y_s ∈ {H, A}
theta: Dict[str, float] = field(default_factory=dict)
def compute_demand_proxy(events: List[Dict], n_products: int) -> Dict[int, float]:
"""Compute q̂_{t,i} = Σ_k ω(a_{s,k}) · 1[i_{s,k} = i] per Eq 2."""
q = {i: 0.0 for i in range(n_products)}
for e in events:
action, pidx = e.get("eventName", ""), e.get("product_idx")
if pidx is not None and 0 <= pidx < n_products:
q[pidx] += ACTION_WEIGHTS.get(action, 0.1)
return q
def compute_session_divergence(events: List[Dict], ref_h: Dict, ref_a: Dict) -> Tuple[float, float]:
"""Compute Δ_H, Δ_A divergence signals from trajectory (Eq 20-21)."""
if not events or kl_divergence is None:
return 0.0, 0.0
# build empirical transition kernel from trajectory
trans: Dict[str, Dict[str, int]] = {}
prev = "session_start"
for e in events:
curr = e.get("eventName", "session_end")
trans.setdefault(prev, {})
trans[prev][curr] = trans[prev].get(curr, 0) + 1
prev = curr
# normalize to probabilities
kernel = {}
for s, dests in trans.items():
total = sum(dests.values())
kernel[s] = {d: c / total for d, c in dests.items()} if total > 0 else {}
# aggregate to event-level and compute KL divergence against reference kernels
delta_h = sum(kl_divergence(kernel.get(s, {}), ref_h.get(s, {})) for s in kernel) / max(len(kernel), 1)
delta_a = sum(kl_divergence(kernel.get(s, {}), ref_a.get(s, {})) for s in kernel) / max(len(kernel), 1)
return delta_h, delta_a
def _canonicalize(raw: Dict) -> Dict:
out = {}
for src, dsts in raw.items():
sc = EVENT_CANON.get(src, src)
out.setdefault(sc, {})
for dst, p in dsts.items():
dc = EVENT_CANON.get(dst, dst)
out[sc][dc] = out[sc].get(dc, 0.0) + p
return {s: {k: v/sum(d.values()) for k, v in d.items()} for s, d in out.items() if sum(d.values()) > 0}
class BehavioralProfile:
"""Markov profile from learned MDP kernels (Section 3.5.2).
Transition kernel T̂_Y estimated via MLE: P̂(s'|s) = N(s,s') / Σ_k N(s,k) (Eq 19)
"""
STATES = ["session_start", "view_item_page", "learn_more_about_item", "add_item_to_cart", "purchase_complete", "session_end"]
# fallback kernels T̄_H, T̄_A when real data unavailable
FALLBACK_H = {"session_start": {"view_item_page": 0.85, "session_end": 0.15},
"view_item_page": {"learn_more_about_item": 0.4, "add_item_to_cart": 0.3, "view_item_page": 0.2, "session_end": 0.1},
"learn_more_about_item": {"add_item_to_cart": 0.5, "view_item_page": 0.3, "session_end": 0.2},
"add_item_to_cart": {"purchase_complete": 0.6, "view_item_page": 0.25, "session_end": 0.15},
"purchase_complete": {"session_end": 1.0}}
FALLBACK_A = {"session_start": {"view_item_page": 0.95, "session_end": 0.05},
"view_item_page": {"learn_more_about_item": 0.6, "view_item_page": 0.25, "add_item_to_cart": 0.1, "session_end": 0.05},
"learn_more_about_item": {"view_item_page": 0.5, "add_item_to_cart": 0.15, "learn_more_about_item": 0.3, "session_end": 0.05},
"add_item_to_cart": {"view_item_page": 0.4, "purchase_complete": 0.2, "session_end": 0.4},
"purchase_complete": {"session_end": 1.0}}
def __init__(self, actor: str, pprobs: np.ndarray, data_dir: str = ""):
self.actor, self.pprobs = actor, np.clip(pprobs, 0.0, 0.95)
self.trans = self._load(data_dir) # T̂_Y transition kernel
self._ensure_terminal()
self.dwell = {s: (1.2, 0.5) if actor == "agents" else (2.0, 1.2) for s in self.STATES}
def _load(self, data_dir: str) -> Dict:
if not REAL_MDP or not data_dir:
print("using fallback")
return dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H)
try:
mdp = (AgentBehaviorModel if self.actor == "agents" else BehaviorModel)(data_dir).build_MDP()
raw = aggregate_event_transitions(mdp) if mdp.get("transitions") else {}
return _canonicalize(raw) if raw else dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H)
except Exception:
print("using fallback")
return dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H)
def _ensure_terminal(self):
self.trans.setdefault("purchase_complete", {})["session_end"] = self.trans.get("purchase_complete", {}).get("session_end", 1.0)
self.trans.setdefault("session_start", {"view_item_page": 0.7, "learn_more_about_item": 0.2, "session_end": 0.1})
def _tprobs(self, state: str, pidx: int) -> Dict[str, float]:
probs = dict(self.trans.get(state, {"session_end": 1.0}))
if state == "add_item_to_cart":
base = probs.get("purchase_complete", 0.0)
df = float(self.pprobs[pidx]) * (0.3 if self.actor == "agents" else 1.0)
adj = np.clip(base * 0.5 + df * 0.5, 0.0, 0.95)
rem = max(1e-6, 1.0 - adj)
other = sum(v for k, v in probs.items() if k != "purchase_complete")
probs = {k: (adj if k == "purchase_complete" else v * rem / max(other, 1e-6)) for k, v in probs.items()}
total = sum(probs.values())
return {k: v/total for k, v in probs.items()} if total > 0 else {"session_end": 1.0}
def sample(self, rng: np.random.Generator, sid: str, prices: np.ndarray, costs: np.ndarray) -> Tuple[List[Dict], List[SimpleNamespace]]:
events, fevts = [], []
state, t, pidx = "session_start", 0.0, int(rng.integers(0, len(prices)))
cost, cprice = float(costs[pidx]), max(float(prices[pidx]), float(costs[pidx]) * 1.05)
while state != "session_end" and len(events) < 40:
if state != "session_start":
row = {"session_id": sid, "actor": "agent" if self.actor == "agents" else "human",
"eventName": state, "product_idx": pidx, "productId": f"product-{pidx:04d}",
"price_offered": cprice, "price_paid": 0.0, "page": EVENT_PAGE.get(state, "/"),
"ts": t, "unit_cost": cost, "base_price": float(prices[pidx])}
if state == "purchase_complete":
row["price_paid"] = max(cprice * (1.0 + rng.normal(0.0, 0.015)), cost)
events.append(row)
fevts.append(SimpleNamespace(eventName=state, page=row["page"], productId=row["productId"], ts=t))
probs = self._tprobs(state, pidx)
state = rng.choice(list(probs.keys()), p=list(probs.values()))
sh, sc = self.dwell.get(state, (2.0, 1.0))
t += max(0.3, rng.gamma(shape=sh, scale=sc))
return events, fevts
@dataclass
class ContaminatedArrivalConfig:
base_rate: float = 20.0
alpha_contamination: float = 0.2
alpha_drift: float = 0.0
alpha_bounds: tuple[float, float] = (0.0, 0.5)
human_views_range: tuple[int, int] = (1, 4)
agent_views_range: tuple[int, int] = (3, 10)
agent_systematic: bool = True
use_real_behavior: bool = True
human_data_dir: str = ""
agent_data_dir: str = ""
class ContaminatedArrivalModel:
"""Mixture model Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t (Eq 3).
Samples sessions from human/agent behavioral profiles, computes per-session
demand proxy q̂ and divergence signals Δ_H, Δ_A for separability.
"""
def __init__(self, cfg: ContaminatedArrivalConfig | None = None):
self.cfg = cfg or ContaminatedArrivalConfig()
self._alpha = self.cfg.alpha_contamination
self._scount = 0
self._profiles: Dict[str, BehavioralProfile] = {}
self._ref_kernels: Dict[str, Dict] = {} # T̄_H, T̄_A reference kernels
self._session_demands: List[SessionDemand] = [] # collected session demands
@property
def alpha(self) -> float:
return self._alpha
def _profile(self, actor: str, pprobs: np.ndarray) -> BehavioralProfile:
key = actor
if key not in self._profiles:
ddir = self.cfg.agent_data_dir if actor == "agents" else self.cfg.human_data_dir
if not ddir and self.cfg.use_real_behavior:
base = Path(__file__).parent.parent.parent.parent / "experiments"
ddir = str(base / ("agents/collected_data" if actor == "agents" else "collected_data"))
profile = BehavioralProfile(actor, pprobs, ddir if self.cfg.use_real_behavior else "")
self._profiles[key] = profile
self._ref_kernels[key] = profile.trans # cache T̄_Y for divergence
return self._profiles[key]
def get_ref_kernels(self) -> Tuple[Dict, Dict]:
"""Return reference transition kernels T̄_H, T̄_A for divergence computation."""
return (self._ref_kernels.get("humans", BehavioralProfile.FALLBACK_H),
self._ref_kernels.get("agents", BehavioralProfile.FALLBACK_A))
def get_session_demands(self) -> List[SessionDemand]:
"""Return collected session demands for downstream analysis."""
return self._session_demands
def sample(self, t: float, dt: float, instruments: InstrumentSet,
market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]:
"""Sample arrivals as per Eq 3: mixture of human/agent demand distributions.
For each session s, computes:
- Trajectory τ_s from behavioral profile sampling
- Demand proxy q̂ via weighted action aggregation (Eq 2)
- Divergence signals Δ_H, Δ_A for separability (Eq 20-21)
- Per-session contamination estimate α̂(τ')
"""
cfg = self.cfg
if cfg.alpha_drift != 0:
self._alpha = np.clip(self._alpha + cfg.alpha_drift * rng.normal(), *cfg.alpha_bounds)
hidden.contamination = self._alpha
n_sess = poisson_arrivals(cfg.base_rate * hidden.true_demand_intensity, dt, rng)
prices, costs = instruments.refs, instruments.costs
margin = np.clip((prices - costs) / np.maximum(costs, 1e-3), -0.9, 2.0)
hprob, aprob = 0.08 * np.exp(-1.2 * margin), 0.05 * np.exp(-0.6 * margin)
ref_h, ref_a = self.get_ref_kernels()
opps = []
for _ in range(n_sess):
self._scount += 1
sid = f"s{self._scount:06d}"
is_agent = rng.random() < self._alpha
actor, probs = ("agents", aprob) if is_agent else ("humans", hprob)
profile = self._profile(actor, probs)
events, fevts = profile.sample(rng, sid, prices, costs)
# compute demand proxy q̂ per Eq 2
q = compute_demand_proxy(events, instruments.n)
# compute divergence signals Δ_H, Δ_A per Eq 20-21
delta_h, delta_a = compute_session_divergence(events, ref_h, ref_a)
# per-session contamination estimate α̂(τ') = σ(β(Δ_H - Δ_A))
alpha_hat = 1.0 / (1.0 + np.exp(-2.0 * (delta_h - delta_a))) if (delta_h + delta_a) > 0 else 0.5
theta = ({'price_sensitivity': rng.uniform(0.05, 0.2), 'base_conversion': 0.01, 'info_value': 1.0} if is_agent
else {'price_sensitivity': rng.uniform(1.5, 4.0), 'base_conversion': rng.uniform(0.2, 0.5), 'info_value': 0.0})
# store session demand for downstream analysis
self._session_demands.append(SessionDemand(
session_id=sid, q=q, trajectory=events, delta_h=delta_h, delta_a=delta_a,
alpha_hat=alpha_hat, actor_class="A" if is_agent else "H", theta=theta))
viewed = list({e["product_idx"] for e in events if "product_idx" in e})
if not viewed:
vr = cfg.agent_views_range if is_agent else cfg.human_views_range
viewed = list(rng.choice(instruments.n, size=min(rng.integers(*vr), instruments.n), replace=False))
for vi, iid in enumerate(viewed):
opps.append(Opportunity(
id=f"{sid}-{iid}", type=OpportunityType.SESSION, side=Side.BUY,
instrument_id=int(iid), size=1.0, t=t + rng.uniform(0, dt),
context={'session_id': sid, 'actor_class': 'AGENT' if is_agent else 'HUMAN', 'is_agent': is_agent,
'reconnaissance_intent': is_agent, 'view_index': vi, 'total_views': len(viewed),
'theta': theta, 'trajectory_events': fevts, 'mdp_trajectory': events,
'demand_proxy': q, 'alpha_hat': alpha_hat, 'delta_h': delta_h, 'delta_a': delta_a}))
return opps
@dataclass
class AdversarialArrivalConfig:
base_rate: float = 5.0
n_parallel_agents: int = 3
query_all_products: bool = True
class AdversarialArrivalModel:
"""Adversarial coordination (Theorem 1): as N->inf, COI->0."""
def __init__(self, cfg: AdversarialArrivalConfig | None = None):
self.cfg = cfg or AdversarialArrivalConfig()
self._qcount = 0
def sample(self, t: float, dt: float, instruments: InstrumentSet,
market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]:
cfg, opps = self.cfg, []
for _ in range(poisson_arrivals(cfg.base_rate, dt, rng)):
self._qcount += 1
for ai in range(cfg.n_parallel_agents):
sid = f"adv{self._qcount:06d}-{ai}"
prods = np.arange(instruments.n) if cfg.query_all_products else rng.choice(instruments.n, size=1)
for iid in prods:
opps.append(Opportunity(
id=f"{sid}-{iid}", type=OpportunityType.SESSION, side=Side.BUY,
instrument_id=int(iid), size=1.0, t=t,
context={'session_id': sid, 'actor_class': 'AGENT', 'is_agent': True, 'adversarial': True,
'agent_index': ai, 'query_group': self._qcount,
'theta': {'price_sensitivity': 0.0, 'base_conversion': 0.0, 'info_value': 1.0}}))
return opps

View File

@@ -0,0 +1,91 @@
"""Execution models with divergent H/A behavior using ground truth labels."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict
import numpy as np
from ...outlet.types import Opportunity, Quote, InstrumentSet, MarketState
from ...outlet.math_util import sigmoid, safe_log, EPS
@dataclass
class HybridExecutionConfig:
human_base_prob: float = 0.3
human_elasticity: float = 2.5
agent_conversion: float = 0.01
cross_elasticity: float = 0.4
quality_weight: float = 0.2
use_separability: bool = False
class HybridExecutionModel:
"""Execution with divergent H/A behavior using ground truth labels."""
def __init__(self, cfg: HybridExecutionConfig | None = None):
self.cfg = cfg or HybridExecutionConfig()
def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
market: MarketState | None, rng: np.random.Generator) -> float:
cfg, idx = self.cfg, int(opp.instrument_id)
price, ref, cost = float(quote.prices[idx]), float(instruments.refs[idx]), float(instruments.costs[idx])
ctx = opp.context
theta = ctx.get('theta', {})
is_agent = ctx.get('is_agent', False)
if is_agent:
return cfg.agent_conversion * theta.get('base_conversion', 1.0)
# human logit discrete choice
sens = theta.get('price_sensitivity', cfg.human_elasticity)
base = theta.get('base_conversion', cfg.human_base_prob)
u_price = -sens * safe_log(price / (ref + EPS))
quality = instruments.instruments[idx].attrs.get('quality', 0.5)
u_quality = cfg.quality_weight * quality
u_comp = 0.0
if market and market.competitor_quotes is not None:
cp = market.competitor_quotes[idx]
if cp < price:
u_comp = -cfg.cross_elasticity * (price - cp) / ref
utility = safe_log(base / (1 - base + EPS)) + u_price + u_quality + u_comp
return float(sigmoid(utility))
def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray:
if context is None:
return fills / (self.cfg.human_base_prob + EPS)
agent_frac = context.get('contamination', 0.0)
return fills / (self.cfg.human_base_prob * (1 - agent_frac) + EPS)
@dataclass
class SeparableExecutionConfig:
human_funnel: Dict[str, float] = None
agent_funnel: Dict[str, float] = None
def __post_init__(self):
self.human_funnel = self.human_funnel or {'view_to_detail': 0.4, 'detail_to_cart': 0.3, 'cart_to_purchase': 0.6}
self.agent_funnel = self.agent_funnel or {'view_to_detail': 0.8, 'detail_to_cart': 0.05, 'cart_to_purchase': 0.1}
class SeparableExecutionModel:
"""Execution with Markov funnel kernels using ground truth labels."""
def __init__(self, cfg: SeparableExecutionConfig | None = None):
self.cfg = cfg or SeparableExecutionConfig()
def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
market: MarketState | None, rng: np.random.Generator) -> float:
is_agent = opp.context.get('is_agent', False)
probs = self.cfg.agent_funnel if is_agent else self.cfg.human_funnel
p = probs['view_to_detail'] * probs['detail_to_cart'] * probs['cart_to_purchase']
if not is_agent:
idx = int(opp.instrument_id)
price_ratio = quote.prices[idx] / (instruments.refs[idx] + EPS)
p *= np.exp(-0.5 * (price_ratio - 1.0))
return float(np.clip(p, 0, 1))
def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray:
h = self.cfg.human_funnel
exp_conv = h['view_to_detail'] * h['detail_to_cart'] * h['cart_to_purchase']
return fills / (exp_conv + EPS)

102
lab/case/thesis/metrics.py Normal file
View File

@@ -0,0 +1,102 @@
"""Thesis metrics for COI and behavioral analysis using ground truth labels."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Dict
import numpy as np
from ...outlet.types import StepLogs, StepMetrics, Quote, InstrumentSet
from ...outlet.math_util import safe_log, EPS
@dataclass
class COIMetrics:
coi_level: float = 0.0
coi_leakage: float = 0.0
realized_premium: float = 0.0
theoretical_max: float = 0.0
erosion_rate: float = 0.0
def to_dict(self) -> dict[str, float]:
return {k: getattr(self, k) for k in ['coi_level', 'coi_leakage', 'realized_premium', 'theoretical_max', 'erosion_rate']}
def compute_coi(quote: Quote, instruments: InstrumentSet, metrics: StepMetrics, contamination: float) -> COIMetrics:
prices, costs, refs = quote.prices, instruments.costs, instruments.refs
margins = prices - costs
coi_level = float(np.mean(margins))
theoretical_max = float(np.mean(costs))
realized_premium = (metrics.revenue - metrics.cost) / metrics.units_traded if metrics.units_traded > 0 else 0.0
price_var = float(np.var(prices / refs))
coi_leakage = contamination * (coi_level + price_var)
erosion_rate = contamination * coi_level / (theoretical_max + EPS)
return COIMetrics(coi_level=coi_level, coi_leakage=coi_leakage, realized_premium=realized_premium,
theoretical_max=theoretical_max, erosion_rate=erosion_rate)
@dataclass
class SeparabilityMetrics:
classification_accuracy: float = 0.0
estimated_alpha: float = 0.0
n_human_sessions: int = 0
n_agent_sessions: int = 0
def compute_separability(logs: StepLogs, true_alpha: float) -> SeparabilityMetrics:
"""Compute separability using ground truth labels only."""
if logs.events is None or len(logs.events) == 0:
return SeparabilityMetrics(estimated_alpha=true_alpha)
sessions: Dict[str, bool] = {}
for evt in logs.events:
sid = evt.metadata.get('session_id', evt.opportunity_id)
if sid not in sessions:
sessions[sid] = evt.metadata.get('is_agent', False)
n_agent = sum(1 for is_agent in sessions.values() if is_agent)
n_human = len(sessions) - n_agent
est_alpha = n_agent / len(sessions) if sessions else 0.0
return SeparabilityMetrics(
classification_accuracy=1.0, # ground truth is always correct
estimated_alpha=est_alpha,
n_human_sessions=n_human,
n_agent_sessions=n_agent)
@dataclass
class RevenueAttribution:
total_revenue: float = 0.0
human_revenue: float = 0.0
agent_revenue: float = 0.0
human_conversion: float = 0.0
agent_conversion: float = 0.0
def compute_attribution(logs: StepLogs, metrics: StepMetrics) -> RevenueAttribution:
if logs.executions is None:
return RevenueAttribution(total_revenue=metrics.revenue)
human_rev, agent_rev, human_cnt, agent_cnt = 0.0, 0.0, 0, 0
for exe in logs.executions:
if exe.propensity < 0.05:
agent_rev += exe.price * exe.size_filled
agent_cnt += 1
else:
human_rev += exe.price * exe.size_filled
human_cnt += 1
total_exp = logs.aggregates.get('n_arrivals', 1)
return RevenueAttribution(
total_revenue=metrics.revenue, human_revenue=human_rev, agent_revenue=agent_rev,
human_conversion=human_cnt / (total_exp * 0.8 + EPS),
agent_conversion=agent_cnt / (total_exp * 0.2 + EPS))
def order_statistic_erosion(n_agents: int, price_variance: float) -> float:
"""COI erosion from Theorem 1: as N->inf, min(p_1..p_N)->p_min."""
if n_agents <= 1:
return 0.0
sigma, log_n = np.sqrt(price_variance), safe_log(n_agents)
if log_n < 1:
return 0.0
shift = sigma * (np.sqrt(2 * log_n) - (safe_log(log_n) + safe_log(4 * np.pi)) / (2 * np.sqrt(2 * log_n) + EPS))
return float(min(shift / (sigma * 2 + EPS), 1.0))

View File

@@ -0,0 +1,228 @@
"""
Thesis-specific objectives implementing robust pricing under contamination.
Implements the Maximin objective from Eq 23:
π* = argmax_π min_{Q ∈ U_ε} E_d~Q[R(p,d) - λ·COI(p)]
Key components:
- COIObjective: Cost of Information penalty (Definition 1)
- RobustStackelbergObjective: Full maximin objective with Wasserstein robustness
- UXPenalty: User experience degradation from volatility
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
from ...outlet.objectives.base import BaseObjective, CompositeObjective
from ...outlet.types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
from ...outlet.math_util import safe_log, EPS
class COIObjective(BaseObjective):
"""Cost of Information penalty from Definition 1.
COI(π) = E[P] - p_min
The expected price premium over marginal cost represents the platform's
pricing power. Agent reconnaissance erodes this by revealing price
distribution to buyers.
We implement COI_leakage = f(τ') · InfoValue(p, τ')
where f(τ') is the estimated agent probability.
"""
def __init__(self, lambda_coi: float = 1.0, use_revelation: bool = False):
"""
Args:
lambda_coi: Weight on COI penalty
use_revelation: If True, use -log(π(p)) as info value (penalizes rare prices)
"""
self.lambda_coi = lambda_coi
self.use_revelation = use_revelation
def reward(self, quote: Quote, instruments: InstrumentSet,
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
# COI_leakage = α · InfoValue
alpha = hidden.contamination
if self.use_revelation:
# revelation surrogate: rare prices reveal more about policy
# InfoValue = -log(π(p|τ')) ≈ surprise of the price
price_surprise = np.mean(np.abs(quote.prices - instruments.refs) / (instruments.refs + EPS))
info_value = price_surprise
else:
# query-tax surrogate: each agent query incurs constant leakage
info_value = 1.0
leakage = alpha * info_value
return -self.lambda_coi * leakage
def breakdown(self, quote: Quote, instruments: InstrumentSet,
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
alpha = hidden.contamination
margins = (quote.prices - instruments.costs) / (instruments.costs + EPS)
return {
'coi_penalty': self.reward(quote, instruments, metrics, hidden, obs),
'contamination': alpha,
'avg_margin': float(np.mean(margins)),
}
@dataclass
class RobustObjectiveConfig:
"""Configuration for robust Stackelberg objective.
Attributes:
lambda_coi: Weight on COI penalty (λ in Eq 23)
lambda_ux: Weight on UX penalty
lambda_volatility: Weight on price volatility penalty
gamma_inventory: Inventory risk aversion
wasserstein_epsilon: Ambiguity set radius (ε in Eq 21)
"""
lambda_coi: float = 0.5
lambda_ux: float = 0.1
lambda_volatility: float = 0.2
gamma_inventory: float = 0.1
wasserstein_epsilon: float = 0.1
class RobustStackelbergObjective(BaseObjective):
"""Implements the Maximin Objective from thesis Eq 23.
π* = argmax_π min_{Q ∈ U_ε(P̂_N)} E_d~Q[R(p,d) - λ·COI(p)]
The objective balances:
1. Revenue R(p,d) from human purchases
2. COI penalty for information leakage to agents
3. UX penalty for price volatility
4. Inventory/holding costs
The min over ambiguity set U_ε is approximated by penalizing
high contamination scenarios more heavily.
"""
def __init__(self, cfg: RobustObjectiveConfig | None = None):
self.cfg = cfg or RobustObjectiveConfig()
def reward(self, quote: Quote, instruments: InstrumentSet,
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
cfg = self.cfg
# 1. base revenue (R(p,d))
revenue = metrics.revenue
cost = metrics.cost
profit = revenue - cost
# 2. COI penalty: scales with contamination and margin extraction
# high margins + high contamination = high leakage
alpha = hidden.contamination
margins = quote.prices - instruments.costs
avg_margin = float(np.mean(margins))
coi_penalty = cfg.lambda_coi * avg_margin * alpha
# 3. UX penalty: price volatility harms legitimate users
volatility_penalty = cfg.lambda_volatility * metrics.volatility
# 4. inventory/position cost
position_penalty = cfg.gamma_inventory * metrics.position_cost
# 5. lost opportunity cost (stockouts)
lost_penalty = 0.1 * metrics.lost_opportunity
# robust adjustment: under adversarial distribution Q,
# expect lower revenue and higher costs
# approximate via worst-case contamination within ε-ball
worst_case_alpha = min(alpha + cfg.wasserstein_epsilon, 1.0)
robustness_penalty = cfg.wasserstein_epsilon * avg_margin * worst_case_alpha
total = profit - coi_penalty - volatility_penalty - position_penalty - lost_penalty - robustness_penalty
return total
def breakdown(self, quote: Quote, instruments: InstrumentSet,
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
cfg = self.cfg
alpha = hidden.contamination
margins = quote.prices - instruments.costs
avg_margin = float(np.mean(margins))
return {
'revenue': metrics.revenue,
'cost': metrics.cost,
'profit': metrics.revenue - metrics.cost,
'coi_penalty': -cfg.lambda_coi * avg_margin * alpha,
'volatility_penalty': -cfg.lambda_volatility * metrics.volatility,
'position_penalty': -cfg.gamma_inventory * metrics.position_cost,
'lost_penalty': -0.1 * metrics.lost_opportunity,
'robustness_penalty': -cfg.wasserstein_epsilon * avg_margin * min(alpha + cfg.wasserstein_epsilon, 1.0),
'contamination': alpha,
'avg_margin_pct': avg_margin / (float(np.mean(instruments.costs)) + EPS),
}
class UXPenalty(BaseObjective):
"""User experience penalty from price volatility.
High price volatility degrades UX for legitimate human users.
This term ensures the defense doesn't harm real customers while
protecting against agent reconnaissance.
"""
def __init__(self, scale: float = 1.0, max_acceptable_volatility: float = 0.1):
self.scale = scale
self.max_vol = max_acceptable_volatility
def reward(self, quote: Quote, instruments: InstrumentSet,
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
# penalty increases quadratically beyond threshold
excess_vol = max(0, metrics.volatility - self.max_vol)
return -self.scale * (excess_vol ** 2)
def breakdown(self, quote: Quote, instruments: InstrumentSet,
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
return {
'ux_penalty': self.reward(quote, instruments, metrics, hidden, obs),
'volatility': metrics.volatility,
}
class AdaptiveObjective(BaseObjective):
"""Objective that adapts weights based on estimated contamination.
When contamination is low, focus on revenue maximization.
When contamination is high, increase COI defense weight.
"""
def __init__(self, base_lambda_coi: float = 0.3, max_lambda_coi: float = 2.0,
adaptation_rate: float = 2.0):
self.base_lambda = base_lambda_coi
self.max_lambda = max_lambda_coi
self.rate = adaptation_rate
def _adaptive_lambda(self, alpha: float) -> float:
# sigmoid scaling: λ(α) = base + (max-base) * sigmoid(rate*(α-0.5))
from ...outlet.math_util import sigmoid
scale = sigmoid(self.rate * (alpha - 0.3))
return self.base_lambda + (self.max_lambda - self.base_lambda) * scale
def reward(self, quote: Quote, instruments: InstrumentSet,
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
alpha = hidden.contamination
lambda_coi = self._adaptive_lambda(alpha)
profit = metrics.revenue - metrics.cost
margins = quote.prices - instruments.costs
coi_penalty = lambda_coi * float(np.mean(margins)) * alpha
return profit - coi_penalty
def breakdown(self, quote: Quote, instruments: InstrumentSet,
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
alpha = hidden.contamination
return {
'profit': metrics.revenue - metrics.cost,
'adaptive_lambda': self._adaptive_lambda(alpha),
'contamination': alpha,
}
def make_thesis_objective(lambda_coi: float = 0.5, lambda_ux: float = 0.1,
lambda_vol: float = 0.2) -> CompositeObjective:
"""Create the standard thesis objective composition."""
return CompositeObjective([
(RobustStackelbergObjective(RobustObjectiveConfig(
lambda_coi=lambda_coi, lambda_ux=lambda_ux, lambda_volatility=lambda_vol)), 1.0),
])

176
lab/case/thesis/platform.py Normal file
View File

@@ -0,0 +1,176 @@
"""Thesis platform with real MDP behavioral models and separability scoring."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import numpy as np
from ...outlet import (Platform, PlatformConfig, PositionModel, PositionConfig,
PostedPriceMechanism, make_instruments, InstrumentType, LogLevel)
from ...outlet.mechanisms.posted_price import PostedPriceConfig
from ...outlet.observation import DefaultObservationBuilder, ObservationConfig
from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig
from .execution import HybridExecutionModel, HybridExecutionConfig
from .objectives import RobustStackelbergObjective, RobustObjectiveConfig
@dataclass
class ThesisConfig:
# instruments
n_instruments: int = 10
cost_range: tuple[float, float] = (5.0, 50.0)
margin_range: tuple[float, float] = (0.2, 0.5)
# contamination (Section 3.1)
alpha_contamination: float = 0.2
alpha_drift: float = 0.0
alpha_bounds: tuple[float, float] = (0.0, 0.5)
# objectives (Eq 23)
lambda_coi: float = 0.5
lambda_ux: float = 0.1
lambda_volatility: float = 0.2
wasserstein_epsilon: float = 0.1
# arrivals
sessions_per_step: int = 30
human_views_range: tuple[int, int] = (1, 4)
agent_views_range: tuple[int, int] = (3, 10)
# inventory
initial_inventory: float = 100.0
holding_cost_rate: float = 0.002
# real behavioral models (from sim.rl)
use_real_behavior: bool = True
use_separability: bool = False # disabled until classifier trained
human_data_dir: str = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data"
agent_data_dir: str = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data"
# simulation
max_steps: int = 500
seed: int | None = 24
log_level: LogLevel = LogLevel.AGG_ONLY
def _resolve_data_dirs(cfg: ThesisConfig) -> tuple[str, str]:
"""Resolve data directories for behavioral models."""
base = Path(__file__).parent.parent.parent.parent / "experiments"
human = cfg.human_data_dir or str(base / "collected_data")
agent = cfg.agent_data_dir or str(base / "agents/collected_data")
return human, agent
def make_thesis_platform(cfg: ThesisConfig | None = None) -> Platform:
"""Create platform with real MDP behavioral models.
Implements:
- Contaminated arrivals using learned MDP kernels from behavior_loader
- Hybrid execution with real separability scoring from lib.separability
- Robust Stackelberg objective (Eq 23)
"""
cfg = cfg or ThesisConfig()
rng = np.random.default_rng(cfg.seed)
human_dir, agent_dir = _resolve_data_dirs(cfg)
instruments = make_instruments(
n=cfg.n_instruments, cost_range=cfg.cost_range, margin_range=cfg.margin_range,
inst_type=InstrumentType.SKU, rng=rng)
instruments.position = np.full(cfg.n_instruments, cfg.initial_inventory)
arrival = ContaminatedArrivalModel(ContaminatedArrivalConfig(
base_rate=cfg.sessions_per_step,
alpha_contamination=cfg.alpha_contamination,
alpha_drift=cfg.alpha_drift,
alpha_bounds=cfg.alpha_bounds,
human_views_range=cfg.human_views_range,
agent_views_range=cfg.agent_views_range,
use_real_behavior=cfg.use_real_behavior,
human_data_dir=human_dir,
agent_data_dir=agent_dir,
))
execution = HybridExecutionModel(HybridExecutionConfig(
use_separability=cfg.use_separability,
))
mechanism = PostedPriceMechanism(PostedPriceConfig(max_delta_pct=0.15, min_margin_pct=0.05))
position = PositionModel(PositionConfig(initial_position=cfg.initial_inventory, holding_cost_rate=cfg.holding_cost_rate))
market = None
objective = RobustStackelbergObjective(RobustObjectiveConfig(
lambda_coi=cfg.lambda_coi, lambda_ux=cfg.lambda_ux,
lambda_volatility=cfg.lambda_volatility, wasserstein_epsilon=cfg.wasserstein_epsilon))
obs_builder = DefaultObservationBuilder(ObservationConfig(mask_true_demand=True))
platform_cfg = PlatformConfig(n_instruments=cfg.n_instruments, max_steps=cfg.max_steps,
seed=cfg.seed, log_level=cfg.log_level, mask_demand=True)
return Platform(instruments=instruments, mechanism=mechanism, arrival=arrival, execution=execution,
position=position, market=market, obs_builder=obs_builder, objective=objective, cfg=platform_cfg)
@dataclass
class AblationConfig(ThesisConfig):
disable_coi_penalty: bool = False
disable_ux_penalty: bool = False
disable_contamination: bool = False
disable_real_behavior: bool = False
def make_ablation_platform(cfg: AblationConfig) -> Platform:
if cfg.disable_coi_penalty:
cfg.lambda_coi = 0.0
if cfg.disable_ux_penalty:
cfg.lambda_ux = 0.0
if cfg.disable_contamination:
cfg.alpha_contamination = 0.0
if cfg.disable_real_behavior:
cfg.use_real_behavior = False
cfg.use_separability = False
return make_thesis_platform(cfg)
def sweep_contamination(alpha_values: list[float], base_cfg: ThesisConfig | None = None,
n_steps: int = 100, seed: int = 42) -> dict[float, dict]:
"""Test performance across contamination levels (Theorem 1 validation)."""
from ...experiments.eval import rollout, fixed_price_policy
results = {}
base_cfg = base_cfg or ThesisConfig()
for alpha in alpha_values:
cfg = ThesisConfig(**{k: v for k, v in base_cfg.__dict__.items() if k != 'alpha_contamination'},
alpha_contamination=alpha)
platform = make_thesis_platform(cfg)
policy = fixed_price_policy(platform.instruments.refs)
result = rollout(platform, policy, n_steps, seed=seed)
results[alpha] = {
'total_reward': result.total_reward,
'total_pnl': result.total_pnl,
'avg_conversion': result.avg_conversion,
'final_contamination': platform._hidden.contamination,
}
return results
def sweep_behavior_modes(base_cfg: ThesisConfig | None = None, n_steps: int = 100, seed: int = 42) -> dict[str, dict]:
"""Compare real vs synthetic behavioral models."""
from ...experiments.eval import rollout, fixed_price_policy
base_cfg = base_cfg or ThesisConfig()
modes = {
'real_mdp': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': True, 'use_separability': True}),
'synthetic': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': False, 'use_separability': False}),
'real_mdp_no_sep': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': True, 'use_separability': False}),
}
results = {}
for name, cfg in modes.items():
platform = make_thesis_platform(cfg)
policy = fixed_price_policy(platform.instruments.refs)
result = rollout(platform, policy, n_steps, seed=seed)
results[name] = {
'total_reward': result.total_reward,
'total_pnl': result.total_pnl,
'avg_conversion': result.avg_conversion,
}
return results

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env python
"""Thesis simulation experiments with real MDP behavioral models."""
from __future__ import annotations
import sys
from pathlib import Path
if __name__ == '__main__':
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from lab.case.thesis.platform import make_thesis_platform, ThesisConfig
from lab.case.thesis.metrics import compute_coi, compute_separability
from lab.experiments.eval import compare_policies
import numpy as np
def demo_basic_simulation():
print("=" * 70)
print("THESIS SIMULATION: Contaminated Dynamic Pricing (Real MDP Kernels)")
print("=" * 70)
cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, lambda_coi=0.5,
max_steps=100, seed=42, use_real_behavior=True)
platform = make_thesis_platform(cfg)
print(f"\nInstruments: {platform.instruments.n}")
print(f"Reference prices: {platform.instruments.refs.round(2)}")
print(f"Costs: {platform.instruments.costs.round(2)}")
print(f"Initial contamination alpha={cfg.alpha_contamination}")
print(f"Using real behavior: {cfg.use_real_behavior}")
result = platform.reset(seed=42)
total_reward, coi_history = 0, []
print(f"\n{'Step':>5} {'Reward':>10} {'PnL':>10} {'COI':>8} {'alpha':>6} {'Conv':>8}")
print("-" * 55)
for t in range(cfg.max_steps):
action = platform.instruments.refs * np.random.uniform(0.95, 1.15, size=platform.instruments.n)
result = platform.step(action)
total_reward += result.reward
coi = compute_coi(platform._quote, platform.instruments, result.metrics, result.hidden.contamination)
coi_history.append(coi.coi_level)
if t % 20 == 0:
print(f"{t:5d} {result.reward:10.2f} {result.metrics.pnl:10.2f} "
f"{coi.coi_level:8.2f} {result.hidden.contamination:6.2f} {result.metrics.conversion:8.3f}")
print("-" * 55)
print(f"Total Reward: {total_reward:.2f}")
print(f"Average COI: {np.mean(coi_history):.2f}")
print(f"COI Trend: {coi_history[-1] - coi_history[0]:+.2f}")
def demo_contamination_sweep():
print("\n" + "=" * 70)
print("EXPERIMENT: COI Erosion vs Contamination (Theorem 1)")
print("=" * 70)
from lab.case.thesis.platform import sweep_contamination
trials = 20
alpha_values = [i/trials for i in range(trials)]
results = sweep_contamination(alpha_values, n_steps=100, seed=42)
print(f"\n{'alpha':>6} {'Reward':>12} {'PnL':>12} {'Conv':>10}")
print("-" * 45)
for alpha, m in sorted(results.items()):
print(f"{alpha:6.2f} {m['total_reward']:12.2f} {m['total_pnl']:12.2f} {m['avg_conversion']:10.3f}")
rewards = [results[a]['total_reward'] for a in sorted(results.keys())]
dataset = np.array([[a, r] for a, r in zip(alpha_values, rewards)])
trend = np.corrcoef(dataset[:, 0], dataset[:, 1])[0, 1]
print(f"Trend (alpha~reward correlation): {trend:.3f}")
def demo_policy_comparison():
print("\n" + "=" * 70)
print("EXPERIMENT: Policy Comparison under Contamination")
print("=" * 70)
cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.25, max_steps=100, seed=42)
platform = make_thesis_platform(cfg)
def fixed_policy(obs, t): return platform.instruments.refs.copy(), 1.0
def aggressive_policy(obs, t): return platform.instruments.refs * 1.3, 1.0
def conservative_policy(obs, t): return platform.instruments.refs * 1.05, 1.0
def adaptive_policy(obs, t):
fills = obs[platform.instruments.n:2*platform.instruments.n]
exp = obs[2*platform.instruments.n:3*platform.instruments.n]
conv = np.sum(fills) / (np.sum(exp) + 1e-8)
return platform.instruments.refs * (1.0 + 0.2 * conv), 1.0
policies = {'fixed': fixed_policy, 'aggressive': aggressive_policy,
'conservative': conservative_policy, 'adaptive': adaptive_policy}
results = compare_policies(platform, policies, n_steps=100, n_runs=3, seed=42)
print(f"\n{'Policy':>15} {'Reward':>12} {'Std':>10} {'PnL':>12} {'Conv':>10}")
print("-" * 65)
for name, r in sorted(results.items(), key=lambda x: -x[1]['mean_reward']):
print(f"{name:>15} {r['mean_reward']:12.2f} {r['std_reward']:10.2f} "
f"{r['mean_pnl']:12.2f} {r['mean_conversion']:10.3f}")
def demo_session_analysis():
"""Analyze session-level behavior from MDP trajectories."""
print("\n" + "=" * 70)
print("EXPERIMENT: Session Analysis (Ground Truth)")
print("=" * 70)
from lab.outlet.constants import LogLevel
cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, max_steps=50,
log_level=LogLevel.FULL, seed=42, use_real_behavior=True)
platform = make_thesis_platform(cfg)
result = platform.reset(seed=42)
human_sessions, agent_sessions = 0, 0
for t in range(cfg.max_steps):
action = platform.instruments.refs * 1.1
result = platform.step(action)
sep = compute_separability(result.logs, result.hidden.contamination)
human_sessions += sep.n_human_sessions
agent_sessions += sep.n_agent_sessions
total = human_sessions + agent_sessions
print(f"\nTotal sessions: {total}")
print(f"Human sessions: {human_sessions} ({100*human_sessions/total:.1f}%)")
print(f"Agent sessions: {agent_sessions} ({100*agent_sessions/total:.1f}%)")
print(f"True contamination: {cfg.alpha_contamination:.1%}")
print(f"Observed contamination: {agent_sessions/total:.1%}")
if __name__ == '__main__':
demo_basic_simulation()
demo_contamination_sweep()
# demo_policy_comparison()
# demo_session_analysis()