mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
shock: defining new lab environment and formulation
This commit is contained in:
6
lab/case/__init__.py
Normal file
6
lab/case/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
Case studies implementing specific research scenarios.
|
||||
|
||||
Available cases:
|
||||
- thesis: PHANTOM thesis implementation with contaminated demand and DR-RL
|
||||
"""
|
||||
25
lab/case/thesis/__init__.py
Normal file
25
lab/case/thesis/__init__.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""
|
||||
Thesis-specific implementation of the PHANTOM pricing defense framework.
|
||||
|
||||
This module implements the mathematical models from the thesis:
|
||||
- ContaminatedArrivalModel: Mixture demand Q(p) = (1-α)d_H + αd_A (Eq 3)
|
||||
- HybridExecutionModel: Divergent H/A behavior with separability (Section 2.1)
|
||||
- RobustStackelbergObjective: Maximin objective with COI penalty (Eq 23)
|
||||
- COIMetrics: Cost of Information tracking (Definition 1)
|
||||
|
||||
The platform configuration creates a research environment that directly
|
||||
maps to the thesis mathematical framework for DR-RL experiments.
|
||||
"""
|
||||
from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig
|
||||
from .execution import HybridExecutionModel, HybridExecutionConfig
|
||||
from .objectives import RobustStackelbergObjective, COIObjective
|
||||
from .platform import make_thesis_platform, ThesisConfig
|
||||
from .metrics import COIMetrics, compute_coi, compute_separability
|
||||
|
||||
__all__ = [
|
||||
'ContaminatedArrivalModel', 'ContaminatedArrivalConfig',
|
||||
'HybridExecutionModel', 'HybridExecutionConfig',
|
||||
'RobustStackelbergObjective', 'COIObjective',
|
||||
'make_thesis_platform', 'ThesisConfig',
|
||||
'COIMetrics', 'compute_coi', 'compute_separability',
|
||||
]
|
||||
327
lab/case/thesis/arrivals.py
Normal file
327
lab/case/thesis/arrivals.py
Normal file
@@ -0,0 +1,327 @@
|
||||
"""Contaminated arrivals using learned MDP kernels from behavior_loader.
|
||||
|
||||
Implements thesis demand model (Section 3.1):
|
||||
- Aggregate demand Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t (Eq 3)
|
||||
- Demand proxy q̂_{t,i} = Σ_s Σ_k ω(a_{s,k}) · 1[i_{s,k} = i] (Eq 2)
|
||||
- Per-session separability via KL divergence Δ_H, Δ_A (Eq 20-21)
|
||||
|
||||
The arrival model samples sessions from a mixture of human/agent behavioral profiles,
|
||||
each session produces a trajectory τ_s and associated demand computation q(τ').
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
from types import SimpleNamespace
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
import numpy as np
|
||||
from ...outlet.types import Opportunity, InstrumentSet, MarketState, HiddenState
|
||||
from ...outlet.constants import Side, OpportunityType
|
||||
from ...outlet.math_util import poisson_arrivals
|
||||
|
||||
try:
|
||||
import sys
|
||||
from pathlib import Path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
|
||||
from sim.rl.behavior_loader.models import (
|
||||
BehaviorModel, AgentBehaviorModel, aggregate_event_transitions, kl_divergence
|
||||
)
|
||||
REAL_MDP = True
|
||||
except ImportError:
|
||||
REAL_MDP = False
|
||||
kl_divergence = None
|
||||
|
||||
EVENT_PAGE = {"session_start": "/", "view_item_page": "/products", "learn_more_about_item": "/products/details",
|
||||
"add_item_to_cart": "/cart", "purchase_complete": "/checkout", "session_end": "/checkout/success"}
|
||||
EVENT_CANON = {"page_view": "session_start", "hover_over_paragraph": "view_item_page", "hover_over_title": "view_item_page",
|
||||
"view_item_page": "view_item_page", "learn_more_about_item": "learn_more_about_item",
|
||||
"add_item_to_cart": "add_item_to_cart", "checkout_start": "purchase_complete", "remove_item": "view_item_page"}
|
||||
|
||||
# action space partition A = A_nav ∪ A_cart ∪ A_filter ∪ A_dwell with signal weights ω (Table 1)
|
||||
ACTION_WEIGHTS: Dict[str, float] = {
|
||||
"add_item_to_cart": 0.8, "remove_item": 0.6, "checkout_start": 0.9, "purchase_complete": 1.0, # A_cart
|
||||
"hover_over_title": 0.3, "hover_over_paragraph": 0.35, "hover_over_link": 0.25, # A_dwell
|
||||
"page_view": 0.1, "session_start": 0.05, "view_item_page": 0.15, "learn_more_about_item": 0.2, # A_nav
|
||||
"search": 0.05, "filter_date": 0.05, "filter_price": 0.08, "sort": 0.03, "session_end": 0.0, # A_filter
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class SessionDemand:
|
||||
"""Per-session demand computation per thesis formulation (Section 3.1).
|
||||
|
||||
Each session s ∈ S produces trajectory τ_s and demand proxy q̂. The platform uses
|
||||
divergence signals Δ_H, Δ_A to estimate per-session contamination α̂(τ').
|
||||
"""
|
||||
session_id: str
|
||||
q: Dict[int, float] # q̂_i demand proxy per product (Eq 2)
|
||||
trajectory: List[Dict] # τ_s = (e_{s,1}, ..., e_{s,L_s})
|
||||
delta_h: float = 0.0 # D_KL(T̂' || T̄_H) (Eq 20)
|
||||
delta_a: float = 0.0 # D_KL(T̂' || T̄_A) (Eq 21)
|
||||
alpha_hat: float = 0.0 # per-session contamination estimate
|
||||
actor_class: str = "H" # ground truth Y_s ∈ {H, A}
|
||||
theta: Dict[str, float] = field(default_factory=dict)
|
||||
|
||||
|
||||
def compute_demand_proxy(events: List[Dict], n_products: int) -> Dict[int, float]:
|
||||
"""Compute q̂_{t,i} = Σ_k ω(a_{s,k}) · 1[i_{s,k} = i] per Eq 2."""
|
||||
q = {i: 0.0 for i in range(n_products)}
|
||||
for e in events:
|
||||
action, pidx = e.get("eventName", ""), e.get("product_idx")
|
||||
if pidx is not None and 0 <= pidx < n_products:
|
||||
q[pidx] += ACTION_WEIGHTS.get(action, 0.1)
|
||||
return q
|
||||
|
||||
|
||||
def compute_session_divergence(events: List[Dict], ref_h: Dict, ref_a: Dict) -> Tuple[float, float]:
|
||||
"""Compute Δ_H, Δ_A divergence signals from trajectory (Eq 20-21)."""
|
||||
if not events or kl_divergence is None:
|
||||
return 0.0, 0.0
|
||||
# build empirical transition kernel from trajectory
|
||||
trans: Dict[str, Dict[str, int]] = {}
|
||||
prev = "session_start"
|
||||
for e in events:
|
||||
curr = e.get("eventName", "session_end")
|
||||
trans.setdefault(prev, {})
|
||||
trans[prev][curr] = trans[prev].get(curr, 0) + 1
|
||||
prev = curr
|
||||
# normalize to probabilities
|
||||
kernel = {}
|
||||
for s, dests in trans.items():
|
||||
total = sum(dests.values())
|
||||
kernel[s] = {d: c / total for d, c in dests.items()} if total > 0 else {}
|
||||
# aggregate to event-level and compute KL divergence against reference kernels
|
||||
delta_h = sum(kl_divergence(kernel.get(s, {}), ref_h.get(s, {})) for s in kernel) / max(len(kernel), 1)
|
||||
delta_a = sum(kl_divergence(kernel.get(s, {}), ref_a.get(s, {})) for s in kernel) / max(len(kernel), 1)
|
||||
return delta_h, delta_a
|
||||
|
||||
def _canonicalize(raw: Dict) -> Dict:
|
||||
out = {}
|
||||
for src, dsts in raw.items():
|
||||
sc = EVENT_CANON.get(src, src)
|
||||
out.setdefault(sc, {})
|
||||
for dst, p in dsts.items():
|
||||
dc = EVENT_CANON.get(dst, dst)
|
||||
out[sc][dc] = out[sc].get(dc, 0.0) + p
|
||||
return {s: {k: v/sum(d.values()) for k, v in d.items()} for s, d in out.items() if sum(d.values()) > 0}
|
||||
|
||||
|
||||
class BehavioralProfile:
|
||||
"""Markov profile from learned MDP kernels (Section 3.5.2).
|
||||
|
||||
Transition kernel T̂_Y estimated via MLE: P̂(s'|s) = N(s,s') / Σ_k N(s,k) (Eq 19)
|
||||
"""
|
||||
STATES = ["session_start", "view_item_page", "learn_more_about_item", "add_item_to_cart", "purchase_complete", "session_end"]
|
||||
# fallback kernels T̄_H, T̄_A when real data unavailable
|
||||
FALLBACK_H = {"session_start": {"view_item_page": 0.85, "session_end": 0.15},
|
||||
"view_item_page": {"learn_more_about_item": 0.4, "add_item_to_cart": 0.3, "view_item_page": 0.2, "session_end": 0.1},
|
||||
"learn_more_about_item": {"add_item_to_cart": 0.5, "view_item_page": 0.3, "session_end": 0.2},
|
||||
"add_item_to_cart": {"purchase_complete": 0.6, "view_item_page": 0.25, "session_end": 0.15},
|
||||
"purchase_complete": {"session_end": 1.0}}
|
||||
FALLBACK_A = {"session_start": {"view_item_page": 0.95, "session_end": 0.05},
|
||||
"view_item_page": {"learn_more_about_item": 0.6, "view_item_page": 0.25, "add_item_to_cart": 0.1, "session_end": 0.05},
|
||||
"learn_more_about_item": {"view_item_page": 0.5, "add_item_to_cart": 0.15, "learn_more_about_item": 0.3, "session_end": 0.05},
|
||||
"add_item_to_cart": {"view_item_page": 0.4, "purchase_complete": 0.2, "session_end": 0.4},
|
||||
"purchase_complete": {"session_end": 1.0}}
|
||||
|
||||
def __init__(self, actor: str, pprobs: np.ndarray, data_dir: str = ""):
|
||||
self.actor, self.pprobs = actor, np.clip(pprobs, 0.0, 0.95)
|
||||
self.trans = self._load(data_dir) # T̂_Y transition kernel
|
||||
self._ensure_terminal()
|
||||
self.dwell = {s: (1.2, 0.5) if actor == "agents" else (2.0, 1.2) for s in self.STATES}
|
||||
|
||||
def _load(self, data_dir: str) -> Dict:
|
||||
if not REAL_MDP or not data_dir:
|
||||
print("using fallback")
|
||||
return dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H)
|
||||
try:
|
||||
mdp = (AgentBehaviorModel if self.actor == "agents" else BehaviorModel)(data_dir).build_MDP()
|
||||
raw = aggregate_event_transitions(mdp) if mdp.get("transitions") else {}
|
||||
return _canonicalize(raw) if raw else dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H)
|
||||
except Exception:
|
||||
print("using fallback")
|
||||
return dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H)
|
||||
|
||||
def _ensure_terminal(self):
|
||||
self.trans.setdefault("purchase_complete", {})["session_end"] = self.trans.get("purchase_complete", {}).get("session_end", 1.0)
|
||||
self.trans.setdefault("session_start", {"view_item_page": 0.7, "learn_more_about_item": 0.2, "session_end": 0.1})
|
||||
|
||||
def _tprobs(self, state: str, pidx: int) -> Dict[str, float]:
|
||||
probs = dict(self.trans.get(state, {"session_end": 1.0}))
|
||||
if state == "add_item_to_cart":
|
||||
base = probs.get("purchase_complete", 0.0)
|
||||
df = float(self.pprobs[pidx]) * (0.3 if self.actor == "agents" else 1.0)
|
||||
adj = np.clip(base * 0.5 + df * 0.5, 0.0, 0.95)
|
||||
rem = max(1e-6, 1.0 - adj)
|
||||
other = sum(v for k, v in probs.items() if k != "purchase_complete")
|
||||
probs = {k: (adj if k == "purchase_complete" else v * rem / max(other, 1e-6)) for k, v in probs.items()}
|
||||
total = sum(probs.values())
|
||||
return {k: v/total for k, v in probs.items()} if total > 0 else {"session_end": 1.0}
|
||||
|
||||
def sample(self, rng: np.random.Generator, sid: str, prices: np.ndarray, costs: np.ndarray) -> Tuple[List[Dict], List[SimpleNamespace]]:
|
||||
events, fevts = [], []
|
||||
state, t, pidx = "session_start", 0.0, int(rng.integers(0, len(prices)))
|
||||
cost, cprice = float(costs[pidx]), max(float(prices[pidx]), float(costs[pidx]) * 1.05)
|
||||
|
||||
while state != "session_end" and len(events) < 40:
|
||||
if state != "session_start":
|
||||
row = {"session_id": sid, "actor": "agent" if self.actor == "agents" else "human",
|
||||
"eventName": state, "product_idx": pidx, "productId": f"product-{pidx:04d}",
|
||||
"price_offered": cprice, "price_paid": 0.0, "page": EVENT_PAGE.get(state, "/"),
|
||||
"ts": t, "unit_cost": cost, "base_price": float(prices[pidx])}
|
||||
if state == "purchase_complete":
|
||||
row["price_paid"] = max(cprice * (1.0 + rng.normal(0.0, 0.015)), cost)
|
||||
events.append(row)
|
||||
fevts.append(SimpleNamespace(eventName=state, page=row["page"], productId=row["productId"], ts=t))
|
||||
|
||||
probs = self._tprobs(state, pidx)
|
||||
state = rng.choice(list(probs.keys()), p=list(probs.values()))
|
||||
sh, sc = self.dwell.get(state, (2.0, 1.0))
|
||||
t += max(0.3, rng.gamma(shape=sh, scale=sc))
|
||||
return events, fevts
|
||||
|
||||
|
||||
@dataclass
|
||||
class ContaminatedArrivalConfig:
|
||||
base_rate: float = 20.0
|
||||
alpha_contamination: float = 0.2
|
||||
alpha_drift: float = 0.0
|
||||
alpha_bounds: tuple[float, float] = (0.0, 0.5)
|
||||
human_views_range: tuple[int, int] = (1, 4)
|
||||
agent_views_range: tuple[int, int] = (3, 10)
|
||||
agent_systematic: bool = True
|
||||
use_real_behavior: bool = True
|
||||
human_data_dir: str = ""
|
||||
agent_data_dir: str = ""
|
||||
|
||||
|
||||
class ContaminatedArrivalModel:
|
||||
"""Mixture model Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t (Eq 3).
|
||||
|
||||
Samples sessions from human/agent behavioral profiles, computes per-session
|
||||
demand proxy q̂ and divergence signals Δ_H, Δ_A for separability.
|
||||
"""
|
||||
|
||||
def __init__(self, cfg: ContaminatedArrivalConfig | None = None):
|
||||
self.cfg = cfg or ContaminatedArrivalConfig()
|
||||
self._alpha = self.cfg.alpha_contamination
|
||||
self._scount = 0
|
||||
self._profiles: Dict[str, BehavioralProfile] = {}
|
||||
self._ref_kernels: Dict[str, Dict] = {} # T̄_H, T̄_A reference kernels
|
||||
self._session_demands: List[SessionDemand] = [] # collected session demands
|
||||
|
||||
@property
|
||||
def alpha(self) -> float:
|
||||
return self._alpha
|
||||
|
||||
def _profile(self, actor: str, pprobs: np.ndarray) -> BehavioralProfile:
|
||||
key = actor
|
||||
if key not in self._profiles:
|
||||
ddir = self.cfg.agent_data_dir if actor == "agents" else self.cfg.human_data_dir
|
||||
if not ddir and self.cfg.use_real_behavior:
|
||||
base = Path(__file__).parent.parent.parent.parent / "experiments"
|
||||
ddir = str(base / ("agents/collected_data" if actor == "agents" else "collected_data"))
|
||||
profile = BehavioralProfile(actor, pprobs, ddir if self.cfg.use_real_behavior else "")
|
||||
self._profiles[key] = profile
|
||||
self._ref_kernels[key] = profile.trans # cache T̄_Y for divergence
|
||||
return self._profiles[key]
|
||||
|
||||
def get_ref_kernels(self) -> Tuple[Dict, Dict]:
|
||||
"""Return reference transition kernels T̄_H, T̄_A for divergence computation."""
|
||||
return (self._ref_kernels.get("humans", BehavioralProfile.FALLBACK_H),
|
||||
self._ref_kernels.get("agents", BehavioralProfile.FALLBACK_A))
|
||||
|
||||
def get_session_demands(self) -> List[SessionDemand]:
|
||||
"""Return collected session demands for downstream analysis."""
|
||||
return self._session_demands
|
||||
|
||||
def sample(self, t: float, dt: float, instruments: InstrumentSet,
|
||||
market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]:
|
||||
"""Sample arrivals as per Eq 3: mixture of human/agent demand distributions.
|
||||
|
||||
For each session s, computes:
|
||||
- Trajectory τ_s from behavioral profile sampling
|
||||
- Demand proxy q̂ via weighted action aggregation (Eq 2)
|
||||
- Divergence signals Δ_H, Δ_A for separability (Eq 20-21)
|
||||
- Per-session contamination estimate α̂(τ')
|
||||
"""
|
||||
cfg = self.cfg
|
||||
if cfg.alpha_drift != 0:
|
||||
self._alpha = np.clip(self._alpha + cfg.alpha_drift * rng.normal(), *cfg.alpha_bounds)
|
||||
hidden.contamination = self._alpha
|
||||
|
||||
n_sess = poisson_arrivals(cfg.base_rate * hidden.true_demand_intensity, dt, rng)
|
||||
prices, costs = instruments.refs, instruments.costs
|
||||
margin = np.clip((prices - costs) / np.maximum(costs, 1e-3), -0.9, 2.0)
|
||||
hprob, aprob = 0.08 * np.exp(-1.2 * margin), 0.05 * np.exp(-0.6 * margin)
|
||||
ref_h, ref_a = self.get_ref_kernels()
|
||||
|
||||
opps = []
|
||||
for _ in range(n_sess):
|
||||
self._scount += 1
|
||||
sid = f"s{self._scount:06d}"
|
||||
is_agent = rng.random() < self._alpha
|
||||
actor, probs = ("agents", aprob) if is_agent else ("humans", hprob)
|
||||
profile = self._profile(actor, probs)
|
||||
events, fevts = profile.sample(rng, sid, prices, costs)
|
||||
|
||||
# compute demand proxy q̂ per Eq 2
|
||||
q = compute_demand_proxy(events, instruments.n)
|
||||
|
||||
# compute divergence signals Δ_H, Δ_A per Eq 20-21
|
||||
delta_h, delta_a = compute_session_divergence(events, ref_h, ref_a)
|
||||
# per-session contamination estimate α̂(τ') = σ(β(Δ_H - Δ_A))
|
||||
alpha_hat = 1.0 / (1.0 + np.exp(-2.0 * (delta_h - delta_a))) if (delta_h + delta_a) > 0 else 0.5
|
||||
|
||||
theta = ({'price_sensitivity': rng.uniform(0.05, 0.2), 'base_conversion': 0.01, 'info_value': 1.0} if is_agent
|
||||
else {'price_sensitivity': rng.uniform(1.5, 4.0), 'base_conversion': rng.uniform(0.2, 0.5), 'info_value': 0.0})
|
||||
|
||||
# store session demand for downstream analysis
|
||||
self._session_demands.append(SessionDemand(
|
||||
session_id=sid, q=q, trajectory=events, delta_h=delta_h, delta_a=delta_a,
|
||||
alpha_hat=alpha_hat, actor_class="A" if is_agent else "H", theta=theta))
|
||||
|
||||
viewed = list({e["product_idx"] for e in events if "product_idx" in e})
|
||||
if not viewed:
|
||||
vr = cfg.agent_views_range if is_agent else cfg.human_views_range
|
||||
viewed = list(rng.choice(instruments.n, size=min(rng.integers(*vr), instruments.n), replace=False))
|
||||
|
||||
for vi, iid in enumerate(viewed):
|
||||
opps.append(Opportunity(
|
||||
id=f"{sid}-{iid}", type=OpportunityType.SESSION, side=Side.BUY,
|
||||
instrument_id=int(iid), size=1.0, t=t + rng.uniform(0, dt),
|
||||
context={'session_id': sid, 'actor_class': 'AGENT' if is_agent else 'HUMAN', 'is_agent': is_agent,
|
||||
'reconnaissance_intent': is_agent, 'view_index': vi, 'total_views': len(viewed),
|
||||
'theta': theta, 'trajectory_events': fevts, 'mdp_trajectory': events,
|
||||
'demand_proxy': q, 'alpha_hat': alpha_hat, 'delta_h': delta_h, 'delta_a': delta_a}))
|
||||
return opps
|
||||
|
||||
|
||||
@dataclass
|
||||
class AdversarialArrivalConfig:
|
||||
base_rate: float = 5.0
|
||||
n_parallel_agents: int = 3
|
||||
query_all_products: bool = True
|
||||
|
||||
|
||||
class AdversarialArrivalModel:
|
||||
"""Adversarial coordination (Theorem 1): as N->inf, COI->0."""
|
||||
|
||||
def __init__(self, cfg: AdversarialArrivalConfig | None = None):
|
||||
self.cfg = cfg or AdversarialArrivalConfig()
|
||||
self._qcount = 0
|
||||
|
||||
def sample(self, t: float, dt: float, instruments: InstrumentSet,
|
||||
market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]:
|
||||
cfg, opps = self.cfg, []
|
||||
for _ in range(poisson_arrivals(cfg.base_rate, dt, rng)):
|
||||
self._qcount += 1
|
||||
for ai in range(cfg.n_parallel_agents):
|
||||
sid = f"adv{self._qcount:06d}-{ai}"
|
||||
prods = np.arange(instruments.n) if cfg.query_all_products else rng.choice(instruments.n, size=1)
|
||||
for iid in prods:
|
||||
opps.append(Opportunity(
|
||||
id=f"{sid}-{iid}", type=OpportunityType.SESSION, side=Side.BUY,
|
||||
instrument_id=int(iid), size=1.0, t=t,
|
||||
context={'session_id': sid, 'actor_class': 'AGENT', 'is_agent': True, 'adversarial': True,
|
||||
'agent_index': ai, 'query_group': self._qcount,
|
||||
'theta': {'price_sensitivity': 0.0, 'base_conversion': 0.0, 'info_value': 1.0}}))
|
||||
return opps
|
||||
91
lab/case/thesis/execution.py
Normal file
91
lab/case/thesis/execution.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""Execution models with divergent H/A behavior using ground truth labels."""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict
|
||||
import numpy as np
|
||||
from ...outlet.types import Opportunity, Quote, InstrumentSet, MarketState
|
||||
from ...outlet.math_util import sigmoid, safe_log, EPS
|
||||
|
||||
|
||||
@dataclass
|
||||
class HybridExecutionConfig:
|
||||
human_base_prob: float = 0.3
|
||||
human_elasticity: float = 2.5
|
||||
agent_conversion: float = 0.01
|
||||
cross_elasticity: float = 0.4
|
||||
quality_weight: float = 0.2
|
||||
use_separability: bool = False
|
||||
|
||||
|
||||
class HybridExecutionModel:
|
||||
"""Execution with divergent H/A behavior using ground truth labels."""
|
||||
|
||||
def __init__(self, cfg: HybridExecutionConfig | None = None):
|
||||
self.cfg = cfg or HybridExecutionConfig()
|
||||
|
||||
def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
|
||||
market: MarketState | None, rng: np.random.Generator) -> float:
|
||||
cfg, idx = self.cfg, int(opp.instrument_id)
|
||||
price, ref, cost = float(quote.prices[idx]), float(instruments.refs[idx]), float(instruments.costs[idx])
|
||||
ctx = opp.context
|
||||
theta = ctx.get('theta', {})
|
||||
is_agent = ctx.get('is_agent', False)
|
||||
|
||||
if is_agent:
|
||||
return cfg.agent_conversion * theta.get('base_conversion', 1.0)
|
||||
|
||||
# human logit discrete choice
|
||||
sens = theta.get('price_sensitivity', cfg.human_elasticity)
|
||||
base = theta.get('base_conversion', cfg.human_base_prob)
|
||||
u_price = -sens * safe_log(price / (ref + EPS))
|
||||
quality = instruments.instruments[idx].attrs.get('quality', 0.5)
|
||||
u_quality = cfg.quality_weight * quality
|
||||
|
||||
u_comp = 0.0
|
||||
if market and market.competitor_quotes is not None:
|
||||
cp = market.competitor_quotes[idx]
|
||||
if cp < price:
|
||||
u_comp = -cfg.cross_elasticity * (price - cp) / ref
|
||||
|
||||
utility = safe_log(base / (1 - base + EPS)) + u_price + u_quality + u_comp
|
||||
return float(sigmoid(utility))
|
||||
|
||||
def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray:
|
||||
if context is None:
|
||||
return fills / (self.cfg.human_base_prob + EPS)
|
||||
agent_frac = context.get('contamination', 0.0)
|
||||
return fills / (self.cfg.human_base_prob * (1 - agent_frac) + EPS)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SeparableExecutionConfig:
|
||||
human_funnel: Dict[str, float] = None
|
||||
agent_funnel: Dict[str, float] = None
|
||||
|
||||
def __post_init__(self):
|
||||
self.human_funnel = self.human_funnel or {'view_to_detail': 0.4, 'detail_to_cart': 0.3, 'cart_to_purchase': 0.6}
|
||||
self.agent_funnel = self.agent_funnel or {'view_to_detail': 0.8, 'detail_to_cart': 0.05, 'cart_to_purchase': 0.1}
|
||||
|
||||
|
||||
class SeparableExecutionModel:
|
||||
"""Execution with Markov funnel kernels using ground truth labels."""
|
||||
|
||||
def __init__(self, cfg: SeparableExecutionConfig | None = None):
|
||||
self.cfg = cfg or SeparableExecutionConfig()
|
||||
|
||||
def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
|
||||
market: MarketState | None, rng: np.random.Generator) -> float:
|
||||
is_agent = opp.context.get('is_agent', False)
|
||||
probs = self.cfg.agent_funnel if is_agent else self.cfg.human_funnel
|
||||
p = probs['view_to_detail'] * probs['detail_to_cart'] * probs['cart_to_purchase']
|
||||
|
||||
if not is_agent:
|
||||
idx = int(opp.instrument_id)
|
||||
price_ratio = quote.prices[idx] / (instruments.refs[idx] + EPS)
|
||||
p *= np.exp(-0.5 * (price_ratio - 1.0))
|
||||
return float(np.clip(p, 0, 1))
|
||||
|
||||
def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray:
|
||||
h = self.cfg.human_funnel
|
||||
exp_conv = h['view_to_detail'] * h['detail_to_cart'] * h['cart_to_purchase']
|
||||
return fills / (exp_conv + EPS)
|
||||
102
lab/case/thesis/metrics.py
Normal file
102
lab/case/thesis/metrics.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""Thesis metrics for COI and behavioral analysis using ground truth labels."""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict
|
||||
import numpy as np
|
||||
from ...outlet.types import StepLogs, StepMetrics, Quote, InstrumentSet
|
||||
from ...outlet.math_util import safe_log, EPS
|
||||
|
||||
|
||||
@dataclass
|
||||
class COIMetrics:
|
||||
coi_level: float = 0.0
|
||||
coi_leakage: float = 0.0
|
||||
realized_premium: float = 0.0
|
||||
theoretical_max: float = 0.0
|
||||
erosion_rate: float = 0.0
|
||||
|
||||
def to_dict(self) -> dict[str, float]:
|
||||
return {k: getattr(self, k) for k in ['coi_level', 'coi_leakage', 'realized_premium', 'theoretical_max', 'erosion_rate']}
|
||||
|
||||
|
||||
def compute_coi(quote: Quote, instruments: InstrumentSet, metrics: StepMetrics, contamination: float) -> COIMetrics:
|
||||
prices, costs, refs = quote.prices, instruments.costs, instruments.refs
|
||||
margins = prices - costs
|
||||
coi_level = float(np.mean(margins))
|
||||
theoretical_max = float(np.mean(costs))
|
||||
realized_premium = (metrics.revenue - metrics.cost) / metrics.units_traded if metrics.units_traded > 0 else 0.0
|
||||
price_var = float(np.var(prices / refs))
|
||||
coi_leakage = contamination * (coi_level + price_var)
|
||||
erosion_rate = contamination * coi_level / (theoretical_max + EPS)
|
||||
return COIMetrics(coi_level=coi_level, coi_leakage=coi_leakage, realized_premium=realized_premium,
|
||||
theoretical_max=theoretical_max, erosion_rate=erosion_rate)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SeparabilityMetrics:
|
||||
classification_accuracy: float = 0.0
|
||||
estimated_alpha: float = 0.0
|
||||
n_human_sessions: int = 0
|
||||
n_agent_sessions: int = 0
|
||||
|
||||
|
||||
def compute_separability(logs: StepLogs, true_alpha: float) -> SeparabilityMetrics:
|
||||
"""Compute separability using ground truth labels only."""
|
||||
if logs.events is None or len(logs.events) == 0:
|
||||
return SeparabilityMetrics(estimated_alpha=true_alpha)
|
||||
|
||||
sessions: Dict[str, bool] = {}
|
||||
for evt in logs.events:
|
||||
sid = evt.metadata.get('session_id', evt.opportunity_id)
|
||||
if sid not in sessions:
|
||||
sessions[sid] = evt.metadata.get('is_agent', False)
|
||||
|
||||
n_agent = sum(1 for is_agent in sessions.values() if is_agent)
|
||||
n_human = len(sessions) - n_agent
|
||||
est_alpha = n_agent / len(sessions) if sessions else 0.0
|
||||
|
||||
return SeparabilityMetrics(
|
||||
classification_accuracy=1.0, # ground truth is always correct
|
||||
estimated_alpha=est_alpha,
|
||||
n_human_sessions=n_human,
|
||||
n_agent_sessions=n_agent)
|
||||
|
||||
|
||||
@dataclass
|
||||
class RevenueAttribution:
|
||||
total_revenue: float = 0.0
|
||||
human_revenue: float = 0.0
|
||||
agent_revenue: float = 0.0
|
||||
human_conversion: float = 0.0
|
||||
agent_conversion: float = 0.0
|
||||
|
||||
|
||||
def compute_attribution(logs: StepLogs, metrics: StepMetrics) -> RevenueAttribution:
|
||||
if logs.executions is None:
|
||||
return RevenueAttribution(total_revenue=metrics.revenue)
|
||||
|
||||
human_rev, agent_rev, human_cnt, agent_cnt = 0.0, 0.0, 0, 0
|
||||
for exe in logs.executions:
|
||||
if exe.propensity < 0.05:
|
||||
agent_rev += exe.price * exe.size_filled
|
||||
agent_cnt += 1
|
||||
else:
|
||||
human_rev += exe.price * exe.size_filled
|
||||
human_cnt += 1
|
||||
|
||||
total_exp = logs.aggregates.get('n_arrivals', 1)
|
||||
return RevenueAttribution(
|
||||
total_revenue=metrics.revenue, human_revenue=human_rev, agent_revenue=agent_rev,
|
||||
human_conversion=human_cnt / (total_exp * 0.8 + EPS),
|
||||
agent_conversion=agent_cnt / (total_exp * 0.2 + EPS))
|
||||
|
||||
|
||||
def order_statistic_erosion(n_agents: int, price_variance: float) -> float:
|
||||
"""COI erosion from Theorem 1: as N->inf, min(p_1..p_N)->p_min."""
|
||||
if n_agents <= 1:
|
||||
return 0.0
|
||||
sigma, log_n = np.sqrt(price_variance), safe_log(n_agents)
|
||||
if log_n < 1:
|
||||
return 0.0
|
||||
shift = sigma * (np.sqrt(2 * log_n) - (safe_log(log_n) + safe_log(4 * np.pi)) / (2 * np.sqrt(2 * log_n) + EPS))
|
||||
return float(min(shift / (sigma * 2 + EPS), 1.0))
|
||||
228
lab/case/thesis/objectives.py
Normal file
228
lab/case/thesis/objectives.py
Normal file
@@ -0,0 +1,228 @@
|
||||
"""
|
||||
Thesis-specific objectives implementing robust pricing under contamination.
|
||||
|
||||
Implements the Maximin objective from Eq 23:
|
||||
π* = argmax_π min_{Q ∈ U_ε} E_d~Q[R(p,d) - λ·COI(p)]
|
||||
|
||||
Key components:
|
||||
- COIObjective: Cost of Information penalty (Definition 1)
|
||||
- RobustStackelbergObjective: Full maximin objective with Wasserstein robustness
|
||||
- UXPenalty: User experience degradation from volatility
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
import numpy as np
|
||||
from ...outlet.objectives.base import BaseObjective, CompositeObjective
|
||||
from ...outlet.types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
|
||||
from ...outlet.math_util import safe_log, EPS
|
||||
|
||||
class COIObjective(BaseObjective):
|
||||
"""Cost of Information penalty from Definition 1.
|
||||
|
||||
COI(π) = E[P] - p_min
|
||||
|
||||
The expected price premium over marginal cost represents the platform's
|
||||
pricing power. Agent reconnaissance erodes this by revealing price
|
||||
distribution to buyers.
|
||||
|
||||
We implement COI_leakage = f(τ') · InfoValue(p, τ')
|
||||
where f(τ') is the estimated agent probability.
|
||||
"""
|
||||
|
||||
def __init__(self, lambda_coi: float = 1.0, use_revelation: bool = False):
|
||||
"""
|
||||
Args:
|
||||
lambda_coi: Weight on COI penalty
|
||||
use_revelation: If True, use -log(π(p)) as info value (penalizes rare prices)
|
||||
"""
|
||||
self.lambda_coi = lambda_coi
|
||||
self.use_revelation = use_revelation
|
||||
|
||||
def reward(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
|
||||
# COI_leakage = α · InfoValue
|
||||
alpha = hidden.contamination
|
||||
|
||||
if self.use_revelation:
|
||||
# revelation surrogate: rare prices reveal more about policy
|
||||
# InfoValue = -log(π(p|τ')) ≈ surprise of the price
|
||||
price_surprise = np.mean(np.abs(quote.prices - instruments.refs) / (instruments.refs + EPS))
|
||||
info_value = price_surprise
|
||||
else:
|
||||
# query-tax surrogate: each agent query incurs constant leakage
|
||||
info_value = 1.0
|
||||
|
||||
leakage = alpha * info_value
|
||||
return -self.lambda_coi * leakage
|
||||
|
||||
def breakdown(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
|
||||
alpha = hidden.contamination
|
||||
margins = (quote.prices - instruments.costs) / (instruments.costs + EPS)
|
||||
return {
|
||||
'coi_penalty': self.reward(quote, instruments, metrics, hidden, obs),
|
||||
'contamination': alpha,
|
||||
'avg_margin': float(np.mean(margins)),
|
||||
}
|
||||
|
||||
@dataclass
|
||||
class RobustObjectiveConfig:
|
||||
"""Configuration for robust Stackelberg objective.
|
||||
|
||||
Attributes:
|
||||
lambda_coi: Weight on COI penalty (λ in Eq 23)
|
||||
lambda_ux: Weight on UX penalty
|
||||
lambda_volatility: Weight on price volatility penalty
|
||||
gamma_inventory: Inventory risk aversion
|
||||
wasserstein_epsilon: Ambiguity set radius (ε in Eq 21)
|
||||
"""
|
||||
lambda_coi: float = 0.5
|
||||
lambda_ux: float = 0.1
|
||||
lambda_volatility: float = 0.2
|
||||
gamma_inventory: float = 0.1
|
||||
wasserstein_epsilon: float = 0.1
|
||||
|
||||
class RobustStackelbergObjective(BaseObjective):
|
||||
"""Implements the Maximin Objective from thesis Eq 23.
|
||||
|
||||
π* = argmax_π min_{Q ∈ U_ε(P̂_N)} E_d~Q[R(p,d) - λ·COI(p)]
|
||||
|
||||
The objective balances:
|
||||
1. Revenue R(p,d) from human purchases
|
||||
2. COI penalty for information leakage to agents
|
||||
3. UX penalty for price volatility
|
||||
4. Inventory/holding costs
|
||||
|
||||
The min over ambiguity set U_ε is approximated by penalizing
|
||||
high contamination scenarios more heavily.
|
||||
"""
|
||||
|
||||
def __init__(self, cfg: RobustObjectiveConfig | None = None):
|
||||
self.cfg = cfg or RobustObjectiveConfig()
|
||||
|
||||
def reward(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
|
||||
cfg = self.cfg
|
||||
|
||||
# 1. base revenue (R(p,d))
|
||||
revenue = metrics.revenue
|
||||
cost = metrics.cost
|
||||
profit = revenue - cost
|
||||
|
||||
# 2. COI penalty: scales with contamination and margin extraction
|
||||
# high margins + high contamination = high leakage
|
||||
alpha = hidden.contamination
|
||||
margins = quote.prices - instruments.costs
|
||||
avg_margin = float(np.mean(margins))
|
||||
coi_penalty = cfg.lambda_coi * avg_margin * alpha
|
||||
|
||||
# 3. UX penalty: price volatility harms legitimate users
|
||||
volatility_penalty = cfg.lambda_volatility * metrics.volatility
|
||||
|
||||
# 4. inventory/position cost
|
||||
position_penalty = cfg.gamma_inventory * metrics.position_cost
|
||||
|
||||
# 5. lost opportunity cost (stockouts)
|
||||
lost_penalty = 0.1 * metrics.lost_opportunity
|
||||
|
||||
# robust adjustment: under adversarial distribution Q,
|
||||
# expect lower revenue and higher costs
|
||||
# approximate via worst-case contamination within ε-ball
|
||||
worst_case_alpha = min(alpha + cfg.wasserstein_epsilon, 1.0)
|
||||
robustness_penalty = cfg.wasserstein_epsilon * avg_margin * worst_case_alpha
|
||||
|
||||
total = profit - coi_penalty - volatility_penalty - position_penalty - lost_penalty - robustness_penalty
|
||||
|
||||
return total
|
||||
|
||||
def breakdown(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
|
||||
cfg = self.cfg
|
||||
alpha = hidden.contamination
|
||||
margins = quote.prices - instruments.costs
|
||||
avg_margin = float(np.mean(margins))
|
||||
|
||||
return {
|
||||
'revenue': metrics.revenue,
|
||||
'cost': metrics.cost,
|
||||
'profit': metrics.revenue - metrics.cost,
|
||||
'coi_penalty': -cfg.lambda_coi * avg_margin * alpha,
|
||||
'volatility_penalty': -cfg.lambda_volatility * metrics.volatility,
|
||||
'position_penalty': -cfg.gamma_inventory * metrics.position_cost,
|
||||
'lost_penalty': -0.1 * metrics.lost_opportunity,
|
||||
'robustness_penalty': -cfg.wasserstein_epsilon * avg_margin * min(alpha + cfg.wasserstein_epsilon, 1.0),
|
||||
'contamination': alpha,
|
||||
'avg_margin_pct': avg_margin / (float(np.mean(instruments.costs)) + EPS),
|
||||
}
|
||||
|
||||
class UXPenalty(BaseObjective):
|
||||
"""User experience penalty from price volatility.
|
||||
|
||||
High price volatility degrades UX for legitimate human users.
|
||||
This term ensures the defense doesn't harm real customers while
|
||||
protecting against agent reconnaissance.
|
||||
"""
|
||||
|
||||
def __init__(self, scale: float = 1.0, max_acceptable_volatility: float = 0.1):
|
||||
self.scale = scale
|
||||
self.max_vol = max_acceptable_volatility
|
||||
|
||||
def reward(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
|
||||
# penalty increases quadratically beyond threshold
|
||||
excess_vol = max(0, metrics.volatility - self.max_vol)
|
||||
return -self.scale * (excess_vol ** 2)
|
||||
|
||||
def breakdown(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
|
||||
return {
|
||||
'ux_penalty': self.reward(quote, instruments, metrics, hidden, obs),
|
||||
'volatility': metrics.volatility,
|
||||
}
|
||||
|
||||
class AdaptiveObjective(BaseObjective):
|
||||
"""Objective that adapts weights based on estimated contamination.
|
||||
|
||||
When contamination is low, focus on revenue maximization.
|
||||
When contamination is high, increase COI defense weight.
|
||||
"""
|
||||
|
||||
def __init__(self, base_lambda_coi: float = 0.3, max_lambda_coi: float = 2.0,
|
||||
adaptation_rate: float = 2.0):
|
||||
self.base_lambda = base_lambda_coi
|
||||
self.max_lambda = max_lambda_coi
|
||||
self.rate = adaptation_rate
|
||||
|
||||
def _adaptive_lambda(self, alpha: float) -> float:
|
||||
# sigmoid scaling: λ(α) = base + (max-base) * sigmoid(rate*(α-0.5))
|
||||
from ...outlet.math_util import sigmoid
|
||||
scale = sigmoid(self.rate * (alpha - 0.3))
|
||||
return self.base_lambda + (self.max_lambda - self.base_lambda) * scale
|
||||
|
||||
def reward(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
|
||||
alpha = hidden.contamination
|
||||
lambda_coi = self._adaptive_lambda(alpha)
|
||||
|
||||
profit = metrics.revenue - metrics.cost
|
||||
margins = quote.prices - instruments.costs
|
||||
coi_penalty = lambda_coi * float(np.mean(margins)) * alpha
|
||||
|
||||
return profit - coi_penalty
|
||||
|
||||
def breakdown(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
|
||||
alpha = hidden.contamination
|
||||
return {
|
||||
'profit': metrics.revenue - metrics.cost,
|
||||
'adaptive_lambda': self._adaptive_lambda(alpha),
|
||||
'contamination': alpha,
|
||||
}
|
||||
|
||||
def make_thesis_objective(lambda_coi: float = 0.5, lambda_ux: float = 0.1,
|
||||
lambda_vol: float = 0.2) -> CompositeObjective:
|
||||
"""Create the standard thesis objective composition."""
|
||||
return CompositeObjective([
|
||||
(RobustStackelbergObjective(RobustObjectiveConfig(
|
||||
lambda_coi=lambda_coi, lambda_ux=lambda_ux, lambda_volatility=lambda_vol)), 1.0),
|
||||
])
|
||||
176
lab/case/thesis/platform.py
Normal file
176
lab/case/thesis/platform.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""Thesis platform with real MDP behavioral models and separability scoring."""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
from ...outlet import (Platform, PlatformConfig, PositionModel, PositionConfig,
|
||||
PostedPriceMechanism, make_instruments, InstrumentType, LogLevel)
|
||||
from ...outlet.mechanisms.posted_price import PostedPriceConfig
|
||||
from ...outlet.observation import DefaultObservationBuilder, ObservationConfig
|
||||
from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig
|
||||
from .execution import HybridExecutionModel, HybridExecutionConfig
|
||||
from .objectives import RobustStackelbergObjective, RobustObjectiveConfig
|
||||
|
||||
|
||||
@dataclass
|
||||
class ThesisConfig:
|
||||
# instruments
|
||||
n_instruments: int = 10
|
||||
cost_range: tuple[float, float] = (5.0, 50.0)
|
||||
margin_range: tuple[float, float] = (0.2, 0.5)
|
||||
|
||||
# contamination (Section 3.1)
|
||||
alpha_contamination: float = 0.2
|
||||
alpha_drift: float = 0.0
|
||||
alpha_bounds: tuple[float, float] = (0.0, 0.5)
|
||||
|
||||
# objectives (Eq 23)
|
||||
lambda_coi: float = 0.5
|
||||
lambda_ux: float = 0.1
|
||||
lambda_volatility: float = 0.2
|
||||
wasserstein_epsilon: float = 0.1
|
||||
|
||||
# arrivals
|
||||
sessions_per_step: int = 30
|
||||
human_views_range: tuple[int, int] = (1, 4)
|
||||
agent_views_range: tuple[int, int] = (3, 10)
|
||||
|
||||
# inventory
|
||||
initial_inventory: float = 100.0
|
||||
holding_cost_rate: float = 0.002
|
||||
|
||||
# real behavioral models (from sim.rl)
|
||||
use_real_behavior: bool = True
|
||||
use_separability: bool = False # disabled until classifier trained
|
||||
human_data_dir: str = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data"
|
||||
agent_data_dir: str = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data"
|
||||
|
||||
# simulation
|
||||
max_steps: int = 500
|
||||
seed: int | None = 24
|
||||
log_level: LogLevel = LogLevel.AGG_ONLY
|
||||
|
||||
|
||||
def _resolve_data_dirs(cfg: ThesisConfig) -> tuple[str, str]:
|
||||
"""Resolve data directories for behavioral models."""
|
||||
base = Path(__file__).parent.parent.parent.parent / "experiments"
|
||||
human = cfg.human_data_dir or str(base / "collected_data")
|
||||
agent = cfg.agent_data_dir or str(base / "agents/collected_data")
|
||||
return human, agent
|
||||
|
||||
|
||||
def make_thesis_platform(cfg: ThesisConfig | None = None) -> Platform:
|
||||
"""Create platform with real MDP behavioral models.
|
||||
|
||||
Implements:
|
||||
- Contaminated arrivals using learned MDP kernels from behavior_loader
|
||||
- Hybrid execution with real separability scoring from lib.separability
|
||||
- Robust Stackelberg objective (Eq 23)
|
||||
"""
|
||||
cfg = cfg or ThesisConfig()
|
||||
rng = np.random.default_rng(cfg.seed)
|
||||
human_dir, agent_dir = _resolve_data_dirs(cfg)
|
||||
|
||||
instruments = make_instruments(
|
||||
n=cfg.n_instruments, cost_range=cfg.cost_range, margin_range=cfg.margin_range,
|
||||
inst_type=InstrumentType.SKU, rng=rng)
|
||||
instruments.position = np.full(cfg.n_instruments, cfg.initial_inventory)
|
||||
|
||||
arrival = ContaminatedArrivalModel(ContaminatedArrivalConfig(
|
||||
base_rate=cfg.sessions_per_step,
|
||||
alpha_contamination=cfg.alpha_contamination,
|
||||
alpha_drift=cfg.alpha_drift,
|
||||
alpha_bounds=cfg.alpha_bounds,
|
||||
human_views_range=cfg.human_views_range,
|
||||
agent_views_range=cfg.agent_views_range,
|
||||
use_real_behavior=cfg.use_real_behavior,
|
||||
human_data_dir=human_dir,
|
||||
agent_data_dir=agent_dir,
|
||||
))
|
||||
|
||||
execution = HybridExecutionModel(HybridExecutionConfig(
|
||||
use_separability=cfg.use_separability,
|
||||
))
|
||||
|
||||
mechanism = PostedPriceMechanism(PostedPriceConfig(max_delta_pct=0.15, min_margin_pct=0.05))
|
||||
position = PositionModel(PositionConfig(initial_position=cfg.initial_inventory, holding_cost_rate=cfg.holding_cost_rate))
|
||||
|
||||
market = None
|
||||
objective = RobustStackelbergObjective(RobustObjectiveConfig(
|
||||
lambda_coi=cfg.lambda_coi, lambda_ux=cfg.lambda_ux,
|
||||
lambda_volatility=cfg.lambda_volatility, wasserstein_epsilon=cfg.wasserstein_epsilon))
|
||||
|
||||
obs_builder = DefaultObservationBuilder(ObservationConfig(mask_true_demand=True))
|
||||
platform_cfg = PlatformConfig(n_instruments=cfg.n_instruments, max_steps=cfg.max_steps,
|
||||
seed=cfg.seed, log_level=cfg.log_level, mask_demand=True)
|
||||
|
||||
return Platform(instruments=instruments, mechanism=mechanism, arrival=arrival, execution=execution,
|
||||
position=position, market=market, obs_builder=obs_builder, objective=objective, cfg=platform_cfg)
|
||||
|
||||
|
||||
@dataclass
|
||||
class AblationConfig(ThesisConfig):
|
||||
disable_coi_penalty: bool = False
|
||||
disable_ux_penalty: bool = False
|
||||
disable_contamination: bool = False
|
||||
disable_real_behavior: bool = False
|
||||
|
||||
|
||||
def make_ablation_platform(cfg: AblationConfig) -> Platform:
|
||||
if cfg.disable_coi_penalty:
|
||||
cfg.lambda_coi = 0.0
|
||||
if cfg.disable_ux_penalty:
|
||||
cfg.lambda_ux = 0.0
|
||||
if cfg.disable_contamination:
|
||||
cfg.alpha_contamination = 0.0
|
||||
if cfg.disable_real_behavior:
|
||||
cfg.use_real_behavior = False
|
||||
cfg.use_separability = False
|
||||
return make_thesis_platform(cfg)
|
||||
|
||||
|
||||
def sweep_contamination(alpha_values: list[float], base_cfg: ThesisConfig | None = None,
|
||||
n_steps: int = 100, seed: int = 42) -> dict[float, dict]:
|
||||
"""Test performance across contamination levels (Theorem 1 validation)."""
|
||||
from ...experiments.eval import rollout, fixed_price_policy
|
||||
|
||||
results = {}
|
||||
base_cfg = base_cfg or ThesisConfig()
|
||||
|
||||
for alpha in alpha_values:
|
||||
cfg = ThesisConfig(**{k: v for k, v in base_cfg.__dict__.items() if k != 'alpha_contamination'},
|
||||
alpha_contamination=alpha)
|
||||
platform = make_thesis_platform(cfg)
|
||||
policy = fixed_price_policy(platform.instruments.refs)
|
||||
result = rollout(platform, policy, n_steps, seed=seed)
|
||||
results[alpha] = {
|
||||
'total_reward': result.total_reward,
|
||||
'total_pnl': result.total_pnl,
|
||||
'avg_conversion': result.avg_conversion,
|
||||
'final_contamination': platform._hidden.contamination,
|
||||
}
|
||||
return results
|
||||
|
||||
|
||||
def sweep_behavior_modes(base_cfg: ThesisConfig | None = None, n_steps: int = 100, seed: int = 42) -> dict[str, dict]:
|
||||
"""Compare real vs synthetic behavioral models."""
|
||||
from ...experiments.eval import rollout, fixed_price_policy
|
||||
|
||||
base_cfg = base_cfg or ThesisConfig()
|
||||
modes = {
|
||||
'real_mdp': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': True, 'use_separability': True}),
|
||||
'synthetic': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': False, 'use_separability': False}),
|
||||
'real_mdp_no_sep': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': True, 'use_separability': False}),
|
||||
}
|
||||
|
||||
results = {}
|
||||
for name, cfg in modes.items():
|
||||
platform = make_thesis_platform(cfg)
|
||||
policy = fixed_price_policy(platform.instruments.refs)
|
||||
result = rollout(platform, policy, n_steps, seed=seed)
|
||||
results[name] = {
|
||||
'total_reward': result.total_reward,
|
||||
'total_pnl': result.total_pnl,
|
||||
'avg_conversion': result.avg_conversion,
|
||||
}
|
||||
return results
|
||||
136
lab/case/thesis/run_experiment.py
Normal file
136
lab/case/thesis/run_experiment.py
Normal file
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env python
|
||||
"""Thesis simulation experiments with real MDP behavioral models."""
|
||||
from __future__ import annotations
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
|
||||
|
||||
from lab.case.thesis.platform import make_thesis_platform, ThesisConfig
|
||||
from lab.case.thesis.metrics import compute_coi, compute_separability
|
||||
from lab.experiments.eval import compare_policies
|
||||
import numpy as np
|
||||
|
||||
|
||||
def demo_basic_simulation():
|
||||
print("=" * 70)
|
||||
print("THESIS SIMULATION: Contaminated Dynamic Pricing (Real MDP Kernels)")
|
||||
print("=" * 70)
|
||||
|
||||
cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, lambda_coi=0.5,
|
||||
max_steps=100, seed=42, use_real_behavior=True)
|
||||
platform = make_thesis_platform(cfg)
|
||||
|
||||
print(f"\nInstruments: {platform.instruments.n}")
|
||||
print(f"Reference prices: {platform.instruments.refs.round(2)}")
|
||||
print(f"Costs: {platform.instruments.costs.round(2)}")
|
||||
print(f"Initial contamination alpha={cfg.alpha_contamination}")
|
||||
print(f"Using real behavior: {cfg.use_real_behavior}")
|
||||
|
||||
result = platform.reset(seed=42)
|
||||
total_reward, coi_history = 0, []
|
||||
|
||||
print(f"\n{'Step':>5} {'Reward':>10} {'PnL':>10} {'COI':>8} {'alpha':>6} {'Conv':>8}")
|
||||
print("-" * 55)
|
||||
|
||||
for t in range(cfg.max_steps):
|
||||
action = platform.instruments.refs * np.random.uniform(0.95, 1.15, size=platform.instruments.n)
|
||||
result = platform.step(action)
|
||||
total_reward += result.reward
|
||||
coi = compute_coi(platform._quote, platform.instruments, result.metrics, result.hidden.contamination)
|
||||
coi_history.append(coi.coi_level)
|
||||
|
||||
if t % 20 == 0:
|
||||
print(f"{t:5d} {result.reward:10.2f} {result.metrics.pnl:10.2f} "
|
||||
f"{coi.coi_level:8.2f} {result.hidden.contamination:6.2f} {result.metrics.conversion:8.3f}")
|
||||
|
||||
print("-" * 55)
|
||||
print(f"Total Reward: {total_reward:.2f}")
|
||||
print(f"Average COI: {np.mean(coi_history):.2f}")
|
||||
print(f"COI Trend: {coi_history[-1] - coi_history[0]:+.2f}")
|
||||
|
||||
|
||||
def demo_contamination_sweep():
|
||||
print("\n" + "=" * 70)
|
||||
print("EXPERIMENT: COI Erosion vs Contamination (Theorem 1)")
|
||||
print("=" * 70)
|
||||
|
||||
from lab.case.thesis.platform import sweep_contamination
|
||||
trials = 20
|
||||
alpha_values = [i/trials for i in range(trials)]
|
||||
results = sweep_contamination(alpha_values, n_steps=100, seed=42)
|
||||
|
||||
print(f"\n{'alpha':>6} {'Reward':>12} {'PnL':>12} {'Conv':>10}")
|
||||
print("-" * 45)
|
||||
for alpha, m in sorted(results.items()):
|
||||
print(f"{alpha:6.2f} {m['total_reward']:12.2f} {m['total_pnl']:12.2f} {m['avg_conversion']:10.3f}")
|
||||
|
||||
rewards = [results[a]['total_reward'] for a in sorted(results.keys())]
|
||||
dataset = np.array([[a, r] for a, r in zip(alpha_values, rewards)])
|
||||
trend = np.corrcoef(dataset[:, 0], dataset[:, 1])[0, 1]
|
||||
print(f"Trend (alpha~reward correlation): {trend:.3f}")
|
||||
|
||||
|
||||
def demo_policy_comparison():
|
||||
print("\n" + "=" * 70)
|
||||
print("EXPERIMENT: Policy Comparison under Contamination")
|
||||
print("=" * 70)
|
||||
|
||||
cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.25, max_steps=100, seed=42)
|
||||
platform = make_thesis_platform(cfg)
|
||||
|
||||
def fixed_policy(obs, t): return platform.instruments.refs.copy(), 1.0
|
||||
def aggressive_policy(obs, t): return platform.instruments.refs * 1.3, 1.0
|
||||
def conservative_policy(obs, t): return platform.instruments.refs * 1.05, 1.0
|
||||
def adaptive_policy(obs, t):
|
||||
fills = obs[platform.instruments.n:2*platform.instruments.n]
|
||||
exp = obs[2*platform.instruments.n:3*platform.instruments.n]
|
||||
conv = np.sum(fills) / (np.sum(exp) + 1e-8)
|
||||
return platform.instruments.refs * (1.0 + 0.2 * conv), 1.0
|
||||
|
||||
policies = {'fixed': fixed_policy, 'aggressive': aggressive_policy,
|
||||
'conservative': conservative_policy, 'adaptive': adaptive_policy}
|
||||
results = compare_policies(platform, policies, n_steps=100, n_runs=3, seed=42)
|
||||
|
||||
print(f"\n{'Policy':>15} {'Reward':>12} {'Std':>10} {'PnL':>12} {'Conv':>10}")
|
||||
print("-" * 65)
|
||||
for name, r in sorted(results.items(), key=lambda x: -x[1]['mean_reward']):
|
||||
print(f"{name:>15} {r['mean_reward']:12.2f} {r['std_reward']:10.2f} "
|
||||
f"{r['mean_pnl']:12.2f} {r['mean_conversion']:10.3f}")
|
||||
|
||||
|
||||
def demo_session_analysis():
|
||||
"""Analyze session-level behavior from MDP trajectories."""
|
||||
print("\n" + "=" * 70)
|
||||
print("EXPERIMENT: Session Analysis (Ground Truth)")
|
||||
print("=" * 70)
|
||||
|
||||
from lab.outlet.constants import LogLevel
|
||||
cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, max_steps=50,
|
||||
log_level=LogLevel.FULL, seed=42, use_real_behavior=True)
|
||||
platform = make_thesis_platform(cfg)
|
||||
|
||||
result = platform.reset(seed=42)
|
||||
human_sessions, agent_sessions = 0, 0
|
||||
|
||||
for t in range(cfg.max_steps):
|
||||
action = platform.instruments.refs * 1.1
|
||||
result = platform.step(action)
|
||||
sep = compute_separability(result.logs, result.hidden.contamination)
|
||||
human_sessions += sep.n_human_sessions
|
||||
agent_sessions += sep.n_agent_sessions
|
||||
|
||||
total = human_sessions + agent_sessions
|
||||
print(f"\nTotal sessions: {total}")
|
||||
print(f"Human sessions: {human_sessions} ({100*human_sessions/total:.1f}%)")
|
||||
print(f"Agent sessions: {agent_sessions} ({100*agent_sessions/total:.1f}%)")
|
||||
print(f"True contamination: {cfg.alpha_contamination:.1%}")
|
||||
print(f"Observed contamination: {agent_sessions/total:.1%}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
demo_basic_simulation()
|
||||
demo_contamination_sweep()
|
||||
# demo_policy_comparison()
|
||||
# demo_session_analysis()
|
||||
Reference in New Issue
Block a user