shock: defining new lab environment and formulation

This commit is contained in:
2026-01-23 10:37:32 +01:00
parent a033e77697
commit 4e2e41d943
41 changed files with 4175 additions and 0 deletions

1
lab/README.md Normal file
View File

@@ -0,0 +1 @@
# MOS (Money Operating System)

27
lab/__init__.py Normal file
View File

@@ -0,0 +1,27 @@
"""
Quote-Control Simulator: Research-grade platform for dynamic pricing and market making
The platform abstracts pricing as: Quote -> Arrival -> Execution -> Position
Supports multiple mechanisms:
- PostedPrice: retail dynamic pricing
- TwoSided: market making with bid-ask spreads
- Auction: reserve/shading for auction settings
Example usage:
from lab.config import make_retail_platform
from lab.experiments import rollout, fixed_price_policy
platform = make_retail_platform()
policy = fixed_price_policy(platform.instruments.refs)
result = rollout(platform, policy, n_steps=100)
print(f"Total PnL: {result.total_pnl:.2f}")
"""
from .config import make_retail_platform, make_market_making_platform, RetailConfig, MarketMakingConfig
from .outlet import Platform, PlatformConfig, Quote, Observation, StepResult
__all__ = [
'make_retail_platform', 'make_market_making_platform',
'RetailConfig', 'MarketMakingConfig',
'Platform', 'PlatformConfig', 'Quote', 'Observation', 'StepResult',
]

6
lab/case/__init__.py Normal file
View File

@@ -0,0 +1,6 @@
"""
Case studies implementing specific research scenarios.
Available cases:
- thesis: PHANTOM thesis implementation with contaminated demand and DR-RL
"""

View File

@@ -0,0 +1,25 @@
"""
Thesis-specific implementation of the PHANTOM pricing defense framework.
This module implements the mathematical models from the thesis:
- ContaminatedArrivalModel: Mixture demand Q(p) = (1-α)d_H + αd_A (Eq 3)
- HybridExecutionModel: Divergent H/A behavior with separability (Section 2.1)
- RobustStackelbergObjective: Maximin objective with COI penalty (Eq 23)
- COIMetrics: Cost of Information tracking (Definition 1)
The platform configuration creates a research environment that directly
maps to the thesis mathematical framework for DR-RL experiments.
"""
from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig
from .execution import HybridExecutionModel, HybridExecutionConfig
from .objectives import RobustStackelbergObjective, COIObjective
from .platform import make_thesis_platform, ThesisConfig
from .metrics import COIMetrics, compute_coi, compute_separability
__all__ = [
'ContaminatedArrivalModel', 'ContaminatedArrivalConfig',
'HybridExecutionModel', 'HybridExecutionConfig',
'RobustStackelbergObjective', 'COIObjective',
'make_thesis_platform', 'ThesisConfig',
'COIMetrics', 'compute_coi', 'compute_separability',
]

327
lab/case/thesis/arrivals.py Normal file
View File

@@ -0,0 +1,327 @@
"""Contaminated arrivals using learned MDP kernels from behavior_loader.
Implements thesis demand model (Section 3.1):
- Aggregate demand Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t (Eq 3)
- Demand proxy q̂_{t,i} = Σ_s Σ_k ω(a_{s,k}) · 1[i_{s,k} = i] (Eq 2)
- Per-session separability via KL divergence Δ_H, Δ_A (Eq 20-21)
The arrival model samples sessions from a mixture of human/agent behavioral profiles,
each session produces a trajectory τ_s and associated demand computation q(τ').
"""
from __future__ import annotations
from dataclasses import dataclass, field
from types import SimpleNamespace
from typing import Dict, List, Tuple, Optional
import numpy as np
from ...outlet.types import Opportunity, InstrumentSet, MarketState, HiddenState
from ...outlet.constants import Side, OpportunityType
from ...outlet.math_util import poisson_arrivals
try:
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from sim.rl.behavior_loader.models import (
BehaviorModel, AgentBehaviorModel, aggregate_event_transitions, kl_divergence
)
REAL_MDP = True
except ImportError:
REAL_MDP = False
kl_divergence = None
# Page path recorded on each generated event row, keyed by canonical event name.
EVENT_PAGE = {"session_start": "/", "view_item_page": "/products", "learn_more_about_item": "/products/details",
"add_item_to_cart": "/cart", "purchase_complete": "/checkout", "session_end": "/checkout/success"}
# Raw event name -> canonical state name; used to collapse learned kernels onto
# the canonical states (e.g. "checkout_start" is folded into "purchase_complete"
# and "remove_item" into "view_item_page").
EVENT_CANON = {"page_view": "session_start", "hover_over_paragraph": "view_item_page", "hover_over_title": "view_item_page",
"view_item_page": "view_item_page", "learn_more_about_item": "learn_more_about_item",
"add_item_to_cart": "add_item_to_cart", "checkout_start": "purchase_complete", "remove_item": "view_item_page"}
# Action space partition A = A_nav ∪ A_cart ∪ A_filter ∪ A_dwell with signal weights ω (Table 1).
# The trailing comment on each row names the partition of that row's entries.
# NOTE(review): "search" and "session_end" sit on the A_filter row — confirm against Table 1.
ACTION_WEIGHTS: Dict[str, float] = {
"add_item_to_cart": 0.8, "remove_item": 0.6, "checkout_start": 0.9, "purchase_complete": 1.0, # A_cart
"hover_over_title": 0.3, "hover_over_paragraph": 0.35, "hover_over_link": 0.25, # A_dwell
"page_view": 0.1, "session_start": 0.05, "view_item_page": 0.15, "learn_more_about_item": 0.2, # A_nav
"search": 0.05, "filter_date": 0.05, "filter_price": 0.08, "sort": 0.03, "session_end": 0.0, # A_filter
}
@dataclass
class SessionDemand:
    """Demand record for one session s ∈ S (thesis Section 3.1).

    Bundles the sampled trajectory τ_s with the demand proxy q̂ derived from
    it, the divergence signals Δ_H / Δ_A, and the per-session contamination
    estimate α̂(τ') built from those signals.
    """

    # Identifier of the session this record describes.
    session_id: str
    # q̂_i: demand proxy keyed by product index (Eq 2).
    q: Dict[int, float]
    # τ_s = (e_{s,1}, ..., e_{s,L_s}): the session's event rows.
    trajectory: List[Dict]
    # D_KL(T̂' || T̄_H) (Eq 20).
    delta_h: float = 0.0
    # D_KL(T̂' || T̄_A) (Eq 21).
    delta_a: float = 0.0
    # Per-session contamination estimate α̂.
    alpha_hat: float = 0.0
    # Ground-truth label Y_s ∈ {H, A}.
    actor_class: str = "H"
    # Behavioural parameters attached to the session; a fresh dict per instance.
    theta: Dict[str, float] = field(default_factory=dict)
def compute_demand_proxy(events: List[Dict], n_products: int) -> Dict[int, float]:
    """Accumulate the demand proxy q̂_{t,i} = Σ_k ω(a_{s,k}) · 1[i_{s,k} = i] (Eq 2).

    Args:
        events: Event rows; each may carry "eventName" and "product_idx".
        n_products: Number of products; the result has keys 0..n_products-1.

    Returns:
        Mapping product index -> summed action weight. Events without a
        "product_idx", or with one outside [0, n_products), are ignored.
        Actions missing from ACTION_WEIGHTS contribute a weight of 0.1.
    """
    proxy = dict.fromkeys(range(n_products), 0.0)
    for event in events:
        product = event.get("product_idx")
        if product is None or not (0 <= product < n_products):
            continue
        proxy[product] += ACTION_WEIGHTS.get(event.get("eventName", ""), 0.1)
    return proxy
def compute_session_divergence(events: List[Dict], ref_h: Dict, ref_a: Dict) -> Tuple[float, float]:
    """Compute the divergence signals Δ_H, Δ_A of one trajectory (Eq 20-21).

    An empirical transition kernel is estimated from consecutive event names
    (the walk starts from "session_start"); each of its rows is compared with
    the matching row of the reference kernel via ``kl_divergence`` and the
    per-row values are averaged.

    Args:
        events: Event rows of the session, in order.
        ref_h: Reference kernel T̄_H (state -> {next_state: prob}).
        ref_a: Reference kernel T̄_A (same layout).

    Returns:
        (Δ_H, Δ_A); (0.0, 0.0) when there are no events or when
        ``kl_divergence`` is unavailable.
    """
    if not events or kl_divergence is None:
        return 0.0, 0.0

    # count observed transitions source -> target
    counts: Dict[str, Dict[str, int]] = {}
    source = "session_start"
    for event in events:
        target = event.get("eventName", "session_end")
        row = counts.setdefault(source, {})
        row[target] = row.get(target, 0) + 1
        source = target

    # turn the counts into row-stochastic probabilities
    kernel = {}
    for state, row in counts.items():
        n_out = sum(row.values())
        kernel[state] = {dst: c / n_out for dst, c in row.items()} if n_out > 0 else {}

    n_rows = max(len(kernel), 1)

    def _mean_kl(reference: Dict) -> float:
        # average row-wise KL of the empirical kernel against one reference
        return sum(kl_divergence(row, reference.get(state, {})) for state, row in kernel.items()) / n_rows

    delta_h = _mean_kl(ref_h)
    delta_a = _mean_kl(ref_a)
    return delta_h, delta_a
def _canonicalize(raw: Dict) -> Dict:
out = {}
for src, dsts in raw.items():
sc = EVENT_CANON.get(src, src)
out.setdefault(sc, {})
for dst, p in dsts.items():
dc = EVENT_CANON.get(dst, dst)
out[sc][dc] = out[sc].get(dc, 0.0) + p
return {s: {k: v/sum(d.values()) for k, v in d.items()} for s, d in out.items() if sum(d.values()) > 0}
class BehavioralProfile:
    """Markov behavioural profile of one actor class (Section 3.5.2).

    The transition kernel T̂_Y is either loaded from learned MDP data
    (MLE estimate P̂(s'|s) = N(s,s') / Σ_k N(s,k), Eq 19) or copied from the
    built-in fallback kernels below.
    """

    STATES = ["session_start", "view_item_page", "learn_more_about_item", "add_item_to_cart", "purchase_complete", "session_end"]
    # fallback kernels T̄_H, T̄_A when real data unavailable
    FALLBACK_H = {
        "session_start": {"view_item_page": 0.85, "session_end": 0.15},
        "view_item_page": {"learn_more_about_item": 0.4, "add_item_to_cart": 0.3, "view_item_page": 0.2, "session_end": 0.1},
        "learn_more_about_item": {"add_item_to_cart": 0.5, "view_item_page": 0.3, "session_end": 0.2},
        "add_item_to_cart": {"purchase_complete": 0.6, "view_item_page": 0.25, "session_end": 0.15},
        "purchase_complete": {"session_end": 1.0},
    }
    FALLBACK_A = {
        "session_start": {"view_item_page": 0.95, "session_end": 0.05},
        "view_item_page": {"learn_more_about_item": 0.6, "view_item_page": 0.25, "add_item_to_cart": 0.1, "session_end": 0.05},
        "learn_more_about_item": {"view_item_page": 0.5, "add_item_to_cart": 0.15, "learn_more_about_item": 0.3, "session_end": 0.05},
        "add_item_to_cart": {"view_item_page": 0.4, "purchase_complete": 0.2, "session_end": 0.4},
        "purchase_complete": {"session_end": 1.0},
    }

    def __init__(self, actor: str, pprobs: np.ndarray, data_dir: str = ""):
        """Build the profile.

        Args:
            actor: "agents" selects the agent model/kernels; any other value
                selects the human ones.
            pprobs: Per-product purchase probabilities; clipped to [0, 0.95].
            data_dir: Directory with recorded behaviour. Empty means "use the
                fallback kernel".
        """
        self.actor, self.pprobs = actor, np.clip(pprobs, 0.0, 0.95)
        self.trans = self._load(data_dir)  # T̂_Y transition kernel
        self._ensure_terminal()
        # (shape, scale) of the gamma dwell-time distribution for each state
        self.dwell = {s: (1.2, 0.5) if actor == "agents" else (2.0, 1.2) for s in self.STATES}

    def _fallback(self) -> Dict:
        """Return a private copy of the fallback kernel for this actor.

        Every row is copied so that later in-place edits of ``self.trans``
        (e.g. in ``_ensure_terminal``) can never write through into the
        class-level FALLBACK_* constants shared by all instances.
        """
        source = self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H
        return {state: dict(row) for state, row in source.items()}

    def _load(self, data_dir: str) -> Dict:
        """Load the learned kernel from ``data_dir``, or fall back.

        The fallback is used when no directory is given, when the real MDP
        loader could not be imported, when the learned MDP has no transitions,
        or when loading raises.
        """
        if not data_dir or not REAL_MDP:
            print("using fallback")
            return self._fallback()
        try:
            mdp = (AgentBehaviorModel if self.actor == "agents" else BehaviorModel)(data_dir).build_MDP()
            raw = aggregate_event_transitions(mdp) if mdp.get("transitions") else {}
            return _canonicalize(raw) if raw else self._fallback()
        except Exception:
            # best-effort: any loader failure degrades to the fallback kernel
            print("using fallback")
            return self._fallback()

    def _ensure_terminal(self):
        """Guarantee the kernel can start a session and can end after a purchase."""
        # keep an existing purchase_complete -> session_end mass, else set 1.0
        self.trans.setdefault("purchase_complete", {}).setdefault("session_end", 1.0)
        self.trans.setdefault("session_start", {"view_item_page": 0.7, "learn_more_about_item": 0.2, "session_end": 0.1})

    def _tprobs(self, state: str, pidx: int) -> Dict[str, float]:
        """Return normalised transition probabilities out of ``state``.

        For "add_item_to_cart" the purchase probability is an equal blend of
        the kernel's own value and the product's purchase probability (scaled
        by 0.3 for agents), capped at 0.95; the remaining mass is spread over
        the other destinations in proportion to their kernel weights. States
        missing from the kernel lead straight to "session_end".
        """
        probs = dict(self.trans.get(state, {"session_end": 1.0}))
        if state == "add_item_to_cart":
            base = probs.get("purchase_complete", 0.0)
            df = float(self.pprobs[pidx]) * (0.3 if self.actor == "agents" else 1.0)
            adj = np.clip(base * 0.5 + df * 0.5, 0.0, 0.95)
            rem = max(1e-6, 1.0 - adj)
            other = sum(v for k, v in probs.items() if k != "purchase_complete")
            probs = {k: (adj if k == "purchase_complete" else v * rem / max(other, 1e-6)) for k, v in probs.items()}
        total = sum(probs.values())
        return {k: v / total for k, v in probs.items()} if total > 0 else {"session_end": 1.0}

    def sample(self, rng: np.random.Generator, sid: str, prices: np.ndarray, costs: np.ndarray) -> Tuple[List[Dict], List[SimpleNamespace]]:
        """Sample one session trajectory for a single randomly chosen product.

        Args:
            rng: Random generator driving every draw.
            sid: Session identifier written into each event row.
            prices: Per-product prices.
            costs: Per-product unit costs.

        Returns:
            ``(events, fevts)``: full event rows (dicts) and, in the same
            order, lightweight records carrying eventName/page/productId/ts.
            The walk stops at "session_end" or after 40 events.
        """
        events, fevts = [], []
        state, t, pidx = "session_start", 0.0, int(rng.integers(0, len(prices)))
        # the offered price never drops below cost + 5%
        cost, cprice = float(costs[pidx]), max(float(prices[pidx]), float(costs[pidx]) * 1.05)
        while state != "session_end" and len(events) < 40:
            if state != "session_start":
                row = {"session_id": sid, "actor": "agent" if self.actor == "agents" else "human",
                       "eventName": state, "product_idx": pidx, "productId": f"product-{pidx:04d}",
                       "price_offered": cprice, "price_paid": 0.0, "page": EVENT_PAGE.get(state, "/"),
                       "ts": t, "unit_cost": cost, "base_price": float(prices[pidx])}
                if state == "purchase_complete":
                    # paid price is the offered price with small noise, floored at cost
                    row["price_paid"] = max(cprice * (1.0 + rng.normal(0.0, 0.015)), cost)
                events.append(row)
                fevts.append(SimpleNamespace(eventName=state, page=row["page"], productId=row["productId"], ts=t))
            probs = self._tprobs(state, pidx)
            state = rng.choice(list(probs.keys()), p=list(probs.values()))
            # dwell time is drawn for the state just entered, at least 0.3
            sh, sc = self.dwell.get(state, (2.0, 1.0))
            t += max(0.3, rng.gamma(shape=sh, scale=sc))
        return events, fevts
@dataclass
class ContaminatedArrivalConfig:
    """Settings of the contaminated (human/agent mixture) arrival model."""

    # Session arrival rate; scaled by the hidden demand intensity when sampling.
    base_rate: float = 20.0
    # Initial mixture weight α of agent sessions.
    alpha_contamination: float = 0.2
    # Scale of the Gaussian random-walk step applied to α each sample (0 disables it).
    alpha_drift: float = 0.0
    # (low, high) clipping range for α under drift.
    alpha_bounds: tuple[float, float] = (0.0, 0.5)
    # Range for the number of products viewed when a human trajectory touches none.
    human_views_range: tuple[int, int] = (1, 4)
    # Same, for agent sessions.
    agent_views_range: tuple[int, int] = (3, 10)
    # NOTE(review): not read by the arrival code visible in this file — confirm usage.
    agent_systematic: bool = True
    # When True, profiles try to load learned kernels from the data directories.
    use_real_behavior: bool = True
    # Directory with recorded human behaviour ("" selects the built-in default path).
    human_data_dir: str = ""
    # Directory with recorded agent behaviour ("" selects the built-in default path).
    agent_data_dir: str = ""
class ContaminatedArrivalModel:
    """Mixture model Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t (Eq 3).

    Samples sessions from human/agent behavioral profiles, computes per-session
    demand proxy q̂ and divergence signals Δ_H, Δ_A for separability.
    """

    def __init__(self, cfg: ContaminatedArrivalConfig | None = None):
        """Create the model; ``cfg`` defaults to ``ContaminatedArrivalConfig()``."""
        self.cfg = cfg or ContaminatedArrivalConfig()
        self._alpha = self.cfg.alpha_contamination
        self._scount = 0  # running session counter used to build session ids
        self._profiles: Dict[str, BehavioralProfile] = {}
        self._ref_kernels: Dict[str, Dict] = {}  # T̄_H, T̄_A reference kernels
        self._session_demands: List[SessionDemand] = []  # collected session demands

    @property
    def alpha(self) -> float:
        """Current contamination level α."""
        return self._alpha

    def _profile(self, actor: str, pprobs: np.ndarray) -> BehavioralProfile:
        """Return the (lazily created) profile for ``actor``.

        The profile and its transition kernel are cached per actor. On every
        later call the cached profile's purchase probabilities are refreshed
        from ``pprobs`` so that it never keeps stale values from its creation.
        """
        profile = self._profiles.get(actor)
        if profile is None:
            ddir = self.cfg.agent_data_dir if actor == "agents" else self.cfg.human_data_dir
            if not ddir and self.cfg.use_real_behavior:
                base = Path(__file__).parent.parent.parent.parent / "experiments"
                ddir = str(base / ("agents/collected_data" if actor == "agents" else "collected_data"))
            profile = BehavioralProfile(actor, pprobs, ddir if self.cfg.use_real_behavior else "")
            self._profiles[actor] = profile
            self._ref_kernels[actor] = profile.trans  # cache T̄_Y for divergence
        else:
            # same clipping the profile applies to its constructor argument
            profile.pprobs = np.clip(pprobs, 0.0, 0.95)
        return profile

    def get_ref_kernels(self) -> Tuple[Dict, Dict]:
        """Return reference transition kernels T̄_H, T̄_A for divergence computation.

        Falls back to the built-in kernels for actors whose profile has not
        been created yet.
        """
        return (self._ref_kernels.get("humans", BehavioralProfile.FALLBACK_H),
                self._ref_kernels.get("agents", BehavioralProfile.FALLBACK_A))

    def get_session_demands(self) -> List[SessionDemand]:
        """Return collected session demands for downstream analysis.

        The live internal list is returned. It gains one entry per sampled
        session and is never cleared by this class.
        """
        return self._session_demands

    def sample(self, t: float, dt: float, instruments: InstrumentSet,
               market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]:
        """Sample arrivals as per Eq 3: mixture of human/agent demand distributions.

        For each session s, computes:
        - Trajectory τ_s from behavioral profile sampling
        - Demand proxy q̂ via weighted action aggregation (Eq 2)
        - Divergence signals Δ_H, Δ_A for separability (Eq 20-21)
        - Per-session contamination estimate α̂(τ')

        Args:
            t: Start time of the step.
            dt: Step length; opportunity times are drawn uniformly in [t, t + dt).
            instruments: Provides ``refs``, ``costs`` and ``n``.
            market: Unused here.
            hidden: ``true_demand_intensity`` is read; ``contamination`` is written.
            rng: Random generator driving every draw.

        Returns:
            One opportunity per (session, viewed product).
        """
        cfg = self.cfg
        if cfg.alpha_drift != 0:
            # random-walk drift of α; keep it a plain float rather than np.float64
            self._alpha = float(np.clip(self._alpha + cfg.alpha_drift * rng.normal(), *cfg.alpha_bounds))
        hidden.contamination = self._alpha
        n_sess = poisson_arrivals(cfg.base_rate * hidden.true_demand_intensity, dt, rng)
        prices, costs = instruments.refs, instruments.costs
        margin = np.clip((prices - costs) / np.maximum(costs, 1e-3), -0.9, 2.0)
        hprob, aprob = 0.08 * np.exp(-1.2 * margin), 0.05 * np.exp(-0.6 * margin)
        # Build both profiles before reading the reference kernels; otherwise
        # sessions are scored against the fallback kernels until the matching
        # profile happens to be created lazily.
        profiles = {"humans": self._profile("humans", hprob), "agents": self._profile("agents", aprob)}
        ref_h, ref_a = self.get_ref_kernels()
        opps = []
        for _ in range(n_sess):
            self._scount += 1
            sid = f"s{self._scount:06d}"
            is_agent = rng.random() < self._alpha
            profile = profiles["agents" if is_agent else "humans"]
            events, fevts = profile.sample(rng, sid, prices, costs)
            # compute demand proxy q̂ per Eq 2
            q = compute_demand_proxy(events, instruments.n)
            # compute divergence signals Δ_H, Δ_A per Eq 20-21
            delta_h, delta_a = compute_session_divergence(events, ref_h, ref_a)
            # per-session contamination estimate α̂(τ') = σ(β(Δ_H - Δ_A)), β = 2;
            # uninformative 0.5 when there is no divergence signal
            alpha_hat = 1.0 / (1.0 + np.exp(-2.0 * (delta_h - delta_a))) if (delta_h + delta_a) > 0 else 0.5
            theta = ({'price_sensitivity': rng.uniform(0.05, 0.2), 'base_conversion': 0.01, 'info_value': 1.0} if is_agent
                     else {'price_sensitivity': rng.uniform(1.5, 4.0), 'base_conversion': rng.uniform(0.2, 0.5), 'info_value': 0.0})
            # store session demand for downstream analysis
            self._session_demands.append(SessionDemand(
                session_id=sid, q=q, trajectory=events, delta_h=delta_h, delta_a=delta_a,
                alpha_hat=alpha_hat, actor_class="A" if is_agent else "H", theta=theta))
            viewed = list({e["product_idx"] for e in events if "product_idx" in e})
            if not viewed:
                # trajectory touched no product: draw a random set of views instead
                vr = cfg.agent_views_range if is_agent else cfg.human_views_range
                viewed = list(rng.choice(instruments.n, size=min(rng.integers(*vr), instruments.n), replace=False))
            for vi, iid in enumerate(viewed):
                opps.append(Opportunity(
                    id=f"{sid}-{iid}", type=OpportunityType.SESSION, side=Side.BUY,
                    instrument_id=int(iid), size=1.0, t=t + rng.uniform(0, dt),
                    context={'session_id': sid, 'actor_class': 'AGENT' if is_agent else 'HUMAN', 'is_agent': is_agent,
                             'reconnaissance_intent': is_agent, 'view_index': vi, 'total_views': len(viewed),
                             'theta': theta, 'trajectory_events': fevts, 'mdp_trajectory': events,
                             'demand_proxy': q, 'alpha_hat': alpha_hat, 'delta_h': delta_h, 'delta_a': delta_a}))
        return opps
@dataclass
class AdversarialArrivalConfig:
    """Settings of the coordinated adversarial arrival model."""

    # Arrival rate of query groups.
    base_rate: float = 5.0
    # Number of agents issuing queries within each group.
    n_parallel_agents: int = 3
    # True: every agent queries all products; False: one random product each.
    query_all_products: bool = True
class AdversarialArrivalModel:
    """Adversarial coordination (Theorem 1): as N->inf, COI->0."""

    def __init__(self, cfg: AdversarialArrivalConfig | None = None):
        """Create the model; ``cfg`` defaults to ``AdversarialArrivalConfig()``."""
        self.cfg = cfg or AdversarialArrivalConfig()
        self._qcount = 0  # running query-group counter

    def sample(self, t: float, dt: float, instruments: InstrumentSet,
               market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]:
        """Emit the queries of every agent group arriving in this step.

        Each group consists of ``n_parallel_agents`` agents; each agent
        queries either all products or a single random one. All opportunities
        are stamped with time ``t`` and carry a zero-conversion ``theta``.
        """
        settings = self.cfg
        result = []
        n_groups = poisson_arrivals(settings.base_rate, dt, rng)
        for _ in range(n_groups):
            self._qcount += 1
            for agent_idx in range(settings.n_parallel_agents):
                session = f"adv{self._qcount:06d}-{agent_idx}"
                if settings.query_all_products:
                    targets = np.arange(instruments.n)
                else:
                    targets = rng.choice(instruments.n, size=1)
                for product in targets:
                    meta = {'session_id': session, 'actor_class': 'AGENT', 'is_agent': True, 'adversarial': True,
                            'agent_index': agent_idx, 'query_group': self._qcount,
                            'theta': {'price_sensitivity': 0.0, 'base_conversion': 0.0, 'info_value': 1.0}}
                    result.append(Opportunity(
                        id=f"{session}-{product}", type=OpportunityType.SESSION, side=Side.BUY,
                        instrument_id=int(product), size=1.0, t=t, context=meta))
        return result

View File

@@ -0,0 +1,91 @@
"""Execution models with divergent H/A behavior using ground truth labels."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict
import numpy as np
from ...outlet.types import Opportunity, Quote, InstrumentSet, MarketState
from ...outlet.math_util import sigmoid, safe_log, EPS
@dataclass
class HybridExecutionConfig:
    """Parameters of the hybrid human/agent execution model."""

    # Default human base conversion probability (used when theta has none).
    human_base_prob: float = 0.3
    # Default human price sensitivity (used when theta has none).
    human_elasticity: float = 2.5
    # Scale applied to an agent's base conversion.
    agent_conversion: float = 0.01
    # Weight of the utility penalty when a competitor quotes a lower price.
    cross_elasticity: float = 0.4
    # Weight of the instrument's 'quality' attribute in the human utility.
    quality_weight: float = 0.2
    # NOTE(review): not read by the execution code visible in this file — confirm usage.
    use_separability: bool = False
class HybridExecutionModel:
    """Execution with divergent H/A behavior using ground truth labels."""

    def __init__(self, cfg: HybridExecutionConfig | None = None):
        """Create the model; ``cfg`` defaults to ``HybridExecutionConfig()``."""
        self.cfg = cfg or HybridExecutionConfig()

    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
             market: MarketState | None, rng: np.random.Generator) -> float:
        """Return the execution probability of ``opp`` at the quoted price.

        Agents (``opp.context['is_agent']``) convert with probability
        ``agent_conversion * theta['base_conversion']`` regardless of price.
        Humans follow a logit model whose utility sums the base-conversion
        log-odds, a log price-ratio term, a quality term and, when a
        competitor quotes a lower price, a cross-elasticity penalty.

        Args:
            opp: Opportunity; ``instrument_id`` and ``context`` are read.
            quote: Provides ``prices``.
            instruments: Provides ``refs`` and per-instrument ``attrs``.
            market: Optional; ``competitor_quotes`` is read when present.
            rng: Unused here.
        """
        cfg, idx = self.cfg, int(opp.instrument_id)
        price, ref = float(quote.prices[idx]), float(instruments.refs[idx])
        ctx = opp.context
        theta = ctx.get('theta', {})
        if ctx.get('is_agent', False):
            return cfg.agent_conversion * theta.get('base_conversion', 1.0)
        # human logit discrete choice
        sens = theta.get('price_sensitivity', cfg.human_elasticity)
        base = theta.get('base_conversion', cfg.human_base_prob)
        u_price = -sens * safe_log(price / (ref + EPS))
        quality = instruments.instruments[idx].attrs.get('quality', 0.5)
        u_quality = cfg.quality_weight * quality
        u_comp = 0.0
        if market is not None and market.competitor_quotes is not None:
            cp = market.competitor_quotes[idx]
            if cp < price:
                # same EPS guard on the reference price as the price term above
                u_comp = -cfg.cross_elasticity * (price - cp) / (ref + EPS)
        utility = safe_log(base / (1 - base + EPS)) + u_price + u_quality + u_comp
        return float(sigmoid(utility))

    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray:
        """Scale observed fills up by the expected human conversion rate.

        With a ``context``, the rate is further multiplied by the human share
        ``1 - context['contamination']`` (contamination defaults to 0).
        """
        if context is None:
            return fills / (self.cfg.human_base_prob + EPS)
        agent_frac = context.get('contamination', 0.0)
        return fills / (self.cfg.human_base_prob * (1 - agent_frac) + EPS)
@dataclass
class SeparableExecutionConfig:
human_funnel: Dict[str, float] = None
agent_funnel: Dict[str, float] = None
def __post_init__(self):
self.human_funnel = self.human_funnel or {'view_to_detail': 0.4, 'detail_to_cart': 0.3, 'cart_to_purchase': 0.6}
self.agent_funnel = self.agent_funnel or {'view_to_detail': 0.8, 'detail_to_cart': 0.05, 'cart_to_purchase': 0.1}
class SeparableExecutionModel:
    """Execution with Markov funnel kernels using ground truth labels."""

    def __init__(self, cfg: SeparableExecutionConfig | None = None):
        """Create the model; ``cfg`` defaults to ``SeparableExecutionConfig()``."""
        self.cfg = cfg or SeparableExecutionConfig()

    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
             market: MarketState | None, rng: np.random.Generator) -> float:
        """Return the funnel conversion probability for ``opp``.

        The three funnel stages of the actor's class are multiplied together.
        For humans only, the product is damped by exp(-0.5 * (price/ref - 1)).
        The result is clipped to [0, 1].
        """
        agent = opp.context.get('is_agent', False)
        funnel = self.cfg.agent_funnel if agent else self.cfg.human_funnel
        conversion = funnel['view_to_detail'] * funnel['detail_to_cart'] * funnel['cart_to_purchase']
        if agent:
            return float(np.clip(conversion, 0, 1))
        item = int(opp.instrument_id)
        relative_price = quote.prices[item] / (instruments.refs[item] + EPS)
        conversion *= np.exp(-0.5 * (relative_price - 1.0))
        return float(np.clip(conversion, 0, 1))

    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray:
        """Scale fills up by the expected human funnel conversion (context is ignored)."""
        funnel = self.cfg.human_funnel
        expected = funnel['view_to_detail'] * funnel['detail_to_cart'] * funnel['cart_to_purchase']
        return fills / (expected + EPS)

102
lab/case/thesis/metrics.py Normal file
View File

@@ -0,0 +1,102 @@
"""Thesis metrics for COI and behavioral analysis using ground truth labels."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Dict
import numpy as np
from ...outlet.types import StepLogs, StepMetrics, Quote, InstrumentSet
from ...outlet.math_util import safe_log, EPS
@dataclass
class COIMetrics:
    """Cost-of-Information figures for a single step."""

    coi_level: float = 0.0
    coi_leakage: float = 0.0
    realized_premium: float = 0.0
    theoretical_max: float = 0.0
    erosion_rate: float = 0.0

    def to_dict(self) -> dict[str, float]:
        """Return the five metrics as a plain dict, in field order."""
        return {
            'coi_level': self.coi_level,
            'coi_leakage': self.coi_leakage,
            'realized_premium': self.realized_premium,
            'theoretical_max': self.theoretical_max,
            'erosion_rate': self.erosion_rate,
        }
def compute_coi(quote: Quote, instruments: InstrumentSet, metrics: StepMetrics, contamination: float) -> COIMetrics:
    """Derive the Cost-of-Information metrics for one step.

    Args:
        quote: Provides the quoted ``prices``.
        instruments: Provides ``costs`` and ``refs``.
        metrics: ``revenue``, ``cost`` and ``units_traded`` are read.
        contamination: Agent share α used to scale leakage and erosion.

    Returns:
        COIMetrics where ``coi_level`` is the mean absolute margin
        (price - cost), ``theoretical_max`` is the mean cost,
        ``realized_premium`` is profit per traded unit (0.0 without trades),
        ``coi_leakage`` is α·(coi_level + var(price/ref)) and
        ``erosion_rate`` is α·coi_level / theoretical_max.
    """
    prices = quote.prices
    costs = instruments.costs
    refs = instruments.refs

    level = float(np.mean(prices - costs))
    ceiling = float(np.mean(costs))

    if metrics.units_traded > 0:
        premium = (metrics.revenue - metrics.cost) / metrics.units_traded
    else:
        premium = 0.0

    relative_price_var = float(np.var(prices / refs))
    return COIMetrics(
        coi_level=level,
        coi_leakage=contamination * (level + relative_price_var),
        realized_premium=premium,
        theoretical_max=ceiling,
        erosion_rate=contamination * level / (ceiling + EPS),
    )
@dataclass
class SeparabilityMetrics:
    """Summary of how sessions split between humans and agents."""

    # Share of sessions whose label is considered correct.
    classification_accuracy: float = 0.0
    # Estimated agent share α.
    estimated_alpha: float = 0.0
    # Number of sessions labelled human.
    n_human_sessions: int = 0
    # Number of sessions labelled agent.
    n_agent_sessions: int = 0
def compute_separability(logs: StepLogs, true_alpha: float) -> SeparabilityMetrics:
    """Compute separability using ground truth labels only.

    Sessions are identified by each event's ``metadata['session_id']``
    (falling back to ``opportunity_id``); the first event seen for a session
    fixes its ``is_agent`` label.

    Args:
        logs: Step logs; ``events`` is read.
        true_alpha: Returned as ``estimated_alpha`` when there are no events.

    Returns:
        Session counts per class and the agent share. Accuracy is reported as
        1.0 because the labels are the ground truth.
    """
    if logs.events is None or len(logs.events) == 0:
        return SeparabilityMetrics(estimated_alpha=true_alpha)

    labels: Dict[str, bool] = {}
    for event in logs.events:
        key = event.metadata.get('session_id', event.opportunity_id)
        if key in labels:
            continue  # first event of a session decides its label
        labels[key] = event.metadata.get('is_agent', False)

    agents = sum(1 for flag in labels.values() if flag)
    humans = len(labels) - agents
    share = agents / len(labels) if labels else 0.0
    return SeparabilityMetrics(
        classification_accuracy=1.0,  # ground truth is always correct
        estimated_alpha=share,
        n_human_sessions=humans,
        n_agent_sessions=agents)
@dataclass
class RevenueAttribution:
    """Revenue and conversion split between human and agent traffic."""

    # Revenue of the whole step.
    total_revenue: float = 0.0
    # Revenue attributed to humans.
    human_revenue: float = 0.0
    # Revenue attributed to agents.
    agent_revenue: float = 0.0
    # Conversion rate attributed to humans.
    human_conversion: float = 0.0
    # Conversion rate attributed to agents.
    agent_conversion: float = 0.0
def compute_attribution(logs: StepLogs, metrics: StepMetrics) -> RevenueAttribution:
    """Split executed revenue into a human and an agent part.

    Executions with ``propensity < 0.05`` are counted as agent traffic, all
    others as human. Conversions divide the execution counts by an assumed
    80% / 20% share of ``logs.aggregates['n_arrivals']`` (default 1).

    NOTE(review): the class is inferred from the propensity threshold and the
    arrival split is fixed at 0.8/0.2 rather than taken from labels or the
    actual contamination — confirm this is intended.

    Returns:
        RevenueAttribution; only ``total_revenue`` is filled when
        ``logs.executions`` is None.
    """
    if logs.executions is None:
        return RevenueAttribution(total_revenue=metrics.revenue)

    revenue = {'human': 0.0, 'agent': 0.0}
    count = {'human': 0, 'agent': 0}
    for execution in logs.executions:
        bucket = 'agent' if execution.propensity < 0.05 else 'human'
        revenue[bucket] += execution.price * execution.size_filled
        count[bucket] += 1

    arrivals = logs.aggregates.get('n_arrivals', 1)
    return RevenueAttribution(
        total_revenue=metrics.revenue,
        human_revenue=revenue['human'],
        agent_revenue=revenue['agent'],
        human_conversion=count['human'] / (arrivals * 0.8 + EPS),
        agent_conversion=count['agent'] / (arrivals * 0.2 + EPS))
def order_statistic_erosion(n_agents: int, price_variance: float) -> float:
    """COI erosion from Theorem 1: as N->inf, min(p_1..p_N)->p_min.

    Uses the asymptotic expansion of the expected extreme of N draws,
    σ·(√(2 ln N) − (ln ln N + ln 4π) / (2√(2 ln N))), expressed relative to
    2σ and capped at 1.0.

    Args:
        n_agents: Number of coordinating agents N.
        price_variance: Variance of the quoted prices.

    Returns:
        Erosion in (-inf, 1.0]; 0.0 when N <= 1 or when ln N < 1.
    """
    if n_agents <= 1:
        return 0.0
    sigma = np.sqrt(price_variance)
    log_n = safe_log(n_agents)
    if log_n < 1:
        return 0.0
    root = np.sqrt(2 * log_n)
    correction = (safe_log(log_n) + safe_log(4 * np.pi)) / (2 * root + EPS)
    shift = sigma * (root - correction)
    relative = shift / (sigma * 2 + EPS)
    return float(min(relative, 1.0))

View File

@@ -0,0 +1,228 @@
"""
Thesis-specific objectives implementing robust pricing under contamination.
Implements the Maximin objective from Eq 23:
π* = argmax_π min_{Q ∈ U_ε} E_d~Q[R(p,d) - λ·COI(p)]
Key components:
- COIObjective: Cost of Information penalty (Definition 1)
- RobustStackelbergObjective: Full maximin objective with Wasserstein robustness
- UXPenalty: User experience degradation from volatility
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
from ...outlet.objectives.base import BaseObjective, CompositeObjective
from ...outlet.types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
from ...outlet.math_util import safe_log, EPS
class COIObjective(BaseObjective):
    """Cost of Information penalty from Definition 1.

    COI(π) = E[P] - p_min

    The expected price premium over marginal cost represents the platform's
    pricing power. Agent reconnaissance erodes this by revealing the price
    distribution to buyers.

    Implemented as COI_leakage = α · InfoValue, where α is the contamination
    read from the hidden state.
    """

    def __init__(self, lambda_coi: float = 1.0, use_revelation: bool = False):
        """
        Args:
            lambda_coi: Weight on the COI penalty.
            use_revelation: If True, the info value is the mean relative
                deviation of quoted prices from the reference prices (a
                surrogate for the price surprise -log π(p|τ')); if False it
                is the constant 1.0.
        """
        self.lambda_coi = lambda_coi
        self.use_revelation = use_revelation

    def _info_value(self, quote: Quote, instruments: InstrumentSet):
        """Return the information revealed per unit of contamination."""
        if not self.use_revelation:
            # query-tax surrogate: each agent query incurs constant leakage
            return 1.0
        # revelation surrogate: prices far from the reference reveal more
        refs = instruments.refs
        return np.mean(np.abs(quote.prices - refs) / (refs + EPS))

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return the (non-positive) penalty -λ · α · InfoValue."""
        leakage = hidden.contamination * self._info_value(quote, instruments)
        return -self.lambda_coi * leakage

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return the penalty, the contamination and the mean relative margin."""
        relative_margins = (quote.prices - instruments.costs) / (instruments.costs + EPS)
        report = {'coi_penalty': self.reward(quote, instruments, metrics, hidden, obs)}
        report['contamination'] = hidden.contamination
        report['avg_margin'] = float(np.mean(relative_margins))
        return report
@dataclass
class RobustObjectiveConfig:
    """Weights of the robust Stackelberg objective."""

    # λ in Eq 23: weight on the COI penalty.
    lambda_coi: float = 0.5
    # Weight on the UX penalty.
    lambda_ux: float = 0.1
    # Weight on the price-volatility penalty.
    lambda_volatility: float = 0.2
    # Inventory risk aversion.
    gamma_inventory: float = 0.1
    # ε in Eq 21: radius of the ambiguity set.
    wasserstein_epsilon: float = 0.1
class RobustStackelbergObjective(BaseObjective):
    """Implements the Maximin Objective from thesis Eq 23.

    π* = argmax_π min_{Q ∈ U_ε(P̂_N)} E_d~Q[R(p,d) - λ·COI(p)]

    The objective balances:
    1. Profit (revenue minus cost) reported by the step metrics
    2. COI penalty for information leakage to agents
    3. Penalty for price volatility
    4. Inventory/holding costs
    5. Lost-opportunity cost

    The min over ambiguity set U_ε is approximated by penalizing
    high contamination scenarios more heavily.
    """

    # weight on metrics.lost_opportunity
    _LOST_OPPORTUNITY_WEIGHT = 0.1

    def __init__(self, cfg: RobustObjectiveConfig | None = None):
        """Create the objective; ``cfg`` defaults to ``RobustObjectiveConfig()``."""
        self.cfg = cfg or RobustObjectiveConfig()

    def _penalties(self, quote: Quote, instruments: InstrumentSet,
                   metrics: StepMetrics, hidden: HiddenState) -> tuple[float, dict[str, float]]:
        """Return ``(avg_margin, penalties)``, the single source of every term.

        ``penalties`` maps each penalty name to its magnitude, in the order in
        which ``reward`` subtracts them from the profit.
        """
        cfg = self.cfg
        alpha = hidden.contamination
        avg_margin = float(np.mean(quote.prices - instruments.costs))
        # worst-case contamination within the ε-ball, capped at 1
        worst_case_alpha = min(alpha + cfg.wasserstein_epsilon, 1.0)
        return avg_margin, {
            # high margins + high contamination = high leakage
            'coi_penalty': cfg.lambda_coi * avg_margin * alpha,
            'volatility_penalty': cfg.lambda_volatility * metrics.volatility,
            'position_penalty': cfg.gamma_inventory * metrics.position_cost,
            'lost_penalty': self._LOST_OPPORTUNITY_WEIGHT * metrics.lost_opportunity,
            'robustness_penalty': cfg.wasserstein_epsilon * avg_margin * worst_case_alpha,
        }

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return the profit minus all penalties."""
        _, penalties = self._penalties(quote, instruments, metrics, hidden)
        total = metrics.revenue - metrics.cost
        for amount in penalties.values():
            total = total - amount
        return total

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return every term of ``reward`` (penalties as negative numbers)."""
        avg_margin, penalties = self._penalties(quote, instruments, metrics, hidden)
        report = {
            'revenue': metrics.revenue,
            'cost': metrics.cost,
            'profit': metrics.revenue - metrics.cost,
        }
        report.update({name: -amount for name, amount in penalties.items()})
        report['contamination'] = hidden.contamination
        report['avg_margin_pct'] = avg_margin / (float(np.mean(instruments.costs)) + EPS)
        return report
class UXPenalty(BaseObjective):
    """User experience penalty from price volatility.

    High price volatility degrades UX for legitimate human users.
    This term ensures the defense doesn't harm real customers while
    protecting against agent reconnaissance.
    """

    def __init__(self, scale: float = 1.0, max_acceptable_volatility: float = 0.1):
        """
        Args:
            scale: Multiplier of the quadratic penalty.
            max_acceptable_volatility: Volatility below which nothing is charged.
        """
        self.scale = scale
        self.max_vol = max_acceptable_volatility

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return -scale · (volatility above the threshold)²."""
        overshoot = max(0, metrics.volatility - self.max_vol)
        squared = overshoot ** 2
        return -self.scale * squared

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return the penalty together with the raw volatility."""
        penalty = self.reward(quote, instruments, metrics, hidden, obs)
        return {'ux_penalty': penalty, 'volatility': metrics.volatility}
class AdaptiveObjective(BaseObjective):
    """Objective that adapts weights based on estimated contamination.

    When contamination is low, focus on revenue maximization.
    When contamination is high, increase COI defense weight.
    """

    def __init__(self, base_lambda_coi: float = 0.3, max_lambda_coi: float = 2.0,
                 adaptation_rate: float = 2.0, midpoint: float = 0.3):
        """
        Args:
            base_lambda_coi: Lower asymptote of the COI weight.
            max_lambda_coi: Upper asymptote of the COI weight.
            adaptation_rate: Steepness of the sigmoid in α.
            midpoint: Contamination level at which the weight is halfway
                between the two asymptotes.
        """
        self.base_lambda = base_lambda_coi
        self.max_lambda = max_lambda_coi
        self.rate = adaptation_rate
        self.midpoint = midpoint

    def _adaptive_lambda(self, alpha: float) -> float:
        """Return λ(α) = base + (max - base) · sigmoid(rate · (α - midpoint))."""
        from ...outlet.math_util import sigmoid
        scale = sigmoid(self.rate * (alpha - self.midpoint))
        return self.base_lambda + (self.max_lambda - self.base_lambda) * scale

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return profit minus λ(α) · mean absolute margin · α."""
        alpha = hidden.contamination
        lambda_coi = self._adaptive_lambda(alpha)
        profit = metrics.revenue - metrics.cost
        margins = quote.prices - instruments.costs
        coi_penalty = lambda_coi * float(np.mean(margins)) * alpha
        return profit - coi_penalty

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return the profit, the current adaptive weight and the contamination."""
        alpha = hidden.contamination
        return {
            'profit': metrics.revenue - metrics.cost,
            'adaptive_lambda': self._adaptive_lambda(alpha),
            'contamination': alpha,
        }
def make_thesis_objective(lambda_coi: float = 0.5, lambda_ux: float = 0.1,
                          lambda_vol: float = 0.2) -> CompositeObjective:
    """Build the default thesis objective.

    Wraps a single ``RobustStackelbergObjective`` (weight 1.0) in a
    ``CompositeObjective``; ``lambda_vol`` is forwarded as
    ``lambda_volatility``.
    """
    robust_cfg = RobustObjectiveConfig(
        lambda_coi=lambda_coi,
        lambda_ux=lambda_ux,
        lambda_volatility=lambda_vol,
    )
    weighted_terms = [(RobustStackelbergObjective(robust_cfg), 1.0)]
    return CompositeObjective(weighted_terms)

176
lab/case/thesis/platform.py Normal file
View File

@@ -0,0 +1,176 @@
"""Thesis platform with real MDP behavioral models and separability scoring."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import numpy as np
from ...outlet import (Platform, PlatformConfig, PositionModel, PositionConfig,
PostedPriceMechanism, make_instruments, InstrumentType, LogLevel)
from ...outlet.mechanisms.posted_price import PostedPriceConfig
from ...outlet.observation import DefaultObservationBuilder, ObservationConfig
from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig
from .execution import HybridExecutionModel, HybridExecutionConfig
from .objectives import RobustStackelbergObjective, RobustObjectiveConfig
@dataclass
class ThesisConfig:
    """Settings consumed by ``make_thesis_platform``.

    ``human_data_dir`` / ``agent_data_dir`` default to the empty string, which
    ``_resolve_data_dirs`` treats as "use the repository's ``experiments``
    directory".  Set them explicitly to point at data stored elsewhere.
    """
    # instruments
    n_instruments: int = 10
    cost_range: tuple[float, float] = (5.0, 50.0)
    margin_range: tuple[float, float] = (0.2, 0.5)
    # contamination (Section 3.1)
    alpha_contamination: float = 0.2
    alpha_drift: float = 0.0
    alpha_bounds: tuple[float, float] = (0.0, 0.5)
    # objectives (Eq 23)
    lambda_coi: float = 0.5
    lambda_ux: float = 0.1
    lambda_volatility: float = 0.2
    wasserstein_epsilon: float = 0.1
    # arrivals
    sessions_per_step: int = 30
    human_views_range: tuple[int, int] = (1, 4)
    agent_views_range: tuple[int, int] = (3, 10)
    # inventory
    initial_inventory: float = 100.0
    holding_cost_rate: float = 0.002
    # real behavioral models (from sim.rl)
    use_real_behavior: bool = True
    use_separability: bool = False  # disabled until classifier trained
    # empty => resolved relative to the repository (see _resolve_data_dirs);
    # machine-specific absolute paths must not be baked into the defaults
    human_data_dir: str = ""
    agent_data_dir: str = ""
    # simulation
    max_steps: int = 500
    seed: int | None = 24
    log_level: LogLevel = LogLevel.AGG_ONLY
def _resolve_data_dirs(cfg: ThesisConfig) -> tuple[str, str]:
"""Resolve data directories for behavioral models."""
base = Path(__file__).parent.parent.parent.parent / "experiments"
human = cfg.human_data_dir or str(base / "collected_data")
agent = cfg.agent_data_dir or str(base / "agents/collected_data")
return human, agent
def make_thesis_platform(cfg: ThesisConfig | None = None) -> Platform:
    """Assemble the thesis platform from a ``ThesisConfig``.

    Wires together:
    - a ``ContaminatedArrivalModel`` fed with the resolved data directories
    - a ``HybridExecutionModel`` (separability toggled by the config)
    - a ``PostedPriceMechanism`` and ``PositionModel``
    - the ``RobustStackelbergObjective`` (Eq 23)

    No competitor/market model is attached (``market=None``).
    """
    if not cfg:
        cfg = ThesisConfig()
    rng = np.random.default_rng(cfg.seed)
    human_dir, agent_dir = _resolve_data_dirs(cfg)

    instruments = make_instruments(
        n=cfg.n_instruments,
        cost_range=cfg.cost_range,
        margin_range=cfg.margin_range,
        inst_type=InstrumentType.SKU,
        rng=rng,
    )
    instruments.position = np.full(cfg.n_instruments, cfg.initial_inventory)

    arrival_cfg = ContaminatedArrivalConfig(
        base_rate=cfg.sessions_per_step,
        alpha_contamination=cfg.alpha_contamination,
        alpha_drift=cfg.alpha_drift,
        alpha_bounds=cfg.alpha_bounds,
        human_views_range=cfg.human_views_range,
        agent_views_range=cfg.agent_views_range,
        use_real_behavior=cfg.use_real_behavior,
        human_data_dir=human_dir,
        agent_data_dir=agent_dir,
    )
    arrival = ContaminatedArrivalModel(arrival_cfg)
    execution = HybridExecutionModel(HybridExecutionConfig(use_separability=cfg.use_separability))
    mechanism = PostedPriceMechanism(PostedPriceConfig(max_delta_pct=0.15, min_margin_pct=0.05))
    position = PositionModel(PositionConfig(
        initial_position=cfg.initial_inventory,
        holding_cost_rate=cfg.holding_cost_rate,
    ))
    objective_cfg = RobustObjectiveConfig(
        lambda_coi=cfg.lambda_coi,
        lambda_ux=cfg.lambda_ux,
        lambda_volatility=cfg.lambda_volatility,
        wasserstein_epsilon=cfg.wasserstein_epsilon,
    )
    objective = RobustStackelbergObjective(objective_cfg)
    obs_builder = DefaultObservationBuilder(ObservationConfig(mask_true_demand=True))
    platform_cfg = PlatformConfig(
        n_instruments=cfg.n_instruments,
        max_steps=cfg.max_steps,
        seed=cfg.seed,
        log_level=cfg.log_level,
        mask_demand=True,
    )
    return Platform(
        instruments=instruments,
        mechanism=mechanism,
        arrival=arrival,
        execution=execution,
        position=position,
        market=None,
        obs_builder=obs_builder,
        objective=objective,
        cfg=platform_cfg,
    )
@dataclass
class AblationConfig(ThesisConfig):
    """``ThesisConfig`` plus switches that turn individual components off.

    Each flag is interpreted by ``make_ablation_platform``; all default to
    ``False`` (nothing disabled).
    """
    disable_coi_penalty: bool = False    # force lambda_coi to 0
    disable_ux_penalty: bool = False     # force lambda_ux to 0
    disable_contamination: bool = False  # force alpha_contamination to 0
    disable_real_behavior: bool = False  # turn off real behavior and separability
def make_ablation_platform(cfg: AblationConfig) -> Platform:
    """Build a thesis platform with the components flagged in ``cfg`` disabled.

    The caller's ``cfg`` is left untouched: the overrides are applied to a
    copy, so the same config object can be reused for further ablations.

    Args:
        cfg: Ablation switches on top of the regular thesis settings.

    Returns:
        Platform created by ``make_thesis_platform`` from the adjusted copy.
    """
    from dataclasses import replace

    overrides: dict[str, float | bool] = {}
    if cfg.disable_coi_penalty:
        overrides['lambda_coi'] = 0.0
    if cfg.disable_ux_penalty:
        overrides['lambda_ux'] = 0.0
    if cfg.disable_contamination:
        overrides['alpha_contamination'] = 0.0
    if cfg.disable_real_behavior:
        overrides['use_real_behavior'] = False
        overrides['use_separability'] = False
    return make_thesis_platform(replace(cfg, **overrides))
def sweep_contamination(alpha_values: list[float], base_cfg: ThesisConfig | None = None,
                        n_steps: int = 100, seed: int = 42) -> dict[float, dict]:
    """Test performance across contamination levels (Theorem 1 validation).

    For every ``alpha`` a fresh platform is built from a copy of ``base_cfg``
    with ``alpha_contamination`` replaced, and a fixed reference-price policy
    is rolled out on it.

    Args:
        alpha_values: Contamination levels to evaluate.
        base_cfg: Template configuration (defaults to ``ThesisConfig()``).
            Subclasses such as ``AblationConfig`` are copied with their type
            and extra fields preserved.
        n_steps: Steps per rollout.
        seed: Seed passed to each rollout.

    Returns:
        Mapping ``alpha -> {'total_reward', 'total_pnl', 'avg_conversion',
        'final_contamination'}``.
    """
    from dataclasses import replace
    from ...experiments.eval import rollout, fixed_price_policy

    base_cfg = base_cfg or ThesisConfig()
    results = {}
    for alpha in alpha_values:
        # dataclasses.replace copies every field, including subclass fields,
        # which rebuilding ThesisConfig from __dict__ could not accept
        cfg = replace(base_cfg, alpha_contamination=alpha)
        platform = make_thesis_platform(cfg)
        policy = fixed_price_policy(platform.instruments.refs)
        result = rollout(platform, policy, n_steps, seed=seed)
        results[alpha] = {
            'total_reward': result.total_reward,
            'total_pnl': result.total_pnl,
            'avg_conversion': result.avg_conversion,
            'final_contamination': platform._hidden.contamination,
        }
    return results
def sweep_behavior_modes(base_cfg: ThesisConfig | None = None, n_steps: int = 100, seed: int = 42) -> dict[str, dict]:
    """Compare real vs synthetic behavioral models.

    Three variants of ``base_cfg`` are rolled out with a fixed reference-price
    policy: ``'real_mdp'`` (real behavior + separability), ``'synthetic'``
    (both off) and ``'real_mdp_no_sep'`` (real behavior, separability off).

    Args:
        base_cfg: Template configuration (defaults to ``ThesisConfig()``);
            copied per mode, so subclasses keep their type and extra fields.
        n_steps: Steps per rollout.
        seed: Seed passed to each rollout.

    Returns:
        Mapping ``mode -> {'total_reward', 'total_pnl', 'avg_conversion'}``.
    """
    from dataclasses import replace
    from ...experiments.eval import rollout, fixed_price_policy

    base_cfg = base_cfg or ThesisConfig()
    modes = {
        'real_mdp': replace(base_cfg, use_real_behavior=True, use_separability=True),
        'synthetic': replace(base_cfg, use_real_behavior=False, use_separability=False),
        'real_mdp_no_sep': replace(base_cfg, use_real_behavior=True, use_separability=False),
    }
    results = {}
    for name, cfg in modes.items():
        platform = make_thesis_platform(cfg)
        policy = fixed_price_policy(platform.instruments.refs)
        result = rollout(platform, policy, n_steps, seed=seed)
        results[name] = {
            'total_reward': result.total_reward,
            'total_pnl': result.total_pnl,
            'avg_conversion': result.avg_conversion,
        }
    return results

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env python
"""Thesis simulation experiments with real MDP behavioral models."""
from __future__ import annotations
import sys
from pathlib import Path
if __name__ == '__main__':
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from lab.case.thesis.platform import make_thesis_platform, ThesisConfig
from lab.case.thesis.metrics import compute_coi, compute_separability
from lab.experiments.eval import compare_policies
import numpy as np
def demo_basic_simulation():
    """Run one episode with randomly perturbed prices and print a COI trace.

    Prices are the reference prices scaled by a per-instrument factor drawn
    uniformly from [0.95, 1.15).  The factors come from a generator seeded
    with ``cfg.seed`` so repeated runs of the demo print the same numbers.
    """
    print("=" * 70)
    print("THESIS SIMULATION: Contaminated Dynamic Pricing (Real MDP Kernels)")
    print("=" * 70)
    cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, lambda_coi=0.5,
                       max_steps=100, seed=42, use_real_behavior=True)
    platform = make_thesis_platform(cfg)
    print(f"\nInstruments: {platform.instruments.n}")
    print(f"Reference prices: {platform.instruments.refs.round(2)}")
    print(f"Costs: {platform.instruments.costs.round(2)}")
    print(f"Initial contamination alpha={cfg.alpha_contamination}")
    print(f"Using real behavior: {cfg.use_real_behavior}")
    platform.reset(seed=cfg.seed)
    # dedicated seeded generator: the global np.random state is unseeded and
    # would make the demo output differ from run to run
    rng = np.random.default_rng(cfg.seed)
    total_reward, coi_history = 0, []
    print(f"\n{'Step':>5} {'Reward':>10} {'PnL':>10} {'COI':>8} {'alpha':>6} {'Conv':>8}")
    print("-" * 55)
    for t in range(cfg.max_steps):
        action = platform.instruments.refs * rng.uniform(0.95, 1.15, size=platform.instruments.n)
        result = platform.step(action)
        total_reward += result.reward
        coi = compute_coi(platform._quote, platform.instruments, result.metrics, result.hidden.contamination)
        coi_history.append(coi.coi_level)
        if t % 20 == 0:
            print(f"{t:5d} {result.reward:10.2f} {result.metrics.pnl:10.2f} "
                  f"{coi.coi_level:8.2f} {result.hidden.contamination:6.2f} {result.metrics.conversion:8.3f}")
        # stop stepping once the platform reports the episode is over
        if result.terminated or result.truncated:
            break
    print("-" * 55)
    print(f"Total Reward: {total_reward:.2f}")
    if coi_history:
        print(f"Average COI: {np.mean(coi_history):.2f}")
        print(f"COI Trend: {coi_history[-1] - coi_history[0]:+.2f}")
def demo_contamination_sweep():
    """Sweep contamination from 0.00 to 0.95 in steps of 0.05 and print results.

    Ends with the Pearson correlation between contamination and total reward.
    """
    from lab.case.thesis.platform import sweep_contamination

    print("\n" + "=" * 70)
    print("EXPERIMENT: COI Erosion vs Contamination (Theorem 1)")
    print("=" * 70)
    trials = 20
    alpha_values = [step / trials for step in range(trials)]
    # NOTE(review): ThesisConfig.alpha_bounds defaults to (0.0, 0.5); values
    # above 0.5 may be clipped by the arrival model - confirm.
    results = sweep_contamination(alpha_values, n_steps=100, seed=42)
    print(f"\n{'alpha':>6} {'Reward':>12} {'PnL':>12} {'Conv':>10}")
    print("-" * 45)
    ordered = sorted(results.items())
    for alpha, m in ordered:
        print(f"{alpha:6.2f} {m['total_reward']:12.2f} {m['total_pnl']:12.2f} {m['avg_conversion']:10.3f}")
    rewards = [row['total_reward'] for _, row in ordered]
    # one (alpha, reward) pair per row; transposed so corrcoef sees two series
    dataset = np.array(list(zip(alpha_values, rewards)))
    trend = np.corrcoef(dataset.T)[0, 1]
    print(f"Trend (alpha~reward correlation): {trend:.3f}")
def demo_policy_comparison():
    """Compare four hand-written pricing policies on one contaminated platform.

    Policies are ranked by mean reward (best first) in the printed table.
    """
    print("\n" + "=" * 70)
    print("EXPERIMENT: Policy Comparison under Contamination")
    print("=" * 70)
    cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.25, max_steps=100, seed=42)
    platform = make_thesis_platform(cfg)

    def scaled_refs(factor):
        """Build a policy quoting the reference prices times ``factor``."""
        def policy(obs, t):
            return platform.instruments.refs * factor, 1.0
        return policy

    def fixed_policy(obs, t):
        return platform.instruments.refs.copy(), 1.0

    def adaptive_policy(obs, t):
        # NOTE(review): assumes the flat observation is laid out as
        # [quotes | fills | exposures | ...] in blocks of n - confirm.
        n = platform.instruments.n
        fills = obs[n:2 * n]
        exp = obs[2 * n:3 * n]
        conv = np.sum(fills) / (np.sum(exp) + 1e-8)
        return platform.instruments.refs * (1.0 + 0.2 * conv), 1.0

    policies = {
        'fixed': fixed_policy,
        'aggressive': scaled_refs(1.3),
        'conservative': scaled_refs(1.05),
        'adaptive': adaptive_policy,
    }
    results = compare_policies(platform, policies, n_steps=100, n_runs=3, seed=42)
    print(f"\n{'Policy':>15} {'Reward':>12} {'Std':>10} {'PnL':>12} {'Conv':>10}")
    print("-" * 65)
    ranked = sorted(results.items(), key=lambda item: -item[1]['mean_reward'])
    for name, r in ranked:
        print(f"{name:>15} {r['mean_reward']:12.2f} {r['std_reward']:10.2f} "
              f"{r['mean_pnl']:12.2f} {r['mean_conversion']:10.3f}")
def demo_session_analysis():
    """Analyze session-level behavior from MDP trajectories.

    Runs an episode with full logging at 1.1x reference prices, counts human
    and agent sessions via ``compute_separability`` and compares the observed
    agent share with the configured contamination.
    """
    print("\n" + "=" * 70)
    print("EXPERIMENT: Session Analysis (Ground Truth)")
    print("=" * 70)
    from lab.outlet.constants import LogLevel
    cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, max_steps=50,
                       log_level=LogLevel.FULL, seed=42, use_real_behavior=True)
    platform = make_thesis_platform(cfg)
    platform.reset(seed=cfg.seed)
    human_sessions, agent_sessions = 0, 0
    for t in range(cfg.max_steps):
        action = platform.instruments.refs * 1.1
        result = platform.step(action)
        sep = compute_separability(result.logs, result.hidden.contamination)
        human_sessions += sep.n_human_sessions
        agent_sessions += sep.n_agent_sessions
        # stop stepping once the platform reports the episode is over
        if result.terminated or result.truncated:
            break
    total = human_sessions + agent_sessions
    print(f"\nTotal sessions: {total}")
    if total == 0:
        # the percentages below are undefined without any sessions
        print("No sessions were logged; skipping contamination breakdown.")
        return
    print(f"Human sessions: {human_sessions} ({100*human_sessions/total:.1f}%)")
    print(f"Agent sessions: {agent_sessions} ({100*agent_sessions/total:.1f}%)")
    print(f"True contamination: {cfg.alpha_contamination:.1%}")
    print(f"Observed contamination: {agent_sessions/total:.1%}")
if __name__ == '__main__':
    # demos executed by default, in order
    for _demo in (demo_basic_simulation, demo_contamination_sweep):
        _demo()
    # demo_policy_comparison()
    # demo_session_analysis()

156
lab/config.py Normal file
View File

@@ -0,0 +1,156 @@
"""
Configuration and factory functions for creating pre-configured platforms.
This module provides:
- RetailConfig, MarketMakingConfig: Configuration dataclasses
- make_retail_platform: Factory for retail dynamic pricing scenarios
- make_market_making_platform: Factory for market making scenarios
Example:
>>> from lab.config import make_retail_platform, RetailConfig
>>> platform = make_retail_platform(RetailConfig(n_instruments=5))
>>> result = platform.reset(seed=42)
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
from .outlet import (Platform, PlatformConfig, PositionModel, PositionConfig,
PostedPriceMechanism, TwoSidedMechanism, make_instruments,
InstrumentType, LogLevel)
from .outlet.mechanisms.posted_price import PostedPriceConfig
from .outlet.mechanisms.two_sided import TwoSidedConfig
from .population import (SessionArrivalModel, PoissonArrivalModel, HawkesArrivalModel,
ElasticityExecutionModel, IntensityExecutionModel,
ReactiveCompetitorModel, GBMMarketModel)
from .population.arrivals import SessionArrivalConfig, PoissonArrivalConfig, HawkesArrivalConfig
from .population.execution import ElasticityConfig, IntensityConfig
from .population.competitors import ReactiveCompetitorConfig, GBMMarketConfig
from .outlet.objectives.factory import retail_objective, market_making_objective
@dataclass
class RetailConfig:
    """Settings for the retail dynamic-pricing scenario.

    Attributes:
        n_instruments: How many products are priced
        cost_range: (low, high) bounds for randomly drawn product costs
        margin_range: (low, high) bounds for randomly drawn initial margins
        initial_inventory: Inventory each product starts with
        holding_cost_rate: Holding cost per unit per step
        sessions_per_step: Browsing sessions generated each step
        contamination: Share of sessions that are scrapers
        max_steps: Episode length limit
        seed: Random seed for reproducibility (None = unseeded)
    """
    n_instruments: int = 10
    cost_range: tuple[float, float] = (5.0, 50.0)
    margin_range: tuple[float, float] = (0.2, 0.5)
    initial_inventory: float = 100.0
    holding_cost_rate: float = 0.002
    sessions_per_step: int = 30
    contamination: float = 0.1
    max_steps: int = 500
    seed: int | None = None
def make_retail_platform(cfg: RetailConfig | None = None) -> Platform:
    """Build a ready-to-use retail dynamic-pricing platform.

    Wiring:
    - Mechanism: PostedPriceMechanism with default config
    - Arrivals: SessionArrivalModel (sessions per step + contamination)
    - Execution: ElasticityExecutionModel with default config
    - Market: ReactiveCompetitorModel seeded with the reference prices
    - Objective: ``retail_objective()``

    Args:
        cfg: Scenario settings; ``RetailConfig()`` is used when omitted

    Returns:
        The assembled Platform
    """
    if not cfg:
        cfg = RetailConfig()
    rng = np.random.default_rng(cfg.seed)

    instruments = make_instruments(cfg.n_instruments, cfg.cost_range, cfg.margin_range,
                                   InstrumentType.SKU, rng)
    # every product starts with the same inventory
    instruments.position = np.full(cfg.n_instruments, cfg.initial_inventory)

    mechanism = PostedPriceMechanism(PostedPriceConfig())
    arrival_cfg = SessionArrivalConfig(sessions_per_step=cfg.sessions_per_step,
                                       contamination=cfg.contamination)
    arrival = SessionArrivalModel(arrival_cfg)
    execution = ElasticityExecutionModel(ElasticityConfig())
    position_cfg = PositionConfig(initial_position=cfg.initial_inventory,
                                  holding_cost_rate=cfg.holding_cost_rate)
    position = PositionModel(position_cfg)
    market = ReactiveCompetitorModel(ReactiveCompetitorConfig(), refs=instruments.refs)
    objective = retail_objective()
    platform_cfg = PlatformConfig(n_instruments=cfg.n_instruments, max_steps=cfg.max_steps,
                                  seed=cfg.seed, log_level=LogLevel.AGG_ONLY)
    return Platform(
        instruments=instruments,
        mechanism=mechanism,
        arrival=arrival,
        execution=execution,
        position=position,
        market=market,
        objective=objective,
        cfg=platform_cfg,
    )
@dataclass
class MarketMakingConfig:
    """Settings for the market-making scenario.

    Attributes:
        n_instruments: How many assets are quoted
        initial_mid: Starting mid-price of the assets
        mu: Drift of the mid-price (expected return)
        sigma: Volatility of the mid-price
        gamma: Inventory risk-aversion coefficient
        base_arrival_rate: Baseline order arrival rate (Hawkes)
        max_steps: Episode length limit
        seed: Random seed for reproducibility (None = unseeded)
    """
    n_instruments: int = 5
    initial_mid: float = 100.0
    mu: float = 0.0
    sigma: float = 0.02
    gamma: float = 0.1
    base_arrival_rate: float = 20.0
    max_steps: int = 1000
    seed: int | None = None
def make_market_making_platform(cfg: MarketMakingConfig | None = None) -> Platform:
    """Build a ready-to-use market-making platform.

    Wiring:
    - Mechanism: TwoSidedMechanism with default config
    - Arrivals: HawkesArrivalModel at the configured base rate
    - Execution: IntensityExecutionModel with default config
    - Market: GBMMarketModel started at the instruments' reference prices
    - Objective: ``market_making_objective(gamma, sigma)``

    Args:
        cfg: Scenario settings; ``MarketMakingConfig()`` is used when omitted

    Returns:
        The assembled Platform
    """
    if not cfg:
        cfg = MarketMakingConfig()
    rng = np.random.default_rng(cfg.seed)

    # reference levels are drawn within +/-10% of the initial mid, zero margin
    mid_band = (cfg.initial_mid * 0.9, cfg.initial_mid * 1.1)
    instruments = make_instruments(cfg.n_instruments, mid_band, (0.0, 0.0),
                                   InstrumentType.ASSET, rng)
    instruments.position = np.zeros(cfg.n_instruments)

    mechanism = TwoSidedMechanism(TwoSidedConfig())
    arrival = HawkesArrivalModel(HawkesArrivalConfig(base_rate=cfg.base_arrival_rate))
    execution = IntensityExecutionModel(IntensityConfig())
    position_cfg = PositionConfig(
        initial_position=0.0, min_position=-500, max_position=500,
        holding_cost_rate=0.0)  # use inventory risk penalty instead
    position = PositionModel(position_cfg)
    market_cfg = GBMMarketConfig(mu=cfg.mu, sigma=cfg.sigma)
    market = GBMMarketModel(market_cfg, initial=instruments.refs)
    objective = market_making_objective(gamma=cfg.gamma, sigma=cfg.sigma)
    platform_cfg = PlatformConfig(n_instruments=cfg.n_instruments, max_steps=cfg.max_steps,
                                  seed=cfg.seed, log_level=LogLevel.AGG_ONLY)
    return Platform(
        instruments=instruments,
        mechanism=mechanism,
        arrival=arrival,
        execution=execution,
        position=position,
        market=market,
        objective=objective,
        cfg=platform_cfg,
    )

12
lab/docs/Makefile Normal file
View File

@@ -0,0 +1,12 @@
# Makefile for building the Sphinx documentation in this directory.
# Variables assigned with ?= can be overridden from the command line or the
# environment.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# First target in the file, so a bare `make` runs it: delegates to
# sphinx-build's own "help" target.
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all: any other target name ($@) is forwarded to sphinx-build -M.
# $(O) is an extra-options variable that is not set in this file.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

39
lab/docs/conf.py Normal file
View File

@@ -0,0 +1,39 @@
import os
import sys
# Make the directory two levels above the working directory importable so
# autodoc can resolve the ``lab.*`` modules referenced by the .rst pages.
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, os.pardir)))

# -- Project information ------------------------------------------------------
project = "Quote-Control Simulator"
copyright = "2025, PHANTOM Research"
author = "PHANTOM Research"
release = "0.1.0"

# -- General configuration ----------------------------------------------------
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.intersphinx",
    "sphinx.ext.autosummary",
]
templates_path = ["_templates"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# -- HTML output --------------------------------------------------------------
html_theme = "alabaster"
html_static_path = ["_static"]

# -- Extension settings -------------------------------------------------------
autodoc_default_options = {
    "members": True,
    "undoc-members": True,
    "show-inheritance": True,
}
napoleon_google_docstring = True
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = True
intersphinx_mapping = {
    "python": ("https://docs.python.org/3", None),
    "numpy": ("https://numpy.org/doc/stable/", None),
}
autosummary_generate = True

39
lab/docs/index.rst Normal file
View File

@@ -0,0 +1,39 @@
Quote-Control Simulator
=======================
Research-grade platform for dynamic pricing and market making experiments.
The platform abstracts pricing as: **Quote → Arrival → Execution → Position**
Supports multiple mechanisms:
* **PostedPrice**: retail dynamic pricing
* **TwoSided**: market making with bid-ask spreads
* **Auction**: reserve/shading for auction settings
Quick Start
-----------
.. code-block:: python
from lab.config import make_retail_platform
from lab.experiments import rollout, fixed_price_policy
platform = make_retail_platform()
policy = fixed_price_policy(platform.instruments.refs)
result = rollout(platform, policy, n_steps=100)
print(f"Total PnL: {result.total_pnl:.2f}")
.. toctree::
:maxdepth: 2
:caption: Contents:
modules/outlet
modules/population
modules/experiments
Indices
-------
* :ref:`genindex`
* :ref:`modindex`

View File

@@ -0,0 +1,14 @@
Experiments
===========
Evaluation & OPE
----------------
.. automodule:: lab.experiments.eval
:members:
Configuration
-------------
.. automodule:: lab.config
:members:

View File

@@ -0,0 +1,77 @@
Outlet (Core Simulator)
=======================
Types
-----
.. automodule:: lab.outlet.types
:members:
Constants
---------
.. automodule:: lab.outlet.constants
:members:
Protocols
---------
.. automodule:: lab.outlet.protocols
:members:
Platform
--------
.. automodule:: lab.outlet.platform
:members:
Stock & Position
----------------
.. automodule:: lab.outlet.stock
:members:
Observation
-----------
.. automodule:: lab.outlet.observation
:members:
Mechanisms
----------
Posted Price
~~~~~~~~~~~~
.. automodule:: lab.outlet.mechanisms.posted_price
:members:
Two-Sided (Market Making)
~~~~~~~~~~~~~~~~~~~~~~~~~
.. automodule:: lab.outlet.mechanisms.two_sided
:members:
Auction
~~~~~~~
.. automodule:: lab.outlet.mechanisms.auction
:members:
Objectives
----------
.. automodule:: lab.outlet.objectives.base
:members:
.. automodule:: lab.outlet.objectives.penalties
:members:
.. automodule:: lab.outlet.objectives.factory
:members:
Math Utilities
--------------
.. automodule:: lab.outlet.math_util
:members:

View File

@@ -0,0 +1,20 @@
Population Models
=================
Arrival Models
--------------
.. automodule:: lab.population.arrivals
:members:
Execution Models
----------------
.. automodule:: lab.population.execution
:members:
Competitor / Market Models
--------------------------
.. automodule:: lab.population.competitors
:members:

View File

@@ -0,0 +1,7 @@
from .eval import (rollout, RolloutResult, compare_policies, compute_ips, OPEResult,
fixed_price_policy, cost_plus_margin_policy, random_walk_policy, epsilon_greedy_policy)
__all__ = [
'rollout', 'RolloutResult', 'compare_policies', 'compute_ips', 'OPEResult',
'fixed_price_policy', 'cost_plus_margin_policy', 'random_walk_policy', 'epsilon_greedy_policy',
]

213
lab/experiments/eval.py Normal file
View File

@@ -0,0 +1,213 @@
"""
Evaluation utilities for policy testing and off-policy evaluation.
This module provides:
- rollout: Run a policy on the platform for multiple steps
- compare_policies: Compare multiple policies with statistics
- Baseline policies: fixed_price, cost_plus_margin, random_walk, epsilon_greedy
- OPE estimators: IPS and SNIPS for off-policy evaluation
Example:
>>> from lab.config import make_retail_platform
>>> from lab.experiments.eval import rollout, fixed_price_policy
>>> platform = make_retail_platform()
>>> policy = fixed_price_policy(platform.instruments.refs)
>>> result = rollout(platform, policy, n_steps=100)
>>> print(f"Total PnL: {result.total_pnl:.2f}")
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Callable, Any
import numpy as np
from ..outlet.platform import Platform
from ..outlet.types import StepResult, StepLogs, Quote
# Policy signature: takes (observation_flat, timestep) -> (action_prices, propensity)
Policy = Callable[[np.ndarray, int], tuple[np.ndarray, float]]
@dataclass
class RolloutResult:
    """Outcome of running one policy rollout.

    Attributes:
        rewards: Reward obtained at each step
        metrics: StepMetrics object recorded at each step
        logs: StepLogs object recorded at each step
        total_reward: Rewards summed over the rollout
        total_pnl: PnL summed over the per-step metrics
        avg_conversion: Mean per-step conversion rate
    """
    rewards: list[float]
    metrics: list[Any]
    logs: list[StepLogs]
    total_reward: float
    total_pnl: float
    avg_conversion: float
def rollout(platform: Platform, policy: Policy, n_steps: int, seed: int | None = None) -> RolloutResult:
    """Execute a policy on the platform for up to n_steps.

    The platform is reset with ``seed`` first.  The loop stops early when a
    step reports ``terminated`` or ``truncated``.

    Args:
        platform: The simulation platform
        policy: Function (obs, t) -> (action, propensity)
        n_steps: Maximum number of steps to run
        seed: Random seed forwarded to ``platform.reset``

    Returns:
        RolloutResult with rewards, metrics, and summary statistics.  When no
        step was executed (``n_steps <= 0``) ``avg_conversion`` is 0.0 rather
        than NaN.
    """
    result = platform.reset(seed)
    rewards, metrics, logs = [], [], []
    for t in range(n_steps):
        obs_flat = result.obs.to_flat()
        action, propensity = policy(obs_flat, t)
        result = platform.step(action, propensity)
        rewards.append(result.reward)
        metrics.append(result.metrics)
        logs.append(result.logs)
        if result.terminated or result.truncated:
            break
    # np.mean of an empty list is NaN and emits a RuntimeWarning
    avg_conversion = float(np.mean([m.conversion for m in metrics])) if metrics else 0.0
    return RolloutResult(
        rewards=rewards, metrics=metrics, logs=logs,
        total_reward=sum(rewards),
        total_pnl=sum(m.pnl for m in metrics),
        avg_conversion=avg_conversion,
    )
# Baseline policies for comparison
def fixed_price_policy(refs: np.ndarray) -> Policy:
    """Return a policy that quotes a fresh copy of ``refs`` with propensity 1.0."""
    return lambda obs, t: (refs.copy(), 1.0)
def cost_plus_margin_policy(costs: np.ndarray, margin: float = 0.3) -> Policy:
    """Return a policy that always quotes ``costs * (1 + margin)``.

    The price vector is computed once when the policy is created; each call
    hands back a copy of it with propensity 1.0.
    """
    markup = 1 + margin
    quoted = costs * markup

    def policy(obs: np.ndarray, t: int) -> tuple[np.ndarray, float]:
        return quoted.copy(), 1.0

    return policy
def random_walk_policy(refs: np.ndarray, volatility: float = 0.05,
                       rng: np.random.Generator | None = None) -> Policy:
    """Return a policy whose prices follow a multiplicative random walk.

    Starting from ``refs``, every call multiplies the current prices by
    ``1 + N(0, volatility)`` per instrument and clips the result to
    ``[0.5 * refs, 2.0 * refs]``.  Propensity is always reported as 1.0.
    """
    generator = rng or np.random.default_rng()
    lower, upper = refs * 0.5, refs * 2.0
    state = {'prices': refs.copy()}

    def policy(obs: np.ndarray, t: int) -> tuple[np.ndarray, float]:
        current = state['prices']
        shock = generator.normal(0, volatility, len(current))
        state['prices'] = np.clip(current * (1 + shock), lower, upper)
        return state['prices'].copy(), 1.0

    return policy
def epsilon_greedy_policy(base_policy: Policy, refs: np.ndarray,
                          epsilon: float = 0.1, rng: np.random.Generator | None = None) -> Policy:
    """Add epsilon-greedy exploration on top of ``base_policy``.

    With probability ``epsilon`` the returned policy quotes ``refs`` scaled by
    per-instrument factors drawn uniformly from [0.8, 1.2) and reports
    propensity ``epsilon / len(refs)``; otherwise it returns the base policy's
    action with propensity ``1 - epsilon`` (the base propensity is discarded).
    """
    generator = rng or np.random.default_rng()

    def policy(obs: np.ndarray, t: int) -> tuple[np.ndarray, float]:
        explore = generator.random() < epsilon
        if not explore:
            greedy_action, _ = base_policy(obs, t)
            return greedy_action, 1 - epsilon
        n_inst = len(refs)
        perturbed = refs * generator.uniform(0.8, 1.2, n_inst)
        return perturbed, epsilon / n_inst

    return policy
# Off-Policy Evaluation (OPE)
@dataclass
class OPEResult:
    """Outcome of an off-policy evaluation.

    Attributes:
        ips_estimate: Inverse Propensity Scoring estimate
        snips_estimate: Self-normalized IPS estimate (more stable)
        n_samples: How many samples went into the estimates
        effective_samples: Effective sample size implied by the weights
    """
    ips_estimate: float
    snips_estimate: float
    n_samples: int
    effective_samples: float
def compute_ips(logs: list[StepLogs], rewards: list[float],
                target_policy: Policy, behavior_propensities: list[float] | None = None) -> OPEResult:
    """Compute IPS and SNIPS estimators for off-policy evaluation.

    Uses logged propensities to estimate expected reward under a target
    policy from data collected under a behavior policy.

    Args:
        logs: Step logs containing propensities.  Only consulted when
            ``behavior_propensities`` is not supplied; a step without
            executions is given propensity 1.0.
        rewards: Observed rewards from behavior policy
        target_policy: Policy to evaluate (not currently used, assumes deterministic)
        behavior_propensities: Override propensities if not in logs

    Returns:
        OPEResult with IPS, SNIPS estimates and sample statistics.  For empty
        input all estimates are 0.0.

    Raises:
        ValueError: If the number of propensities differs from the number of
            rewards.
    """
    if behavior_propensities is None:
        # per-step propensity = mean over that step's executions
        behavior_propensities = [
            float(np.mean([e.propensity for e in log.executions])) if log.executions else 1.0
            for log in logs
        ]
    reward_arr = np.asarray(rewards, dtype=float)
    propensity_arr = np.asarray(behavior_propensities, dtype=float)
    if propensity_arr.shape != reward_arr.shape:
        raise ValueError(
            f"got {propensity_arr.size} propensities for {reward_arr.size} rewards")
    n_samples = int(reward_arr.size)
    if n_samples == 0:
        return OPEResult(ips_estimate=0.0, snips_estimate=0.0,
                         n_samples=0, effective_samples=0.0)
    # target propensity would need obs reconstruction - simplified here
    target_propensity = 1.0  # assume deterministic target
    weights = target_propensity / (propensity_arr + 1e-8)
    weighted_sum = float(np.sum(weights * reward_arr))
    weight_sum = float(np.sum(weights))
    # IPS estimate
    ips = weighted_sum / n_samples
    # SNIPS (self-normalized)
    snips = weighted_sum / (weight_sum + 1e-8)
    # effective sample size
    ess = (weight_sum ** 2) / (float(np.sum(weights ** 2)) + 1e-8)
    return OPEResult(ips_estimate=ips, snips_estimate=snips,
                     n_samples=n_samples, effective_samples=ess)
def compare_policies(platform: Platform, policies: dict[str, Policy],
                     n_steps: int = 100, n_runs: int = 5, seed: int = 42) -> dict[str, dict]:
    """Roll out several policies and summarise each one.

    Every policy is run ``n_runs`` times on the same platform with seeds
    ``seed, seed + 1, ...``.

    Args:
        platform: Simulation platform
        policies: Mapping from policy name to policy function
        n_steps: Steps per rollout
        n_runs: Rollouts per policy
        seed: Seed of the first rollout

    Returns:
        Mapping from policy name to ``mean_reward``, ``std_reward``,
        ``mean_pnl`` and ``mean_conversion``
    """
    summary = {}
    for name, policy in policies.items():
        runs = [rollout(platform, policy, n_steps, seed=seed + k) for k in range(n_runs)]
        reward_totals = [run.total_reward for run in runs]
        summary[name] = {
            'mean_reward': np.mean(reward_totals),
            'std_reward': np.std(reward_totals),
            'mean_pnl': np.mean([run.total_pnl for run in runs]),
            'mean_conversion': np.mean([run.avg_conversion for run in runs]),
        }
    return summary

17
lab/outlet/__init__.py Normal file
View File

@@ -0,0 +1,17 @@
from .constants import Side, MechanismType, InstrumentType, OpportunityType, EventType, LogLevel
from .types import (Instrument, InstrumentSet, Quote, Opportunity, Execution,
StepEvent, StepLogs, StepMetrics, MarketState, HiddenState, Observation, StepResult)
from .stock import PositionModel, PositionConfig, make_instruments
from .platform import Platform, PlatformConfig
from .observation import DefaultObservationBuilder, ObservationConfig
from .mechanisms import PostedPriceMechanism, TwoSidedMechanism, AuctionMechanism
__all__ = [
'Side', 'MechanismType', 'InstrumentType', 'OpportunityType', 'EventType', 'LogLevel',
'Instrument', 'InstrumentSet', 'Quote', 'Opportunity', 'Execution',
'StepEvent', 'StepLogs', 'StepMetrics', 'MarketState', 'HiddenState', 'Observation', 'StepResult',
'PositionModel', 'PositionConfig', 'make_instruments',
'Platform', 'PlatformConfig',
'DefaultObservationBuilder', 'ObservationConfig',
'PostedPriceMechanism', 'TwoSidedMechanism', 'AuctionMechanism',
]

83
lab/outlet/constants.py Normal file
View File

@@ -0,0 +1,83 @@
"""
Constants and enumerations for the Quote-Control simulator.
This module defines the core enums used throughout the platform to ensure
type safety and consistent semantics across different pricing mechanisms.
"""
from enum import Enum, auto
class Side(Enum):
    """Which side of a transaction initiated it.

    Attributes:
        BUY: Initiated by the buyer (customer purchase, market buy order)
        SELL: Initiated by the seller (market sell order, short sale)
    """
    BUY = auto()
    SELL = auto()
class MechanismType(Enum):
    """How quotes are turned into executions.

    Attributes:
        POSTED_PRICE: One posted price per instrument (retail dynamic pricing)
        TWO_SIDED_QUOTE: Bid and ask quotes (market making, liquidity provision)
        AUCTION: Reserve price or bid shading (ad auctions, marketplaces)
    """
    POSTED_PRICE = auto()
    TWO_SIDED_QUOTE = auto()
    AUCTION = auto()
class InstrumentType(Enum):
    """Kind of instrument that is being priced.

    Attributes:
        SKU: Retail product subject to inventory constraints
        ASSET: Financial instrument subject to position limits
        LOAN: Credit product priced through its interest rate
        SUBSCRIPTION: Recurring service billed periodically
    """
    SKU = auto()
    ASSET = auto()
    LOAN = auto()
    SUBSCRIPTION = auto()
class OpportunityType(Enum):
    """Kind of opportunity that arrives at the platform.

    Attributes:
        SESSION: Retail browsing session that may lead to a purchase
        MARKET_ORDER: Incoming financial market order (buy or sell)
        REQUEST: Service or credit request that needs a quote in response
    """
    SESSION = auto()
    MARKET_ORDER = auto()
    REQUEST = auto()
class EventType(Enum):
    """Kind of event written to the simulation log.

    Attributes:
        ARRIVAL: A new opportunity entered the system
        EXPOSURE: A quote was shown to an arrival
        EXECUTION: A transaction was executed
        ABANDON: An opportunity left without executing
        CANCEL: A pending order was cancelled
    """
    ARRIVAL = auto()
    EXPOSURE = auto()
    EXECUTION = auto()
    ABANDON = auto()
    CANCEL = auto()
class LogLevel(Enum):
    """How much detail a step records in its logs.

    Members:
        NONE: no logging (fastest).
        AGG_ONLY: per-step aggregate statistics only.
        FULL: event-level logging, including propensities for OPE.
    """

    NONE = auto()
    AGG_ONLY = auto()
    FULL = auto()

86
lab/outlet/gym_wrapper.py Normal file
View File

@@ -0,0 +1,86 @@
"""
Gymnasium-compatible wrapper for the Quote-Control platform.
Provides a standard Gym interface for RL training:
- observation_space: Box space with flattened observation
- action_space: Box space with price multipliers [0.5, 2.0]
- reset(), step(), render(), close() methods
Example:
>>> from lab.config import make_retail_platform
>>> from lab.outlet.gym_wrapper import QuoteGymEnv
>>> env = QuoteGymEnv(make_retail_platform())
>>> obs, info = env.reset()
>>> obs, reward, done, truncated, info = env.step(env.action_space.sample())
"""
from __future__ import annotations
from typing import Any
import numpy as np
try:
import gymnasium as gym
from gymnasium import spaces
HAS_GYM = True
except ImportError:
HAS_GYM = False
from .platform import Platform, PlatformConfig
from .types import Quote, InstrumentSet, StepResult
class QuoteGymEnv:
    """Expose a Platform through reset/step/render/close in the Gym style.

    An action holds one multiplier per instrument (declared range [0.5, 2.0]);
    it is multiplied with the platform's reference prices to obtain absolute
    quotes. Observations are the platform observation, flattened and cast to
    float32.
    """

    def __init__(self, platform: Platform):
        if not HAS_GYM:
            raise ImportError("gymnasium required for QuoteGymEnv")

        self.platform = platform
        self.n = platform.instruments.n
        self._last_result: StepResult | None = None

        # One price multiplier per instrument.
        self.action_space = spaces.Box(low=0.5, high=2.0, shape=(self.n,), dtype=np.float32)

        # Sized for four per-instrument channels (quotes, fills, exposures,
        # position) plus competitor quotes when a market model is attached.
        # NOTE(review): assumes Observation.to_flat() emits exactly these
        # channels - TODO confirm against its implementation.
        channels = 5 if platform.market else 4
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(self.n * channels,), dtype=np.float32)

    def reset(self, seed: int | None = None, options: dict | None = None) -> tuple[np.ndarray, dict]:
        """Reset the platform and return (flattened observation, info).

        ``options`` is accepted for API compatibility and is not used.
        """
        initial = self.platform.reset(seed)
        self._last_result = initial
        flat_obs = initial.obs.to_flat().astype(np.float32)
        return flat_obs, initial.info

    def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]:
        """Scale reference prices by ``action`` and advance the platform one step.

        Returns:
            (observation, reward, terminated, truncated, info)
        """
        absolute_prices = self.platform.instruments.refs * action
        outcome = self.platform.step(absolute_prices)
        self._last_result = outcome
        flat_obs = outcome.obs.to_flat().astype(np.float32)
        return flat_obs, outcome.reward, outcome.terminated, outcome.truncated, outcome.info

    def render(self) -> None:
        """Print a one-line summary of the latest step's metrics, if any."""
        if not self._last_result:
            return
        m = self._last_result.metrics
        print(f"t={self.platform._t} pnl={m.pnl:.2f} units={m.units_traded:.0f} "
              f"conv={m.conversion:.3f} vol={m.volatility:.3f}")

    def close(self) -> None:
        """Nothing to release."""
        return None
def make_env(platform: Platform) -> QuoteGymEnv:
    """Convenience constructor: wrap ``platform`` in a QuoteGymEnv."""
    env = QuoteGymEnv(platform)
    return env
if HAS_GYM:
    # Best-effort registration so the env is discoverable by id.
    # The entry point uses this module's actual import name, so it resolves
    # correctly however the package is imported (e.g. lab.outlet.gym_wrapper).
    # NOTE(review): QuoteGymEnv.__init__ requires a `platform` argument, so
    # gym.make('QuoteControl-v0') needs it passed as a keyword - confirm usage.
    try:
        gym.register(id='QuoteControl-v0', entry_point=f'{__name__}:QuoteGymEnv')
    except Exception:
        # Registration is optional; ignore failures, but do not swallow
        # KeyboardInterrupt/SystemExit as a bare `except:` would.
        pass

57
lab/outlet/math_util.py Normal file
View File

@@ -0,0 +1,57 @@
"""
Numerical utilities for stable computation.
This module provides numerically stable implementations of common operations:
- safe_exp, safe_log: Avoid overflow/underflow
- softmax: Numerically stable softmax
- sigmoid, clamp: Standard transformations
- intensity_decay: Avellaneda-Stoikov fill intensity
- inventory_penalty: Quadratic inventory risk
- poisson_arrivals, hawkes_intensity: Arrival process helpers
All functions accept both scalars and numpy arrays.
"""
import numpy as np
# Floor applied by safe_log so that log(0) is never evaluated.
EPS = 1e-8 # small constant to avoid division by zero
# safe_exp clips its argument to [-MAX_EXP, MAX_EXP] before exponentiating.
MAX_EXP = 700.0 # maximum safe exponent to avoid overflow
def safe_exp(x: np.ndarray | float) -> np.ndarray | float:
    """Return exp(x) with x first clipped to [-MAX_EXP, MAX_EXP].

    Works on scalars and arrays alike.
    """
    bounded = np.clip(x, -MAX_EXP, MAX_EXP)
    return np.exp(bounded)
def safe_log(x: np.ndarray | float) -> np.ndarray | float:
    """Return log(x) with x floored at EPS, so non-positive inputs map to log(EPS)."""
    floored = np.maximum(x, EPS)
    return np.log(floored)
def clamp(x: np.ndarray | float, lo: float, hi: float) -> np.ndarray | float:
return np.clip(x, lo, hi)
def sigmoid(x: np.ndarray | float) -> np.ndarray | float:
    """Logistic function 1 / (1 + exp(-x)), using safe_exp for the exponential."""
    denominator = 1.0 + safe_exp(-x)
    return 1.0 / denominator
def softmax(x: np.ndarray, axis: int = -1) -> np.ndarray:
    """Numerically stable softmax along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating, so every
    exponent is <= 0 and cannot overflow. The largest entry contributes
    exp(0) == 1, so the normalizer is always >= 1; no epsilon is added to it,
    which keeps the output summing to 1 along ``axis``.

    Args:
        x: Input scores (array-like).
        axis: Axis along which to normalize.

    Returns:
        Array of the same shape as ``x`` with non-negative entries that sum
        to 1 along ``axis``.
    """
    scores = np.asarray(x)
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=axis, keepdims=True)
def geometric_series(base: float, ratio: float, n: int) -> np.ndarray:
    """Return the first n terms base * ratio**k for k = 0 .. n-1."""
    exponents = np.arange(n)
    return base * (ratio ** exponents)
def ema(old: float, new: float, alpha: float = 0.1) -> float:
    """One exponential-moving-average update.

    ``alpha`` is the weight on the new sample; ``1 - alpha`` stays on ``old``.
    """
    fresh_part = alpha * new
    carried_part = (1 - alpha) * old
    return fresh_part + carried_part
def intensity_decay(distance: float, kappa: float = 1.0) -> float:
    """Avellaneda-Stoikov style fill intensity: exp(-kappa * distance)."""
    exponent = -kappa * distance
    return safe_exp(exponent)
def inventory_penalty(q: float, gamma: float = 0.1, sigma: float = 1.0) -> float:
    """Quadratic inventory risk penalty: gamma * sigma^2 * q^2 / 2."""
    variance = sigma**2
    squared_position = q**2
    return gamma * variance * squared_position / 2
def poisson_arrivals(rate: float, dt: float, rng: np.random.Generator) -> int:
    """Draw an arrival count from a Poisson distribution with mean rate * dt."""
    expected_count = rate * dt
    return rng.poisson(expected_count)
def hawkes_intensity(base: float, history: np.ndarray, alpha: float, beta: float, t: float) -> float:
    """Self-exciting Hawkes process intensity at time ``t``.

    Computes base + alpha * sum(exp(-beta * (t - s))) over past event times
    s < t. Events at or after ``t`` are ignored.

    Args:
        base: Baseline intensity, returned unchanged when there is no history.
        history: Event times; any array-like (ndarray, list, tuple) is accepted.
        alpha: Excitation added by each past event.
        beta: Exponential decay rate of the excitation.
        t: Time at which the intensity is evaluated.
    """
    # Convert up front so plain lists/tuples support the boolean mask below.
    event_times = np.asarray(history, dtype=float)
    if event_times.size == 0:
        return base
    past = event_times[event_times < t]
    decays = safe_exp(-beta * (t - past))
    return base + alpha * np.sum(decays)

View File

@@ -0,0 +1,5 @@
from .posted_price import PostedPriceMechanism
from .two_sided import TwoSidedMechanism
from .auction import AuctionMechanism
__all__ = ['PostedPriceMechanism', 'TwoSidedMechanism', 'AuctionMechanism']

View File

@@ -0,0 +1,73 @@
"""
Auction mechanism for reserve pricing and bid shading.
In this mechanism, the agent sets reserve prices that affect
win probability and clearing prices. Used for ad auctions,
marketplace auctions, and similar settings.
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
from ..types import Quote, Opportunity, Execution, InstrumentSet, MarketState
from ..constants import Side
from ..math_util import clamp, sigmoid
@dataclass
class AuctionConfig:
    """Tunable parameters of the auction mechanism."""

    # Lower bound reserves are clamped to.
    min_reserve: float = 0.0
    # Upper bound reserves are clamped to.
    max_reserve: float = 100.0
    # Multiplier on the sigmoid term of the win probability. The sigmoid is
    # 0.5 when the reserve equals the reference price, so the win probability
    # there is base_win_prob / 2.
    base_win_prob: float = 0.3
    # Steepness with which reserves above the reference reduce win probability.
    sensitivity: float = 2.0
class AuctionMechanism:
    """Reserve-price auction mechanism.

    The reserve set by the agent drives two things:
    - Win probability: base_win_prob * sigmoid(-sensitivity * (reserve - ref) / ref),
      so higher reserves win less often.
    - Clearing price: max(reserve, min(max_bid, reserve + random_increment)),
      where max_bid and the increment are simulated.

    Only BUY-side opportunities can execute; anything else yields None.
    """

    def __init__(self, cfg: AuctionConfig | None = None):
        self.cfg = cfg or AuctionConfig()

    def apply_quote(self, quote: Quote, instruments: InstrumentSet,
                    rng: np.random.Generator) -> Quote:
        """Return a copy of ``quote`` with reserves clamped to the configured range."""
        cfg = self.cfg
        bounded = clamp(quote.prices, cfg.min_reserve, cfg.max_reserve)
        return Quote(prices=bounded, propensity=quote.propensity, metadata=quote.metadata)

    def process_opportunity(self, opp: Opportunity, quote: Quote,
                            instruments: InstrumentSet, market: MarketState | None,
                            rng: np.random.Generator) -> Execution | None:
        """Simulate one auction; return the Execution on a win, else None."""
        if opp.side != Side.BUY:
            return None

        idx = int(opp.instrument_id)
        reserve = float(quote.prices[idx])
        ref = instruments.refs[idx]

        # Reserve expressed relative to the reference price; the small
        # constant guards against a zero reference.
        gap = (reserve - ref) / (ref + 1e-8)
        win_prob = self.cfg.base_win_prob * sigmoid(-self.cfg.sensitivity * gap)
        if rng.random() > win_prob:
            return None

        # Simulated highest competing bid, then a random increment over the
        # reserve; the clearing price never drops below the reserve.
        max_bid = ref * (1 + rng.exponential(0.2))
        bumped = reserve + rng.exponential(0.1) * ref
        clearing = max(reserve, min(max_bid, bumped))

        return Execution(
            opportunity_id=opp.id,
            instrument_id=opp.instrument_id,
            side=opp.side,
            size_requested=opp.size,
            size_filled=opp.size,
            price=clearing,
            propensity=quote.propensity * win_prob,
            t=opp.t,
        )

View File

@@ -0,0 +1,84 @@
"""
Posted price mechanism for retail dynamic pricing.
In this mechanism, the agent posts a single price per instrument.
Buyers decide whether to purchase based on the posted price.
This is the standard e-commerce dynamic pricing model.
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
from ..types import Quote, Opportunity, Execution, InstrumentSet, MarketState
from ..constants import Side
from ..math_util import clamp
@dataclass
class PostedPriceConfig:
    """Constraint parameters for the posted price mechanism."""

    # Absolute lower price bound.
    min_price: float = 0.01
    # Absolute upper price bound.
    max_price: float = 1000.0
    # Largest per-step price move, as a fraction of the previous price.
    max_delta_pct: float = 0.2
    # Minimum margin over cost, as a fraction of cost.
    min_margin_pct: float = 0.05
    # Rounding granularity; a falsy value (None or 0) disables rounding.
    round_to: float | None = 0.01
class PostedPriceMechanism:
    """Posted price mechanism for retail dynamic pricing.

    The agent posts one price per product. ``apply_quote`` applies, in order:
    1. a margin floor of cost * (1 + min_margin_pct),
    2. the absolute bounds [min_price, max_price],
    3. a per-step move limit of max_delta_pct around the previous prices
       (only when previous prices are available),
    4. rounding to ``round_to``.
    Later steps are applied after earlier ones, so the move limit and the
    rounding can override the margin floor and the absolute bounds.

    Only BUY-side opportunities are processed (customers purchasing).
    """

    def __init__(self, cfg: PostedPriceConfig | None = None):
        self.cfg = cfg or PostedPriceConfig()

    def apply_quote(self, quote: Quote, instruments: InstrumentSet,
                    rng: np.random.Generator) -> Quote:
        """Return a new Quote whose prices satisfy the configured constraints.

        The constrained prices are also stored in the returned quote's
        metadata under 'prev_prices'.
        """
        c = self.cfg
        prices = quote.prices.copy()

        # enforce min margin over cost
        prices = np.maximum(prices, instruments.costs * (1 + c.min_margin_pct))
        # enforce absolute bounds
        prices = clamp(prices, c.min_price, c.max_price)

        # Enforce the max per-step move when there is price history. The key
        # may be present with a None value (a platform stepped before its
        # first reset passes prev_prices=None), so test the value itself.
        prev = quote.metadata.get('prev_prices')
        if prev is not None:
            max_change = prev * c.max_delta_pct
            prices = clamp(prices, prev - max_change, prev + max_change)

        # round prices
        if c.round_to:
            prices = np.round(prices / c.round_to) * c.round_to

        return Quote(prices=prices, propensity=quote.propensity,
                     metadata={**quote.metadata, 'prev_prices': prices})

    def process_opportunity(self, opp: Opportunity, quote: Quote,
                            instruments: InstrumentSet, market: MarketState | None,
                            rng: np.random.Generator) -> Execution | None:
        """Fill a BUY opportunity in full at the posted price; ignore other sides."""
        if opp.side != Side.BUY:
            return None  # posted price is buy-only
        idx = int(opp.instrument_id)
        price = float(quote.prices[idx])
        return Execution(
            opportunity_id=opp.id, instrument_id=opp.instrument_id,
            side=opp.side, size_requested=opp.size, size_filled=opp.size,
            price=price, propensity=quote.propensity, t=opp.t
        )

View File

@@ -0,0 +1,89 @@
"""
Two-sided quoting mechanism for market making.
In this mechanism, the agent posts both bid and ask prices.
Execution depends on the distance from the market mid-price.
This models liquidity provision in financial markets.
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
from ..types import Quote, Opportunity, Execution, InstrumentSet, MarketState
from ..constants import Side
from ..math_util import clamp, intensity_decay
@dataclass
class TwoSidedConfig:
    """Constraint and fill parameters for two-sided quoting."""

    # Narrowest allowed bid-ask spread.
    min_spread: float = 0.01
    # Widest allowed bid-ask spread.
    max_spread: float = 0.5
    # Absolute lower price bound.
    min_price: float = 0.01
    # Absolute upper price bound.
    max_price: float = 10000.0
    # Fill-intensity decay rate; higher means fills fall off faster with distance.
    fill_kappa: float = 1.5
class TwoSidedMechanism:
    """Two-sided quoting mechanism for market making.

    The agent posts bid (buy) and ask (sell) prices around a mid-point.
    Fill probability decays exponentially with the quote's distance from the
    mid-price, following the Avellaneda-Stoikov intensity model.

    Both BUY and SELL opportunities are processed:
    - BUY: customer buys at the agent's ask price
    - SELL: customer sells at the agent's bid price
    """

    def __init__(self, cfg: TwoSidedConfig | None = None):
        self.cfg = cfg or TwoSidedConfig()

    def apply_quote(self, quote: Quote, instruments: InstrumentSet,
                    rng: np.random.Generator) -> Quote:
        """Clamp prices and spreads, then re-derive mid and spread from the
        bounded bid/ask pair."""
        c = self.cfg
        prices = quote.prices.copy()
        if quote.spreads is not None:
            spreads = quote.spreads.copy()
        else:
            # Default spread of 0.02. Build a float array explicitly:
            # np.full_like would inherit an integer dtype from `prices`
            # and truncate 0.02 to 0.
            spreads = np.full(prices.shape, 0.02)

        prices = clamp(prices, c.min_price, c.max_price)
        spreads = clamp(spreads, c.min_spread, c.max_spread)

        # Keep bids below asks while holding both inside the price bounds.
        half_spread = spreads / 2
        bids = np.maximum(prices - half_spread, c.min_price)
        asks = np.minimum(prices + half_spread, c.max_price)

        # Clipping can shrink the spread and move the mid, so recompute both.
        spreads = asks - bids
        prices = (bids + asks) / 2
        return Quote(prices=prices, spreads=spreads, propensity=quote.propensity,
                     metadata=quote.metadata)

    def process_opportunity(self, opp: Opportunity, quote: Quote,
                            instruments: InstrumentSet, market: MarketState | None,
                            rng: np.random.Generator) -> Execution | None:
        """Probabilistically fill ``opp`` at the ask (BUY) or bid (SELL).

        Returns the Execution when the fill draw succeeds, otherwise None.
        """
        idx = int(opp.instrument_id)
        # Use the market mid when one is available, else the quote's own mid.
        mid = market.mid_prices[idx] if market and market.mid_prices is not None else quote.prices[idx]

        if opp.side == Side.BUY:
            price = float(quote.asks[idx]) if quote.asks is not None else float(quote.prices[idx])
            distance = price - mid
        else:
            price = float(quote.bids[idx]) if quote.bids is not None else float(quote.prices[idx])
            distance = mid - price

        # probabilistic fill based on distance from mid
        # NOTE(review): abs() means a quote through the mid (better for the
        # customer) fills as rarely as one equally far on the other side -
        # confirm this is intended rather than max(distance, 0).
        fill_prob = intensity_decay(abs(distance), self.cfg.fill_kappa)
        if rng.random() > fill_prob:
            return None

        return Execution(
            opportunity_id=opp.id, instrument_id=opp.instrument_id,
            side=opp.side, size_requested=opp.size, size_filled=opp.size,
            price=price, propensity=quote.propensity * fill_prob, t=opp.t
        )

View File

@@ -0,0 +1,11 @@
from .base import BaseObjective, CompositeObjective
from .penalties import (PnLObjective, VolatilityPenalty, HoldingCostPenalty,
LostOpportunityCostPenalty, InventoryRiskPenalty, SpreadCaptureReward)
from .factory import make_objective, make_composite, retail_objective, market_making_objective
__all__ = [
'BaseObjective', 'CompositeObjective',
'PnLObjective', 'VolatilityPenalty', 'HoldingCostPenalty',
'LostOpportunityCostPenalty', 'InventoryRiskPenalty', 'SpreadCaptureReward',
'make_objective', 'make_composite', 'retail_objective', 'market_making_objective',
]

View File

@@ -0,0 +1,48 @@
"""
Base classes for reward objectives.
Objectives compute scalar rewards from step metrics. The CompositeObjective
allows combining multiple objectives with weights for multi-objective optimization.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
class BaseObjective(ABC):
    """Interface every reward objective implements.

    Concrete subclasses provide ``reward`` (a scalar) and ``breakdown``
    (named components of that scalar).
    """

    @abstractmethod
    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return the scalar reward for one step."""

    @abstractmethod
    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return the reward's named components for one step."""
class CompositeObjective(BaseObjective):
    """Weighted sum of multiple objectives.

    Allows combining several reward terms (e.g. PnL - holding_cost - volatility).

    Args:
        objectives: List of (objective, weight) tuples.
    """

    def __init__(self, objectives: list[tuple[BaseObjective, float]]):
        self.objectives = objectives

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return the sum of weight * component reward over all components."""
        return sum(w * obj.reward(quote, instruments, metrics, hidden, obs)
                   for obj, w in self.objectives)

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return every component's breakdown entries scaled by its weight.

        When several components report the same key, their weighted values
        are summed rather than the later one overwriting the earlier.
        """
        bd: dict[str, float] = {}
        for obj, w in self.objectives:
            parts = obj.breakdown(quote, instruments, metrics, hidden, obs)
            for k, v in parts.items():
                bd[k] = bd[k] + w * v if k in bd else w * v
        return bd

View File

@@ -0,0 +1,82 @@
"""
Factory functions for creating objectives.
Provides:
- make_objective: Create single objective by name
- make_composite: Create weighted combination of objectives
- retail_objective: Default objective for retail pricing
- market_making_objective: Default objective for market making
"""
from __future__ import annotations
from .base import BaseObjective, CompositeObjective
from .penalties import (PnLObjective, VolatilityPenalty, HoldingCostPenalty,
LostOpportunityCostPenalty, InventoryRiskPenalty, SpreadCaptureReward)
# Short objective names accepted by make_objective(), mapped to their classes.
REGISTRY: dict[str, type[BaseObjective]] = {
'pnl': PnLObjective,
'volatility': VolatilityPenalty,
'holding_cost': HoldingCostPenalty,
'lost_opportunity': LostOpportunityCostPenalty,
'inventory_risk': InventoryRiskPenalty,
'spread_capture': SpreadCaptureReward,
}
def make_objective(name: str, **kwargs) -> BaseObjective:
    """Instantiate the objective registered under ``name``.

    Args:
        name: One of the REGISTRY keys (pnl, volatility, holding_cost,
            lost_opportunity, inventory_risk, spread_capture).
        **kwargs: Forwarded to the objective's constructor.

    Returns:
        The newly constructed objective.

    Raises:
        ValueError: If ``name`` is not registered.
    """
    objective_cls = REGISTRY.get(name)
    if objective_cls is None:
        raise ValueError(f"Unknown objective: {name}. Available: {list(REGISTRY.keys())}")
    return objective_cls(**kwargs)
def make_composite(spec: list[tuple[str, float, dict]] | dict[str, float]) -> CompositeObjective:
    """Build a CompositeObjective from a declarative specification.

    Args:
        spec: Either a dict of {name: weight}, where each objective is built
            with default constructor arguments, or a list of
            (name, weight, kwargs) tuples, where kwargs go to the constructor.

    Returns:
        CompositeObjective holding the (objective, weight) pairs in spec order.
    """
    if isinstance(spec, dict):
        weighted = [(make_objective(name), weight) for name, weight in spec.items()]
    else:
        weighted = [(make_objective(name, **kwargs), weight)
                    for name, weight, kwargs in spec]
    return CompositeObjective(weighted)
def retail_objective(volatility_weight: float = 0.1, holding_weight: float = 0.5,
                     stockout_weight: float = 0.3) -> CompositeObjective:
    """Default objective for retail dynamic pricing.

    Reward = PnL - volatility_weight*volatility - holding_weight*holding_cost
             - stockout_weight*lost_opportunity

    The penalty objectives already return negative values, so the weights
    here are passed as positive numbers.
    """
    weights = {
        'pnl': 1.0,
        'volatility': volatility_weight,
        'holding_cost': holding_weight,
        'lost_opportunity': stockout_weight,
    }
    return make_composite(weights)
def market_making_objective(gamma: float = 0.1, sigma: float = 1.0) -> CompositeObjective:
    """Default objective for market making.

    Reward = PnL + 0.5*spread_capture - inventory_risk(gamma, sigma)
    """
    pnl_term = (PnLObjective(), 1.0)
    spread_term = (SpreadCaptureReward(), 0.5)
    # The penalty itself returns a negative value, hence the +1.0 weight.
    risk_term = (InventoryRiskPenalty(gamma=gamma, sigma=sigma), 1.0)
    return CompositeObjective([pnl_term, spread_term, risk_term])

View File

@@ -0,0 +1,101 @@
"""
Standard objective components and penalties.
This module provides common reward terms:
- PnLObjective: Basic profit and loss
- VolatilityPenalty: Penalize price volatility for UX
- HoldingCostPenalty: Inventory holding cost
- LostOpportunityCostPenalty: Stockout/missed fill cost
- InventoryRiskPenalty: Quadratic inventory risk (market making)
- SpreadCaptureReward: Bid-ask spread capture (market making)
"""
from __future__ import annotations
import numpy as np
from .base import BaseObjective
from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
from ..math_util import inventory_penalty
class PnLObjective(BaseObjective):
    """Reward equal to the step's profit and loss."""

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return ``metrics.pnl`` unchanged."""
        step_pnl = metrics.pnl
        return step_pnl

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Report pnl together with the revenue and cost metrics."""
        return dict(pnl=metrics.pnl, revenue=metrics.revenue, cost=metrics.cost)
class VolatilityPenalty(BaseObjective):
    """Negative reward proportional to the step's volatility metric."""

    def __init__(self, scale: float = 1.0):
        self.scale = scale

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return -scale * metrics.volatility."""
        value = -self.scale * metrics.volatility
        return value

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Report the penalty under 'volatility_penalty'."""
        value = -self.scale * metrics.volatility
        return {'volatility_penalty': value}
class HoldingCostPenalty(BaseObjective):
    """Negative reward proportional to the step's position (holding) cost."""

    def __init__(self, scale: float = 1.0):
        self.scale = scale

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return -scale * metrics.position_cost."""
        value = -self.scale * metrics.position_cost
        return value

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Report the penalty under 'holding_cost_penalty'."""
        value = -self.scale * metrics.position_cost
        return {'holding_cost_penalty': value}
class LostOpportunityCostPenalty(BaseObjective):
    """Negative reward proportional to the step's lost-opportunity metric
    (sales lost to stockouts or missed fills)."""

    def __init__(self, scale: float = 1.0):
        self.scale = scale

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return -scale * metrics.lost_opportunity."""
        value = -self.scale * metrics.lost_opportunity
        return value

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Report the penalty under 'lost_opportunity_penalty'."""
        value = -self.scale * metrics.lost_opportunity
        return {'lost_opportunity_penalty': value}
class InventoryRiskPenalty(BaseObjective):
    """Quadratic inventory risk penalty (Avellaneda-Stoikov style).

    Penalty = gamma * sigma^2 * q^2 / 2, where q is the sum of the observed
    position over all instruments (long and short positions offset each
    other in that sum). Pushes market makers toward flat inventory.
    """

    def __init__(self, gamma: float = 0.1, sigma: float = 1.0):
        self.gamma = gamma
        self.sigma = sigma

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return the negated penalty, or 0.0 when the position is not observed."""
        if obs.position is None:
            return 0.0
        net_position = np.sum(obs.position)
        return -inventory_penalty(net_position, self.gamma, self.sigma)

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Report the reward under 'inventory_risk_penalty'."""
        value = self.reward(quote, instruments, metrics, hidden, obs)
        return {'inventory_risk_penalty': value}
class SpreadCaptureReward(BaseObjective):
    """Reward equal to the step's captured bid-ask spread (market making)."""

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return ``metrics.spread_capture`` unchanged."""
        captured = metrics.spread_capture
        return captured

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Report the value under 'spread_capture'."""
        return dict(spread_capture=metrics.spread_capture)

92
lab/outlet/observation.py Normal file
View File

@@ -0,0 +1,92 @@
"""
Observation construction with demand censoring.
This module provides the ObservationBuilder that constructs agent observations
from step data. The key invariant is that observations only contain censored
data (fills) and never true demand, ensuring proper research conditions.
The ObservationConfig controls what is included in observations:
- Position visibility
- Market/competitor visibility
- Demand proxy method
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
from .types import Quote, InstrumentSet, StepLogs, StepMetrics, MarketState, HiddenState, Observation
@dataclass
class ObservationConfig:
    """Switches controlling what goes into an agent observation."""

    # Include the current position in the observation.
    include_position: bool = True
    # Include market/competitor state in the observation.
    include_market: bool = True
    # Research mode: keep true demand out of the observation.
    mask_true_demand: bool = True
    # Demand proxy method: 'fills', 'exposures' or 'weighted'.
    demand_proxy: str = 'fills'
    # Weights for the 'weighted' demand proxy.
    exposure_weights: dict[str, float] | None = None
class DefaultObservationBuilder:
    """Constructs censored observations for the agent.

    Observations carry only censored fills (realized sales), never true
    demand, which is the research invariant this builder maintains.
    """

    def __init__(self, cfg: ObservationConfig | None = None):
        self.cfg = cfg or ObservationConfig()

    def build(self, quote: Quote, instruments: InstrumentSet, logs: StepLogs,
              metrics: StepMetrics, market: MarketState | None,
              hidden: HiddenState, mask_demand: bool, t: int) -> Observation:
        """Assemble the Observation for step ``t``.

        Args:
            quote: Quote in force; its prices are copied into the observation.
            instruments: Instrument set; supplies the count and position.
            logs: Step logs providing censored fills, events and aggregates.
            metrics: Step metrics (not read by this builder).
            market: Market state, passed through when cfg.include_market is set.
            hidden: Hidden state (not read by this builder).
            mask_demand: Accepted for interface compatibility (not read here).
            t: Step index stored on the observation.
        """
        # Imported here because this module's header imports only from
        # .types; the enum is needed to tell exposure events from others.
        from .constants import EventType

        n = instruments.n
        cfg = self.cfg

        # always show censored fills
        fills = logs.censored_fills if logs.censored_fills is not None else np.zeros(n)

        # Exposures: with event-level logs, count EXPOSURE events per
        # instrument (other event types are not exposures); otherwise fall
        # back to the aggregate counts.
        if logs.events:
            exposures = np.zeros(n)
            for e in logs.events:
                if e.type == EventType.EXPOSURE and e.instrument_id is not None:
                    exposures[int(e.instrument_id)] += 1
        else:
            exposures = logs.aggregates.get('exposures', np.zeros(n))

        # position - only if configured and available
        position = None
        if cfg.include_position and instruments.position is not None:
            position = instruments.position.copy()

        # market state - only if configured
        obs_market = market if cfg.include_market else None

        return Observation(
            quotes=quote.prices.copy(),
            position=position,
            fills=fills,
            exposures=exposures,
            market=obs_market,
            t=t
        )

    def make_space(self, n_instruments: int, include_market: bool = True) -> dict:
        """Return a dict describing the observation space for gym.

        Each entry maps a field name to its 'shape', 'low' and 'high'.
        'position' is present only when cfg.include_position is set, and
        'competitor_quotes' only when ``include_market`` is True.
        """
        shape = (n_instruments,)
        space = {
            'quotes': {'shape': shape, 'low': 0, 'high': np.inf},
            'fills': {'shape': shape, 'low': 0, 'high': np.inf},
            'exposures': {'shape': shape, 'low': 0, 'high': np.inf},
        }
        if self.cfg.include_position:
            space['position'] = {'shape': shape, 'low': -np.inf, 'high': np.inf}
        if include_market:
            space['competitor_quotes'] = {'shape': shape, 'low': 0, 'high': np.inf}
        return space

285
lab/outlet/platform.py Normal file
View File

@@ -0,0 +1,285 @@
"""
Main simulation platform orchestrating the Quote-Control loop.
The Platform class is the central coordinator that:
1. Receives pricing actions (quotes) from the agent
2. Generates arrivals via the ArrivalModel
3. Processes executions via Mechanism and ExecutionModel
4. Applies position censorship via PositionModel
5. Computes metrics and reward via Objective
6. Returns censored observations
Example:
>>> from lab.config import make_retail_platform
>>> platform = make_retail_platform()
>>> result = platform.reset(seed=42)
>>> result = platform.step(platform.instruments.refs * 1.1)
>>> print(f"PnL: {result.metrics.pnl:.2f}")
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
import numpy as np
from .types import (Quote, Opportunity, Execution, InstrumentSet, StepLogs, StepMetrics,
StepEvent, MarketState, HiddenState, Observation, StepResult)
from .constants import LogLevel, EventType, Side
from .protocols import Mechanism, ArrivalModel, ExecutionModel, PositionModel, MarketModel, ObservationBuilder, Objective
from .stock import PositionModel as DefaultPositionModel, PositionConfig
from .observation import DefaultObservationBuilder, ObservationConfig
from .objectives.factory import retail_objective
@dataclass
class PlatformConfig:
    """Top-level settings for the simulation platform."""

    # Number of instruments in the simulation.
    n_instruments: int = 10
    # Maximum number of steps before the episode terminates.
    max_steps: int = 1000
    # Time duration of one step (affects arrival rates).
    dt: float = 1.0
    # Logging verbosity (NONE, AGG_ONLY, FULL).
    log_level: LogLevel = LogLevel.AGG_ONLY
    # Research mode: observations exclude true demand.
    mask_demand: bool = True
    # Random seed for reproducibility.
    seed: int | None = None
class Platform:
"""Main simulation orchestrator implementing Quote -> Arrival -> Execution -> Position.
The Platform coordinates all components to simulate a pricing environment:
- Mechanism: validates quotes and determines execution logic
- ArrivalModel: generates demand opportunities
- ExecutionModel: computes acceptance probabilities
- PositionModel: manages inventory/position and censorship
- MarketModel: updates competitor/market state
- ObservationBuilder: constructs censored observations
- Objective: computes reward from metrics
Attributes:
instruments: The instrument set being priced
mechanism: Quote validation and execution mechanism
arrival: Demand arrival generator
execution: Acceptance probability model
position: Inventory/position manager
market: Competitor/market dynamics (optional)
obs_builder: Observation constructor
objective: Reward function
cfg: Platform configuration
"""
def __init__(self, instruments: InstrumentSet, mechanism: Mechanism,
arrival: ArrivalModel, execution: ExecutionModel,
position: PositionModel | None = None,
market: MarketModel | None = None,
obs_builder: ObservationBuilder | None = None,
objective: Objective | None = None,
cfg: PlatformConfig | None = None):
self.instruments = instruments
self.mechanism = mechanism
self.arrival = arrival
self.execution = execution
self.position = position or DefaultPositionModel(PositionConfig())
self.market = market
self.obs_builder = obs_builder or DefaultObservationBuilder()
self.objective = objective or retail_objective()
self.cfg = cfg or PlatformConfig(n_instruments=instruments.n)
self._t: int = 0
self._rng: np.random.Generator = np.random.default_rng(self.cfg.seed)
self._quote: Quote | None = None
self._market_state: MarketState | None = None
self._hidden: HiddenState = HiddenState()
self._prev_prices: np.ndarray | None = None
def reset(self, seed: int | None = None) -> StepResult:
"""Reset the platform to initial state.
Args:
seed: Random seed (overrides config seed if provided)
Returns:
Initial StepResult with zeroed metrics and initial observation
"""
self._t = 0
self._rng = np.random.default_rng(seed or self.cfg.seed)
self._hidden = HiddenState()
self._prev_prices = self.instruments.refs.copy()
# reset position
self.position.reset(self.instruments, self._rng)
self.instruments.position = self.position.position
# initial quote at reference prices
self._quote = Quote(prices=self.instruments.refs.copy(), propensity=1.0,
metadata={'prev_prices': self._prev_prices})
self._quote = self.mechanism.apply_quote(self._quote, self.instruments, self._rng)
# initial market state
if self.market:
self._market_state = self.market.step(0, self._quote, self._hidden, self._rng)
# build initial observation
logs = StepLogs(aggregates={'reset': True},
true_demand=np.zeros(self.instruments.n),
censored_fills=np.zeros(self.instruments.n))
metrics = StepMetrics()
obs = self.obs_builder.build(self._quote, self.instruments, logs, metrics,
self._market_state, self._hidden, self.cfg.mask_demand, 0)
return StepResult(obs=obs, reward=0.0, terminated=False, truncated=False,
info={'true_demand': logs.true_demand}, metrics=metrics,
logs=logs, hidden=self._hidden)
def step(self, action: np.ndarray, propensity: float = 1.0) -> StepResult:
    """Execute one simulation step with the given pricing action.

    Stages (numbering matches the inline comments):
        1. Wrap the action in a Quote and let the mechanism constrain it
        2. Update market/competitor state (if a market model is set)
        3. Sample arrivals
        4. Acceptance check per arrival; accepted arrivals become
           executions that are censored by the position model
        5. Advance the position model
        6. Compute metrics (PnL, costs, spread capture, volatility)
        7. Build logs
        8. Build the observation
        9. Compute the reward and its breakdown
        10. Check termination

    Args:
        action: Price vector for all instruments
        propensity: P(action | behavior policy) for OPE logging

    Returns:
        StepResult containing observation, reward, metrics, logs, and hidden state
    """
    self._t += 1
    cfg = self.cfg
    full_logging = cfg.log_level == LogLevel.FULL

    # 1. apply quote from action
    self._quote = Quote(prices=action, propensity=propensity,
                        metadata={'prev_prices': self._prev_prices})
    self._quote = self.mechanism.apply_quote(self._quote, self.instruments, self._rng)
    self._prev_prices = self._quote.prices.copy()
    self._hidden.quote_history.append(self._quote.prices.copy())

    # 2. update market/competitors
    if self.market:
        self._market_state = self.market.step(self._t, self._quote, self._hidden, self._rng)
        self._hidden.market_history.append(self._market_state)

    # 3. generate arrivals
    opps = self.arrival.sample(self._t, cfg.dt, self.instruments,
                               self._market_state, self._hidden, self._rng)

    # 4. process opportunities -> executions
    executions: list[Execution] = []
    events: list[StepEvent] = []
    true_demand = np.zeros(self.instruments.n)
    for opp in opps:
        # log exposure
        if full_logging:
            events.append(StepEvent(t=opp.t, type=EventType.EXPOSURE,
                                    instrument_id=opp.instrument_id,
                                    opportunity_id=opp.id,
                                    price=float(self._quote.prices[opp.instrument_id]),
                                    propensity=self._quote.propensity))
        # check acceptance
        prob = self.execution.prob(opp, self._quote, self.instruments,
                                   self._market_state, self._rng)
        if self._rng.random() < prob:
            # create execution
            exe = self.mechanism.process_opportunity(opp, self._quote, self.instruments,
                                                     self._market_state, self._rng)
            if exe:
                # demand is recorded before censorship, fills after
                true_demand[exe.instrument_id] += exe.size_requested
                # apply position censorship
                exe = self.position.apply_execution(exe)
                executions.append(exe)
                if full_logging:
                    events.append(StepEvent(t=exe.t, type=EventType.EXECUTION,
                                            instrument_id=exe.instrument_id,
                                            opportunity_id=exe.opportunity_id,
                                            price=exe.price, size=exe.size_filled,
                                            propensity=exe.propensity))

    # 5. update position state
    self.position.step(self._t)
    self.instruments.position = self.position.position

    # 6. compute metrics
    censored_fills = np.zeros(self.instruments.n)
    revenue = 0.0
    cost = 0.0
    spread_capture = 0.0
    # `instruments.costs` builds a fresh array on every access, so read it once.
    costs = self.instruments.costs
    # Spread capture only applies to two-sided quotes with known mid-prices;
    # the condition does not depend on the execution, so evaluate it once.
    mids = None
    if self._quote.spreads is not None and self._market_state:
        mids = self._market_state.mid_prices
    for exe in executions:
        censored_fills[exe.instrument_id] += exe.size_filled
        if exe.side == Side.BUY:
            revenue += exe.price * exe.size_filled
            cost += costs[exe.instrument_id] * exe.size_filled
        else:
            revenue -= exe.price * exe.size_filled
            cost -= costs[exe.instrument_id] * exe.size_filled
        # spread capture for market making
        if mids is not None:
            mid = mids[exe.instrument_id]
            if exe.side == Side.BUY:
                spread_capture += (exe.price - mid) * exe.size_filled
            else:
                spread_capture += (mid - exe.price) * exe.size_filled
    pnl = revenue - cost
    units = float(np.sum(censored_fills))
    lost = float(np.sum(true_demand - censored_fills))

    # volatility: mean relative change against the previous quote
    volatility = 0.0
    if len(self._hidden.quote_history) > 1:
        prev = self._hidden.quote_history[-2]
        volatility = float(np.mean(np.abs(self._quote.prices - prev) / (prev + 1e-8)))

    metrics = StepMetrics(
        pnl=pnl, revenue=revenue, cost=cost, units_traded=units,
        position_cost=self.position.holding_cost,
        # censored units are valued at 10% of the mean quoted price (hard-coded weight)
        lost_opportunity=self.position.shortage_cost + lost * np.mean(self._quote.prices) * 0.1,
        spread_capture=spread_capture, volatility=volatility,
        # the epsilon keeps the ratio finite when nothing arrived
        conversion=units / (len(opps) + 1e-8),
        per_instrument={'fills': censored_fills, 'demand': true_demand}
    )

    # 7. build logs
    logs = StepLogs(
        events=events if full_logging else None,
        executions=executions if full_logging else None,
        aggregates={'n_arrivals': len(opps), 'n_executions': len(executions),
                    'exposures': np.bincount([o.instrument_id for o in opps],
                                             minlength=self.instruments.n).astype(float)},
        true_demand=true_demand,
        censored_fills=censored_fills
    )

    # 8. build observation
    obs = self.obs_builder.build(self._quote, self.instruments, logs, metrics,
                                 self._market_state, self._hidden, cfg.mask_demand, self._t)

    # 9. compute reward; the breakdown is computed once and reused in `info`
    reward = self.objective.reward(self._quote, self.instruments, metrics, self._hidden, obs)
    breakdown = self.objective.breakdown(self._quote, self.instruments, metrics, self._hidden, obs)

    # 10. check termination
    terminated = self._t >= cfg.max_steps
    truncated = False
    info = {'true_demand': true_demand, 'breakdown': breakdown}
    return StepResult(obs=obs, reward=reward, terminated=terminated, truncated=truncated,
                      info=info, metrics=metrics, logs=logs, hidden=self._hidden)

297
lab/outlet/protocols.py Normal file
View File

@@ -0,0 +1,297 @@
"""
Protocol definitions for pluggable simulator components.
This module defines the interfaces (Protocols) that allow swapping different
implementations for each stage of the Quote -> Arrival -> Execution -> Position
pipeline. All protocols use structural subtyping (duck typing).
Protocols:
Mechanism: How quotes translate to executions (posted price, two-sided, auction)
ArrivalModel: How opportunities arrive (Poisson, Hawkes, sessions)
ExecutionModel: Acceptance probability given quote (elasticity, intensity)
PositionModel: Inventory/position management and censorship
MarketModel: Competitor/market dynamics
ObservationBuilder: Constructs agent observations with censoring
Objective: Computes reward from metrics
"""
from __future__ import annotations
from typing import Protocol, Any, TYPE_CHECKING
import numpy as np
if TYPE_CHECKING:
from .types import (Quote, Opportunity, Execution, InstrumentSet, StepLogs,
StepMetrics, HiddenState, Observation, MarketState)
from .constants import LogLevel
class Mechanism(Protocol):
    """Contract for turning a policy's quote into executions.

    The mechanism is what distinguishes one pricing domain from another:
        - PostedPrice: one price, the buyer takes it or leaves it
        - TwoSided: bid/ask spread, execution depends on distance from mid
        - Auction: reserve price affects win probability and clearing price

    Methods:
        apply_quote: Enforce constraints and return a valid quote
        process_opportunity: Decide the execution for an opportunity
    """

    def apply_quote(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        rng: np.random.Generator,
    ) -> Quote:
        """Constrain a raw quote according to the mechanism's rules.

        Args:
            quote: Raw quote coming from the policy
            instruments: Current instrument set with costs/refs
            rng: Random generator for stochastic constraints

        Returns:
            A quote that satisfies the mechanism rules (min margin,
            max delta, etc.)
        """
        ...

    def process_opportunity(
        self,
        opp: Opportunity,
        quote: Quote,
        instruments: InstrumentSet,
        market: MarketState | None,
        rng: np.random.Generator,
    ) -> Execution | None:
        """Match an incoming opportunity against the current quote.

        Args:
            opp: Incoming opportunity (session, order, request)
            quote: Currently posted quote
            instruments: Instrument set
            market: Current market state (competitor prices, mid-prices)
            rng: Random generator

        Returns:
            An Execution when the opportunity converts, otherwise None
        """
        ...
class ArrivalModel(Protocol):
    """Contract for producing the opportunities (demand arrivals) of a step.

    Implementations capture different demand dynamics:
        - Poisson: constant rate, memoryless
        - Hawkes: self-exciting, clustered arrivals
        - Session: retail browsing with multi-product views

    Methods:
        sample: Generate opportunities for a time interval
    """

    def sample(
        self,
        t: float,
        dt: float,
        instruments: InstrumentSet,
        market: MarketState | None,
        hidden: HiddenState,
        rng: np.random.Generator,
    ) -> list[Opportunity]:
        """Draw the opportunities arriving in the interval [t, t+dt).

        Args:
            t: Current time
            dt: Length of the time interval
            instruments: Available instruments
            market: Current market state
            hidden: Hidden state (demand intensity, contamination)
            rng: Random generator

        Returns:
            Opportunities that arrive during the interval
        """
        ...
class ExecutionModel(Protocol):
    """Contract for the acceptance/execution probability of a quote.

    Implementations capture different demand responses:
        - Elasticity: price sensitivity with competitor cross-effects
        - Intensity: distance-based fill probability (market making)
        - Logit: discrete choice model

    Methods:
        prob: Acceptance probability for one opportunity
        uncensor: Estimate of true demand from censored fills
    """

    def prob(
        self,
        opp: Opportunity,
        quote: Quote,
        instruments: InstrumentSet,
        market: MarketState | None,
        rng: np.random.Generator,
    ) -> float:
        """Return the probability that the opportunity accepts the quote.

        Args:
            opp: Opportunity being evaluated
            quote: Current quote
            instruments: Instrument set
            market: Market state (competitor prices affect cross-elasticity)
            rng: Random generator

        Returns:
            Execution probability in [0, 1]
        """
        ...

    def uncensor(
        self,
        fills: np.ndarray,
        instruments: InstrumentSet,
        context: dict[str, Any] | None = None,
    ) -> np.ndarray:
        """Estimate true demand from censored fills.

        Intended for demand-estimation research under inventory censorship.

        Args:
            fills: Observed (censored) fill counts
            instruments: Instrument set
            context: Additional context (exposures, prices shown)

        Returns:
            Estimated true demand counts
        """
        ...
class PositionModel(Protocol):
    """Manages inventory (retail) or position (finance).

    Handles:
        - Position constraints and censorship
        - Holding costs (retail) or inventory risk (finance)
        - Replenishment and order receipt

    Methods:
        reset: Initialize position state
        available: Query available capacity for a trade
        apply_execution: Censor execution by available position
        step: Process time-based updates (replenishment, holding cost)

    Properties:
        position: Current position vector
        holding_cost: Cost incurred this step from holding position
        shortage_cost: Shortage/stockout cost reported by the model
    """

    def reset(self, instruments: InstrumentSet, rng: np.random.Generator) -> None:
        """Initialize position state for new episode."""
        ...

    def available(self, instrument_id: int, side: Any) -> float:
        """Query available capacity for a trade.

        Args:
            instrument_id: Which instrument
            side: BUY or SELL

        Returns:
            Maximum tradeable size given current position
        """
        ...

    def apply_execution(self, exe: Execution) -> Execution:
        """Apply position constraints to an execution.

        Args:
            exe: Proposed execution with size_requested

        Returns:
            Censored execution with size_filled <= available capacity
        """
        ...

    def step(self, t: float) -> None:
        """Process time-based position updates.

        Handles replenishment receipt, holding cost calculation, etc.
        """
        ...

    @property
    def position(self) -> np.ndarray:
        """Current position vector (positive=long/inventory, negative=short)."""
        ...

    @property
    def holding_cost(self) -> float:
        """Holding cost incurred this step."""
        ...

    # Declared because the platform's step reads `position.shortage_cost`
    # when it assembles the lost-opportunity metric; without it a class could
    # satisfy this protocol and still fail at runtime.
    @property
    def shortage_cost(self) -> float:
        """Shortage (stockout) cost from executions that could not be fully filled."""
        ...
class MarketModel(Protocol):
    """Contract for external market dynamics and competitor behavior.

    Retail: competitor price dynamics (static, reactive, stochastic).
    Finance: mid-price process (GBM, mean-reverting).

    Methods:
        step: Update market state given the agent's quotes
    """

    def step(
        self,
        t: float,
        self_quotes: Quote,
        hidden: HiddenState,
        rng: np.random.Generator,
    ) -> MarketState:
        """Advance the market to this timestep.

        Args:
            t: Current time
            self_quotes: The agent's current quotes (competitors may react)
            hidden: Hidden state (regime info)
            rng: Random generator

        Returns:
            Updated market state with competitor prices, mid-prices, volatility
        """
        ...
class ObservationBuilder(Protocol):
    """Contract for assembling the agent's (censored) observation.

    Critical for research: the agent must only ever see censored fills;
    true demand belongs in the info dict, never in the observation.

    Methods:
        build: Construct the observation from step data
    """

    def build(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        logs: StepLogs,
        metrics: StepMetrics,
        market: MarketState | None,
        hidden: HiddenState,
        mask_demand: bool,
        t: int,
    ) -> Observation:
        """Assemble the observation handed to the agent.

        Args:
            quote: Current quote
            instruments: Instrument set with positions
            logs: Step logs with true_demand and censored_fills
            metrics: Computed metrics
            market: Market state
            hidden: Hidden state (not included in obs)
            mask_demand: If True, exclude true demand from observation
            t: Current timestep

        Returns:
            Observation containing only observable quantities
        """
        ...
class Objective(Protocol):
    """Contract for turning step metrics into a reward.

    Supports composite objectives built from weighted terms:
        - PnL (profit)
        - Position costs (holding, inventory risk)
        - Lost opportunity (stockouts)
        - Volatility penalty (UX)
        - Spread capture (market making)

    Methods:
        reward: Scalar reward for the step
        breakdown: Per-term contributions for analysis
    """

    def reward(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> float:
        """Return the scalar reward for this step.

        Args:
            quote: Current quote
            instruments: Instrument set
            metrics: Step metrics (pnl, costs, etc.)
            hidden: Hidden state
            obs: Agent observation

        Returns:
            Scalar reward value
        """
        ...

    def breakdown(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> dict[str, float]:
        """Return the reward split by component.

        Helps to see which terms dominate the reward.

        Returns:
            Dict mapping term names to their contributions
        """
        ...

151
lab/outlet/stock.py Normal file
View File

@@ -0,0 +1,151 @@
"""
Inventory/position management and instrument factories.
This module provides:
- PositionConfig: Configuration for position constraints and costs
- PositionModel: Manages inventory (retail) or position (finance)
- make_instruments: Factory for creating instrument sets
The PositionModel handles demand censorship by limiting executions
to available inventory, computing holding costs, and managing replenishment.
"""
from __future__ import annotations
from dataclasses import dataclass, field
import numpy as np
from .types import Instrument, InstrumentSet, Execution
from .constants import Side, InstrumentType
@dataclass
class PositionConfig:
    """Settings for position/inventory management (constraints and cost rates)."""

    # Starting inventory: None = unlimited, a float = same value for every instrument
    initial_position: np.ndarray | float | None = None
    # Maximum long position per instrument
    max_position: float = 1000.0
    # Maximum short position (negative, for finance)
    min_position: float = -1000.0
    # Cost per unit per step for holding inventory
    holding_cost_rate: float = 0.001
    # Opportunity cost rate for stockouts
    shortage_cost_rate: float = 0.05
    # Steps until replenishment orders arrive
    lead_time: int = 0
@dataclass
class PositionModel:
    """Manages inventory (retail) or position (finance) with censorship.

    Key responsibilities:
        - Track current position per instrument
        - Censor executions when position is insufficient
        - Compute holding costs per step
        - Track shortage/stockout costs per step
        - Handle replenishment orders with lead time

    For retail: position is inventory (positive), selling reduces it.
    For finance: position can be positive (long) or negative (short).

    `holding_cost` and `shortage_cost` describe the most recently completed
    step, i.e. they are refreshed by `step()`.

    NOTE(review): `cfg.min_position` is not consulted anywhere in this class,
    so BUY-side capacity is floored at zero inventory rather than at a short
    limit - confirm whether shorting is meant to be supported here.
    """
    cfg: PositionConfig
    n: int = 0
    _position: np.ndarray = field(default_factory=lambda: np.array([]))
    _pending_orders: list[tuple[int, np.ndarray]] = field(default_factory=list)
    _step_holding_cost: float = 0.0
    _step_shortage_cost: float = 0.0
    # Shortage cost accrued by apply_execution() since the last step().
    # Excluded from __init__/repr/eq so the public dataclass surface is unchanged.
    _accrued_shortage_cost: float = field(default=0.0, init=False, repr=False, compare=False)

    def reset(self, instruments: InstrumentSet, rng: np.random.Generator) -> None:
        """Initialize the position vector and clear orders and cost trackers.

        Args:
            instruments: Instrument set; only its size `n` is read
            rng: Random generator (unused by this implementation)

        Raises:
            ValueError: If an array-valued initial position cannot be
                broadcast to one entry per instrument.
        """
        self.n = instruments.n
        init = self.cfg.initial_position
        if init is None:
            self._position = np.full(self.n, np.inf)  # unlimited
        else:
            # Broadcasting handles Python scalars, NumPy scalars and arrays
            # alike and always yields a writable float64 vector of length n.
            self._position = np.broadcast_to(
                np.asarray(init, dtype=np.float64), (self.n,)
            ).copy()
        self._pending_orders = []
        self._step_holding_cost = 0.0
        self._step_shortage_cost = 0.0
        self._accrued_shortage_cost = 0.0

    def available(self, instrument_id: int, side: Side) -> float:
        """Return the maximum size tradeable for an instrument and side.

        Args:
            instrument_id: Index of the instrument
            side: Side of the incoming opportunity

        Returns:
            `inf` for unlimited positions; for BUY the current non-negative
            inventory; otherwise the remaining room up to `cfg.max_position`.
        """
        pos = self._position[instrument_id]
        if np.isinf(pos):
            return np.inf
        if side == Side.BUY:
            return max(0, pos)  # can sell up to current inventory
        return max(0, self.cfg.max_position - pos)  # can buy up to max

    def apply_execution(self, exe: Execution) -> Execution:
        """Censor an execution by available capacity and update the position.

        Any unfilled remainder accrues shortage cost
        (`shortage * price * cfg.shortage_cost_rate`) for the current step.

        Args:
            exe: Proposed execution carrying `size_requested`

        Returns:
            A new Execution with `size_filled` limited to the available capacity
        """
        idx = int(exe.instrument_id)
        avail = self.available(idx, exe.side)
        filled = min(exe.size_requested, avail)
        shortage = exe.size_requested - filled
        if exe.side == Side.BUY:
            self._position[idx] -= filled  # sold from inventory
        else:
            self._position[idx] += filled  # bought into inventory
        if shortage > 0:
            self._accrued_shortage_cost += shortage * exe.price * self.cfg.shortage_cost_rate
        return Execution(
            opportunity_id=exe.opportunity_id, instrument_id=exe.instrument_id,
            side=exe.side, size_requested=exe.size_requested,
            size_filled=filled, price=exe.price, propensity=exe.propensity, t=exe.t
        )

    def order(self, quantity: np.ndarray) -> None:
        """Place a replenishment order.

        With a positive lead time the order is queued; otherwise the quantity
        is added to the position immediately.

        Args:
            quantity: Per-instrument quantity to add
        """
        if self.cfg.lead_time > 0:
            self._pending_orders.append((self.cfg.lead_time, quantity.copy()))
        else:
            self._position += quantity

    def step(self, t: float) -> None:
        """Close the current step: costs are finalized, due orders received.

        Args:
            t: Current time (unused by this implementation)
        """
        # Holding cost is charged on the absolute position; unlimited (inf)
        # entries are treated as zero.
        pos = np.where(np.isinf(self._position), 0, self._position)
        self._step_holding_cost = float(np.sum(np.abs(pos)) * self.cfg.holding_cost_rate)
        # Publish this step's shortage cost and start a fresh accumulator so
        # the reported value does not grow across the episode.
        self._step_shortage_cost = self._accrued_shortage_cost
        self._accrued_shortage_cost = 0.0
        # Receive pending orders whose lead time has elapsed.
        new_pending = []
        for (remaining, qty) in self._pending_orders:
            if remaining <= 1:
                self._position += qty
            else:
                new_pending.append((remaining - 1, qty))
        self._pending_orders = new_pending

    @property
    def position(self) -> np.ndarray:
        """Position vector with unlimited (inf) entries reported as -1."""
        return np.where(np.isinf(self._position), -1, self._position)

    @property
    def holding_cost(self) -> float:
        """Holding cost computed by the most recent `step()`."""
        return self._step_holding_cost

    @property
    def shortage_cost(self) -> float:
        """Shortage cost of the step closed by the most recent `step()`."""
        return self._step_shortage_cost
def make_instruments(n: int, cost_range: tuple[float, float] = (1.0, 10.0),
                     margin_range: tuple[float, float] = (0.2, 0.5),
                     inst_type: InstrumentType = InstrumentType.SKU,
                     rng: np.random.Generator | None = None) -> InstrumentSet:
    """Build an instrument set with randomly drawn costs and margins.

    Costs and margins are sampled uniformly; each reference price is
    ``cost * (1 + margin)``.

    Args:
        n: Number of instruments to create
        cost_range: (min, max) for uniform cost sampling
        margin_range: (min, max) for uniform margin sampling
        inst_type: Type of instruments (SKU, ASSET, etc.)
        rng: Random generator (a fresh default generator is used if omitted)

    Returns:
        InstrumentSet with n instruments having random costs and margins
    """
    if not rng:
        rng = np.random.default_rng()
    # Costs are drawn before margins; the order fixes the random stream.
    unit_costs = rng.uniform(*cost_range, n)
    markups = rng.uniform(*margin_range, n)
    catalogue = []
    for idx in range(n):
        base = unit_costs[idx]
        catalogue.append(Instrument(id=idx, type=inst_type, cost_basis=base,
                                    reference_price=base*(1+markups[idx])))
    return InstrumentSet(instruments=catalogue)

318
lab/outlet/types.py Normal file
View File

@@ -0,0 +1,318 @@
"""
Core data types for the Quote-Control simulator.
This module defines the fundamental data structures used throughout the platform:
- Identifiers (InstrumentId, OpportunityId, AgentId)
- Domain objects (Instrument, Quote, Opportunity, Execution)
- Logging structures (StepEvent, StepLogs, StepMetrics)
- State containers (MarketState, HiddenState, Observation, StepResult)
All dataclasses are designed to be serializable and numpy-compatible.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, NewType
import numpy as np
from .constants import Side, InstrumentType, OpportunityType, EventType
InstrumentId = NewType('InstrumentId', int) # unique instrument index
OpportunityId = NewType('OpportunityId', str) # unique opportunity/session ID
AgentId = NewType('AgentId', str) # unique agent/actor ID
@dataclass
class Instrument:
    """A single priceable entity in the simulation.

    May stand for a retail SKU, a financial asset, a loan product or a
    subscription. `cost_basis` is the fundamental value: marginal cost for
    retail, mid-price for assets, funding rate for loans.
    """

    # Unique identifier for this instrument
    id: InstrumentId
    # Category of instrument (SKU, ASSET, LOAN, SUBSCRIPTION)
    type: InstrumentType
    # Fundamental cost or value (marginal cost, mid-price, funding rate)
    cost_basis: float
    # Base or fair price used for action scaling
    reference_price: float
    # Additional attributes (quality score, category, volatility, etc.)
    attrs: dict[str, Any] = field(default_factory=dict)
@dataclass
class InstrumentSet:
    """A collection of instruments with optional position tracking.

    Offers vectorized access to per-instrument properties. Position can be
    positive (long/inventory) or negative (short) for financial assets.

    Properties:
        n: Number of instruments
        costs: Vector of cost bases (float32)
        refs: Vector of reference prices (float32)
    """

    # The Instrument objects, in index order
    instruments: list[Instrument]
    # Current position per instrument (None = unlimited capacity)
    position: np.ndarray | None = None

    @property
    def n(self) -> int:
        """Number of instruments in the set."""
        return len(self.instruments)

    @property
    def costs(self) -> np.ndarray:
        """Cost bases as a freshly built float32 vector."""
        cost_values = [item.cost_basis for item in self.instruments]
        return np.array(cost_values, np.float32)

    @property
    def refs(self) -> np.ndarray:
        """Reference prices as a freshly built float32 vector."""
        ref_values = [item.reference_price for item in self.instruments]
        return np.array(ref_values, np.float32)
@dataclass
class Quote:
    """The price quote chosen by the policy - the action in the MDP.

    Covers several quoting mechanisms:
        - Posted price: only `prices` is used
        - Two-sided: `prices` is the mid, `spreads` the bid-ask width
        - Auction: `prices` holds reserve prices

    The propensity is what makes off-policy evaluation (OPE) possible.

    Properties:
        bids: mid - spread/2, or None without spreads
        asks: mid + spread/2, or None without spreads
    """

    # Posted prices (retail) or mid-quotes (market making)
    prices: np.ndarray
    # Bid-ask spread width for two-sided quoting (None for posted price)
    spreads: np.ndarray | None = None
    # P(this quote | behavior policy) for importance sampling
    propensity: float = 1.0
    # Additional info (prev_prices for delta constraints, etc.)
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def bids(self) -> np.ndarray | None:
        """Bid prices, half a spread below `prices`; None for one-sided quotes."""
        if self.spreads is None:
            return None
        return self.prices - self.spreads/2

    @property
    def asks(self) -> np.ndarray | None:
        """Ask prices, half a spread above `prices`; None for one-sided quotes."""
        if self.spreads is None:
            return None
        return self.prices + self.spreads/2
@dataclass
class Opportunity:
    """An arrival event that may turn into a transaction.

    Opportunities form the demand side of the simulation:
        - Retail: browsing session with purchase intent
        - Market making: incoming market order
        - Lending: loan application

    Segment/type information for execution models travels in `context`.
    """

    # Unique identifier for this opportunity
    id: OpportunityId
    # Category (SESSION, MARKET_ORDER, REQUEST)
    type: OpportunityType
    # BUY or SELL intent
    side: Side
    # Which instrument the opportunity targets
    instrument_id: InstrumentId
    # Requested transaction size (units, shares, principal)
    size: float = 1.0
    # Arrival timestamp
    t: float = 0.0
    # Segment info (is_scraper, credit_score, urgency, etc.)
    context: dict[str, Any] = field(default_factory=dict)
@dataclass
class Execution:
    """A realized transaction after acceptance and position censorship.

    The gap between `size_requested` and `size_filled` is the demand that
    was censored by inventory/position constraints.
    """

    # Links back to the originating Opportunity
    opportunity_id: OpportunityId
    # Which instrument was traded
    instrument_id: InstrumentId
    # BUY or SELL
    side: Side
    # Original requested size (true demand)
    size_requested: float
    # Actual filled size after censorship
    size_filled: float
    # Execution price
    price: float
    # Combined propensity for OPE (quote * acceptance)
    propensity: float = 1.0
    # Execution timestamp
    t: float = 0.0
@dataclass
class StepEvent:
    """A generic logged event; only time and type are required."""

    # Event timestamp
    t: float
    # Kind of event (an EventType member)
    type: EventType
    # Optional references and payload - each may be absent for a given event
    instrument_id: InstrumentId | None = None
    opportunity_id: OpportunityId | None = None
    price: float | None = None
    size: float | None = None
    # Propensity recorded with the event
    propensity: float = 1.0
    # Free-form extra information
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class StepLogs:
    """All logging data produced by one simulation step.

    Works in a detailed event-logging mode (for OPE) and in an
    aggregate-only mode (for fast simulation). Keeping `true_demand` apart
    from `censored_fills` is what enables research on demand estimation
    under censorship.
    """

    # Detailed event log (None if LogLevel != FULL)
    events: list[StepEvent] | None = None
    # List of executed transactions (None if LogLevel != FULL)
    executions: list[Execution] | None = None
    # Always-available aggregate statistics
    aggregates: dict[str, Any] = field(default_factory=dict)
    # Oracle demand before censorship (for research, not in obs)
    true_demand: np.ndarray | None = None
    # Realized fills after position constraints (observable)
    censored_fills: np.ndarray | None = None
@dataclass
class StepMetrics:
    """Metrics computed for a single simulation step.

    The metrics are domain-aware: retail relies on revenue/cost/holding
    cost, market making on spread capture and inventory risk. Every scalar
    defaults to 0.0.
    """

    # Profit and loss (revenue - cost for retail, mark-to-market for finance)
    pnl: float = 0.0
    # Gross revenue from sales/executions
    revenue: float = 0.0
    # Cost of goods sold or position acquisition cost
    cost: float = 0.0
    # Total units/shares transacted
    units_traded: float = 0.0
    # Holding cost (retail) or inventory risk penalty (finance)
    position_cost: float = 0.0
    # Cost of stockouts or missed fills
    lost_opportunity: float = 0.0
    # Bid-ask spread captured (market making)
    spread_capture: float = 0.0
    # Price volatility metric for UX consideration
    volatility: float = 0.0
    # Fill rate (executions / opportunities)
    conversion: float = 0.0
    # Per-instrument breakdowns (fills, demand, etc.)
    per_instrument: dict[str, np.ndarray] = field(default_factory=dict)
@dataclass
class MarketState:
    """External market conditions and competitor state.

    Retail: `competitor_quotes` drives cross-elasticity effects.
    Finance: `mid_prices` and `volatility` drive execution dynamics.
    """

    # Competitor posted prices (retail)
    competitor_quotes: np.ndarray | None = None
    # Market mid-prices for assets (finance)
    mid_prices: np.ndarray | None = None
    # Per-instrument volatility estimate
    volatility: np.ndarray | None = None
    # Market regime identifier (normal, price_war, high_vol, etc.)
    regime: str = 'normal'
    # Timestamp of this market state
    t: float = 0.0
@dataclass
class HiddenState:
    """Simulator-internal state that is never exposed to the agent.

    Holds oracle information for research analysis together with the
    history needed for non-stationary dynamics.
    """

    # Latent demand multiplier
    true_demand_intensity: float = 1.0
    # Fraction of arrivals that are adversarial/scraper
    contamination: float = 0.0
    # Current market/competitor regime
    regime: str = 'normal'
    # History of agent quotes for volatility calculation
    quote_history: list[np.ndarray] = field(default_factory=list)
    # History of market states for analysis
    market_history: list[MarketState] = field(default_factory=list)
@dataclass
class Observation:
    """What the agent is allowed to see - the censored view only.

    Critical invariant: an Observation never carries true_demand, only
    censored fills. This is what enforces the censorship research setting.

    Methods:
        to_flat: Flatten to a 1D numpy array for gym compatibility
    """

    # Current posted quotes (the agent's last action)
    quotes: np.ndarray
    # Current inventory/position state
    position: np.ndarray | None
    # Censored execution counts per instrument
    fills: np.ndarray
    # Opportunity exposure counts per instrument
    exposures: np.ndarray
    # Observable market state (competitor prices, volatility)
    market: MarketState | None
    # Current timestep
    t: int
    # Additional observable features
    extra: dict[str, Any] = field(default_factory=dict)

    def to_flat(self) -> np.ndarray:
        """Concatenate the array-valued fields into one 1D vector.

        Order: quotes, fills, exposures, then position (if present), then
        competitor quotes (if a market with competitor quotes is present).
        """
        chunks = [self.quotes, self.fills, self.exposures]
        if self.position is not None:
            chunks.append(self.position)
        has_competitors = self.market and self.market.competitor_quotes is not None
        if has_competitors:
            chunks.append(self.market.competitor_quotes)
        flattened = []
        for chunk in chunks:
            flattened.append(chunk.flatten())
        return np.concatenate(flattened)
@dataclass
class StepResult:
    """Everything returned from one simulation step.

    The first five fields follow the gymnasium convention (obs, reward,
    terminated, truncated, info); metrics, logs and hidden state are added
    for research use.
    """

    # Observable state (censored)
    obs: Observation
    # Scalar reward from objective function
    reward: float
    # Episode ended naturally (max_steps reached)
    terminated: bool
    # Episode ended early (bankruptcy, constraint violation)
    truncated: bool
    # Additional info dict (contains true_demand for research)
    info: dict[str, Any]
    # Computed metrics for this step
    metrics: StepMetrics
    # Event logs and aggregates
    logs: StepLogs
    # Internal simulator state (oracle info)
    hidden: HiddenState

View File

@@ -0,0 +1,10 @@
from .arrivals import PoissonArrivalModel, HawkesArrivalModel, SessionArrivalModel
from .execution import ElasticityExecutionModel, IntensityExecutionModel, LogitExecutionModel
from .competitors import (StaticCompetitorModel, ReactiveCompetitorModel,
StochasticCompetitorModel, GBMMarketModel)
__all__ = [
'PoissonArrivalModel', 'HawkesArrivalModel', 'SessionArrivalModel',
'ElasticityExecutionModel', 'IntensityExecutionModel', 'LogitExecutionModel',
'StaticCompetitorModel', 'ReactiveCompetitorModel', 'StochasticCompetitorModel', 'GBMMarketModel',
]

168
lab/population/arrivals.py Normal file
View File

@@ -0,0 +1,168 @@
"""
Arrival models for generating demand opportunities.
This module provides different arrival processes:
- PoissonArrivalModel: Constant-rate memoryless arrivals
- HawkesArrivalModel: Self-exciting clustered arrivals (market orders)
- SessionArrivalModel: Retail browsing sessions with multi-product views
Each model implements the ArrivalModel protocol and generates Opportunity objects
that flow through the execution pipeline.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Callable
import numpy as np
from uuid import uuid4
from ..outlet.types import Opportunity, InstrumentSet, MarketState, HiddenState
from ..outlet.constants import Side, OpportunityType
from ..outlet.math_util import poisson_arrivals, hawkes_intensity
@dataclass
class PoissonArrivalConfig:
    """Configuration for Poisson arrival process.

    Attributes:
        base_rate: Expected arrivals per unit time (scaled by hidden.true_demand_intensity)
        side_probs: Probability distribution over BUY/SELL sides; ``None``
            (the default) is replaced by ``{Side.BUY: 1.0}`` after init
    """
    base_rate: float = 10.0
    # Annotated as optional because None is the sentinel default; a dict
    # literal cannot be used directly as a dataclass default.
    side_probs: dict[Side, float] | None = None

    def __post_init__(self):
        """Fill in the buy-only default when no distribution was supplied."""
        if self.side_probs is None:
            self.side_probs = {Side.BUY: 1.0}
class PoissonArrivalModel:
    """Homogeneous Poisson arrival process.

    Generates arrivals at a constant rate (modulated by demand intensity).
    Suitable for stationary demand or as a baseline model.
    The arrival count comes from `poisson_arrivals` called with
    ``base_rate * true_demand_intensity`` and ``dt``.
    """

    def __init__(self, cfg: PoissonArrivalConfig | None = None):
        """Store the configuration, falling back to the defaults."""
        self.cfg = cfg or PoissonArrivalConfig()

    def sample(self, t: float, dt: float, instruments: InstrumentSet,
               market: MarketState | None, hidden: HiddenState,
               rng: np.random.Generator) -> list[Opportunity]:
        """Sample the opportunities for one step.

        Each arrival targets a uniformly drawn instrument, has size 1.0 and
        a side drawn from `cfg.side_probs`.

        Args:
            t: Current time, stamped on every opportunity
            dt: Time interval length
            instruments: Available instruments
            market: Current market state (unused here)
            hidden: Hidden state; its demand intensity scales the rate
            rng: Random generator

        Returns:
            List of opportunities arriving in this interval
        """
        n_arrivals = poisson_arrivals(self.cfg.base_rate * hidden.true_demand_intensity, dt, rng)
        # The side distribution is fixed for the whole call: build it once.
        sides = list(self.cfg.side_probs.keys())
        probs = list(self.cfg.side_probs.values())
        opps = []
        for _ in range(n_arrivals):
            inst_id = rng.integers(0, instruments.n)
            # Draw an index rather than the member itself so the original
            # Side object is returned instead of a NumPy-coerced value.
            side = sides[int(rng.choice(len(sides), p=probs))]
            # NOTE(review): ids come from uuid4, not from `rng`, so they
            # differ between runs even with a fixed seed.
            opps.append(Opportunity(
                id=str(uuid4())[:8], type=OpportunityType.SESSION,
                side=side, instrument_id=inst_id, size=1.0, t=t,
                context={'segment': 'default'}
            ))
        return opps
@dataclass
class HawkesArrivalConfig:
    """Configuration for the Hawkes self-exciting arrival process.

    Attributes:
        base_rate: Baseline arrival intensity.
        alpha: Excitation strength (how much each arrival raises intensity).
        beta: Decay rate (how quickly excitation fades).
        side_probs: Mapping from side to sampling probability. ``None``
            (the default) is replaced in ``__post_init__`` by an even
            BUY/SELL split.
    """
    base_rate: float = 5.0
    alpha: float = 0.5
    beta: float = 1.0
    # ``None`` is a sentinel so that each instance gets its own dict.
    side_probs: dict[Side, float] | None = None

    def __post_init__(self):
        if self.side_probs is None:
            self.side_probs = {Side.BUY: 0.5, Side.SELL: 0.5}
class HawkesArrivalModel:
    """Self-exciting Hawkes point process for clustered arrivals.

    The step intensity is computed by ``hawkes_intensity`` from the scaled
    base rate, the retained arrival history, ``alpha``, ``beta`` and the
    current time (documented upstream as
    ``lambda(t) = base + alpha * sum(exp(-beta * (t - t_i)))``). The arrival
    count is then drawn by ``poisson_arrivals`` at that intensity.

    The model is stateful: arrival times are kept in ``self._history`` and
    feed the intensity of later steps.
    """

    # Arrivals older than this many time units are dropped from the history.
    HISTORY_WINDOW = 10

    def __init__(self, cfg: HawkesArrivalConfig | None = None):
        self.cfg = cfg or HawkesArrivalConfig()
        self._history: np.ndarray = np.array([])

    def sample(self, t: float, dt: float, instruments: InstrumentSet,
               market: MarketState | None, hidden: HiddenState,
               rng: np.random.Generator) -> list[Opportunity]:
        """Draw the market-order opportunities arriving in ``[t, t + dt)``.

        Args:
            t: Step start time.
            dt: Step length; arrival times are uniform within the step.
            instruments: Instrument universe; only ``instruments.n`` is read.
            market: Unused by this model.
            hidden: Supplies ``true_demand_intensity`` to scale the base rate.
            rng: Random generator used for all draws.

        Returns:
            One ``Opportunity`` per arrival, each carrying the step intensity
            in its context.
        """
        intensity = hawkes_intensity(
            self.cfg.base_rate * hidden.true_demand_intensity,
            self._history, self.cfg.alpha, self.cfg.beta, t
        )
        n_arrivals = poisson_arrivals(intensity, dt, rng)

        # Loop-invariant: build the side/probability lists once.
        sides = list(self.cfg.side_probs)
        probs = list(self.cfg.side_probs.values())

        opps = []
        new_times = []
        for _ in range(n_arrivals):
            arr_t = t + rng.uniform(0, dt)
            new_times.append(arr_t)
            inst_id = int(rng.integers(0, instruments.n))
            # Index draw guarantees the original Side member is returned.
            side = sides[int(rng.choice(len(sides), p=probs))]
            opps.append(Opportunity(
                id=str(uuid4())[:8], type=OpportunityType.MARKET_ORDER,
                side=side, instrument_id=inst_id,
                size=rng.exponential(1.0), t=arr_t,
                context={'intensity': intensity}
            ))

        # Extend the history once instead of re-allocating per arrival.
        if new_times:
            self._history = np.concatenate([self._history, new_times])
        # Forget arrivals that fell out of the retention window.
        self._history = self._history[self._history > t - self.HISTORY_WINDOW]
        return opps
@dataclass
class SessionArrivalConfig:
    """Settings for the retail browsing-session arrival model."""
    # Number of browsing sessions generated on every step.
    sessions_per_step: int = 20
    # (min, max) product views drawn for a single session.
    views_per_session: tuple[int, int] = (1, 5)
    # Fraction of sessions that are scrapers/bots.
    contamination: float = 0.0
class SessionArrivalModel:
    """Retail browsing-session model with multi-product views.

    Every step produces ``cfg.sessions_per_step`` sessions. A session views
    several distinct products and yields one BUY opportunity per view.
    Scraper sessions (drawn with probability ``contamination``) request
    three times as many views; their lower conversion is not modelled here.
    """

    def __init__(self, cfg: SessionArrivalConfig | None = None):
        self.cfg = cfg or SessionArrivalConfig()

    def sample(self, t: float, dt: float, instruments: InstrumentSet,
               market: MarketState | None, hidden: HiddenState,
               rng: np.random.Generator) -> list[Opportunity]:
        """Generate the view opportunities for one step.

        Args:
            t: Step time stamped on every opportunity.
            dt: Unused; the session count per step is fixed by the config.
            instruments: Instrument universe; only ``instruments.n`` is read.
            market: Unused by this model.
            hidden: Supplies ``contamination``; when ``None`` the config
                value is used instead.
            rng: Random generator used for all draws.

        Returns:
            One ``Opportunity`` per product view. ``context['n_views']`` is
            the number of views actually generated for the session.
        """
        if hidden is not None:
            contamination = hidden.contamination
        else:
            contamination = self.cfg.contamination
        min_views, max_views = self.cfg.views_per_session

        opps = []
        for _ in range(self.cfg.sessions_per_step):
            is_scraper = bool(rng.random() < contamination)
            # views_per_session is documented as (min, max): include the
            # upper bound. The default exclusive bound never produced `max`
            # and raised when min == max.
            n_views = int(rng.integers(min_views, max_views, endpoint=True))
            sid = str(uuid4())[:8]
            # scrapers view more products
            if is_scraper:
                n_views *= 3
            # A session cannot view more distinct products than exist.
            n_views = min(n_views, instruments.n)
            viewed = rng.choice(instruments.n, size=n_views, replace=False)
            for inst_id in viewed:
                opps.append(Opportunity(
                    id=f"{sid}-{inst_id}", type=OpportunityType.SESSION,
                    side=Side.BUY, instrument_id=int(inst_id), size=1.0, t=t,
                    context={'session_id': sid, 'is_scraper': is_scraper, 'n_views': n_views}
                ))
        return opps

View File

@@ -0,0 +1,189 @@
"""
Market and competitor models for external dynamics.
This module provides models for competitor pricing (retail) and market dynamics (finance):
- StaticCompetitorModel: Fixed competitor prices
- ReactiveCompetitorModel: Competitor reacts to agent's prices, can trigger price wars
- StochasticCompetitorModel: Random walk competitor prices
- GBMMarketModel: Geometric Brownian Motion for asset mid-prices
Each model implements the MarketModel protocol.
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
from ..outlet.types import Quote, MarketState, HiddenState
from ..outlet.math_util import clamp, ema
@dataclass
class StaticCompetitorConfig:
    """Settings for the static (fixed-markup) competitor."""
    # Fractional markup applied on top of the reference prices.
    markup: float = 0.1
class StaticCompetitorModel:
    """Competitor that always quotes ``reference * (1 + markup)``.

    When no reference prices were given at construction, the agent's own
    quoted prices for the step serve as the reference.
    """

    def __init__(self, cfg: StaticCompetitorConfig | None = None, refs: np.ndarray | None = None):
        self.cfg = cfg if cfg else StaticCompetitorConfig()
        self.refs = refs

    def step(self, t: float, self_quotes: Quote, hidden: HiddenState,
             rng: np.random.Generator) -> MarketState:
        """Return the competitor quotes for time ``t``.

        ``hidden`` and ``rng`` are accepted for interface compatibility and
        are not used.
        """
        base = self_quotes.prices if self.refs is None else self.refs
        quotes = base * (1 + self.cfg.markup)
        return MarketState(competitor_quotes=quotes, regime='static', t=t)
@dataclass
class ReactiveCompetitorConfig:
    """Settings for the reactive competitor with price-war dynamics."""
    # Smoothing weight for following the agent (0 = ignore, 1 = instant).
    follow_weight: float = 0.3
    # Maximum fractional deviation from the reference prices.
    band_pct: float = 0.1
    # Relative price difference below which a price war is triggered.
    war_threshold: float = -0.15
    # Fractional cut below the agent's prices targeted during a war.
    war_aggression: float = 0.2
class ReactiveCompetitorModel:
    """Competitor that tracks the agent's prices and can start a price war.

    State kept between steps: the competitor's last prices (``_prices``) and
    whether a war is in progress (``_in_war``). Each step also writes the
    current regime (``'price_war'`` or ``'normal'``) into ``hidden.regime``.
    """

    def __init__(self, cfg: ReactiveCompetitorConfig | None = None, refs: np.ndarray | None = None):
        self.cfg = cfg if cfg else ReactiveCompetitorConfig()
        self.refs = refs
        self._prices: np.ndarray | None = None
        self._in_war: bool = False

    def step(self, t: float, self_quotes: Quote, hidden: HiddenState,
             rng: np.random.Generator) -> MarketState:
        """Advance the competitor one step and return its quotes.

        ``rng`` is accepted for interface compatibility and is not used.
        """
        cfg = self.cfg
        anchor = self_quotes.prices if self.refs is None else self.refs

        # First call: start from the reference prices.
        if self._prices is None:
            self._prices = anchor.copy()

        # Relative gap between the agent's and the competitor's prices;
        # the small constant guards against division by zero.
        gap = (self_quotes.prices - self._prices) / (self._prices + 1e-8)

        # Enter a war when any instrument is undercut past the threshold;
        # leave it only when every gap clears the exit level.
        # NOTE(review): -war_threshold / 2 is positive for the default
        # (negative) threshold - confirm this exit level is intended.
        if np.any(gap < cfg.war_threshold):
            self._in_war = True
        elif np.all(gap > -cfg.war_threshold / 2):
            self._in_war = False

        # Pick the price target for this step and publish the regime.
        if self._in_war:
            target = self_quotes.prices * (1 - cfg.war_aggression)
            hidden.regime = 'price_war'
        else:
            target = self_quotes.prices * (1 + cfg.follow_weight * 0.05)
            hidden.regime = 'normal'

        # Move each price toward its target with the smoothing helper.
        smoothed = np.array([
            ema(previous, goal, cfg.follow_weight)
            for previous, goal in zip(self._prices, target)
        ])

        # Keep the result inside the band around the reference prices.
        lower = anchor * (1 - cfg.band_pct)
        upper = anchor * (1 + cfg.band_pct)
        bounded = clamp(smoothed, lower, upper)

        self._prices = bounded
        return MarketState(competitor_quotes=bounded, regime=hidden.regime, t=t)
@dataclass
class StochasticCompetitorConfig:
    """Settings for the stochastic (mean-reverting) competitor."""
    # Additive price drift applied on every step.
    drift: float = 0.0
    # Standard deviation of the per-step random shocks.
    volatility: float = 0.02
    # Strength of the pull back toward the reference prices.
    mean_revert: float = 0.1
class StochasticCompetitorModel:
    """Mean-reverting competitor prices with proportional noise.

    Per step: ``P <- P + drift + mean_revert * (ref - P) + eps * P`` with
    ``eps ~ Normal(0, volatility)``, then floored at half the reference.
    The dynamics do not depend on the agent's quotes unless no reference
    prices were supplied, in which case the agent's prices are the reference.
    """

    def __init__(self, cfg: StochasticCompetitorConfig | None = None, refs: np.ndarray | None = None):
        self.cfg = cfg if cfg else StochasticCompetitorConfig()
        self.refs = refs
        self._prices: np.ndarray | None = None

    def step(self, t: float, self_quotes: Quote, hidden: HiddenState,
             rng: np.random.Generator) -> MarketState:
        """Advance the competitor prices one step and return a copy of them.

        ``hidden`` is accepted for interface compatibility and is not used.
        """
        cfg = self.cfg
        anchor = self_quotes.prices if self.refs is None else self.refs

        # First call: start from the reference prices.
        if self._prices is None:
            self._prices = anchor.copy()

        shocks = rng.normal(0, cfg.volatility, len(self._prices))
        pull = cfg.mean_revert * (anchor - self._prices)
        stepped = self._prices + cfg.drift + pull + shocks * self._prices

        # Prices never fall below half of the reference.
        self._prices = np.maximum(stepped, anchor * 0.5)
        return MarketState(competitor_quotes=self._prices.copy(), regime='stochastic', t=t)
@dataclass
class GBMMarketConfig:
    """Settings for the Geometric Brownian Motion market model."""
    # Price drift (expected return).
    mu: float = 0.0
    # Price volatility.
    sigma: float = 0.1
    # Time step size used for each update.
    dt: float = 1.0
class GBMMarketModel:
    """Geometric Brownian Motion for asset mid-prices.

    Each step applies the exact log-normal update
    ``S <- S * exp((mu - sigma^2 / 2) * dt + sigma * sqrt(dt) * z)`` with
    ``z`` standard normal. If no initial mids were given, the agent's first
    quoted prices seed the process.
    """

    def __init__(self, cfg: GBMMarketConfig | None = None, initial: np.ndarray | None = None):
        self.cfg = cfg if cfg else GBMMarketConfig()
        self._mids = initial

    def step(self, t: float, self_quotes: Quote, hidden: HiddenState,
             rng: np.random.Generator) -> MarketState:
        """Advance the mid-prices one step.

        Returns a ``MarketState`` holding a copy of the new mids and a
        constant volatility vector equal to ``sigma``. ``hidden`` is not used.
        """
        if self._mids is None:
            self._mids = self_quotes.prices.copy()

        cfg = self.cfg
        count = len(self._mids)
        shocks = rng.standard_normal(count)
        log_return = (cfg.mu - 0.5*cfg.sigma**2)*cfg.dt + cfg.sigma*np.sqrt(cfg.dt)*shocks
        self._mids = self._mids * np.exp(log_return)

        return MarketState(
            mid_prices=self._mids.copy(),
            volatility=np.full(count, cfg.sigma),
            regime='gbm',
            t=t,
        )

174
lab/population/execution.py Normal file
View File

@@ -0,0 +1,174 @@
"""
Execution models for computing acceptance/fill probabilities.
This module provides different models for how opportunities convert to executions:
- ElasticityExecutionModel: Price elasticity with competitor cross-effects (retail)
- IntensityExecutionModel: Distance-based fill intensity (market making)
- LogitExecutionModel: Discrete choice model
Each model implements the ExecutionModel protocol.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
import numpy as np
from ..outlet.types import Opportunity, Quote, InstrumentSet, MarketState
from ..outlet.constants import Side
from ..outlet.math_util import sigmoid, safe_log, intensity_decay, EPS
@dataclass
class ElasticityConfig:
    """Settings for the price-elasticity execution model."""
    # Baseline purchase probability at the reference price.
    base_prob: float = 0.3
    # Own-price elasticity coefficient.
    price_sensitivity: float = 2.0
    # Cross-elasticity with respect to the competitor's price.
    cross_elasticity: float = 0.5
    # Conversion multiplier applied to scraper sessions (typically << 1).
    scraper_conversion: float = 0.01
class ElasticityExecutionModel:
    """Price-elasticity purchase model for retail dynamic pricing.

    ``P(buy) = base_prob * exp(-price_sensitivity * log(price / ref))``,
    multiplied by a cross-effect when the competitor is cheaper and by
    ``scraper_conversion`` for scraper sessions, then clipped to ``[0, 1]``.
    """

    def __init__(self, cfg: ElasticityConfig | None = None):
        self.cfg = cfg or ElasticityConfig()

    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
             market: MarketState | None, rng: np.random.Generator) -> float:
        """Return the purchase probability for one opportunity.

        Args:
            opp: Opportunity; ``instrument_id`` selects the price and
                ``context['is_scraper']`` enables the scraper multiplier.
            quote: Agent quote; ``quote.prices`` is indexed by instrument.
            instruments: Supplies the reference prices (``refs``).
            market: Optional market state; competitor quotes, when present,
                drive the cross-elasticity term.
            rng: Unused; kept for interface compatibility.

        Returns:
            Probability in ``[0, 1]``.
        """
        cfg = self.cfg
        idx = int(opp.instrument_id)
        price = quote.prices[idx]
        ref = instruments.refs[idx]

        # base probability adjusted by price ratio
        log_ratio = safe_log(price / ref)
        prob = cfg.base_prob * np.exp(-cfg.price_sensitivity * log_ratio)

        # cross-elasticity: only a cheaper competitor takes demand away
        if market is not None and market.competitor_quotes is not None:
            comp_price = market.competitor_quotes[idx]
            if comp_price < price:
                prob *= np.exp(-cfg.cross_elasticity * (price - comp_price) / ref)

        # scrapers convert at much lower rate
        if opp.context.get('is_scraper', False):
            prob *= cfg.scraper_conversion

        return float(np.clip(prob, 0, 1))

    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet,
                 context: dict[str, Any] | None = None) -> np.ndarray:
        """Impute latent demand from observed fills.

        Simple inversion of ``fills = prob * exposures`` using the baseline
        probability: returns ``fills / (base_prob + EPS)``. ``instruments``
        and ``context`` are accepted for interface compatibility and unused.
        """
        return fills / (self.cfg.base_prob + EPS)
@dataclass
class IntensityConfig:
    """Settings for the intensity-based (market making) execution model."""
    # Baseline fill intensity at zero distance from the mid-price.
    base_intensity: float = 1.0
    # Decay rate of the intensity with distance from the mid-price.
    kappa: float = 1.5
    # Multiplier on volatility when boosting the fill intensity.
    vol_scale: float = 0.5
class IntensityExecutionModel:
    """Distance-based fill intensity for market making.

    The fill probability is ``base_intensity`` times ``intensity_decay`` of
    the absolute distance between the executable quote and the mid-price,
    optionally scaled by ``1 + vol_scale * sigma``, and clipped to ``[0, 1]``.
    """

    def __init__(self, cfg: IntensityConfig | None = None):
        self.cfg = cfg if cfg else IntensityConfig()

    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
             market: MarketState | None, rng: np.random.Generator) -> float:
        """Return the fill probability for one opportunity.

        BUY opportunities execute against the ask, everything else against
        the bid; ``quote.prices`` is the fallback when that side is missing.
        ``instruments`` and ``rng`` are not used.
        """
        idx = int(opp.instrument_id)

        # Mid-price: prefer the market's, otherwise the quoted price.
        has_market_mid = market and market.mid_prices is not None
        mid = market.mid_prices[idx] if has_market_mid else quote.prices[idx]

        # Price the counterparty would trade at, and its signed distance
        # from the mid (positive when the quote is on its own side of mid).
        if opp.side == Side.BUY:
            book = quote.asks
            exec_price = quote.prices[idx] if book is None else book[idx]
            distance = exec_price - mid
        else:
            book = quote.bids
            exec_price = quote.prices[idx] if book is None else book[idx]
            distance = mid - exec_price

        # Only the magnitude of the distance enters the decay.
        decay = intensity_decay(abs(distance), self.cfg.kappa)
        intensity = self.cfg.base_intensity * decay

        # Higher volatility raises the fill intensity.
        if market and market.volatility is not None:
            sigma = market.volatility[idx]
            intensity *= (1 + self.cfg.vol_scale * sigma)

        return float(np.clip(intensity, 0, 1))

    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet,
                 context: dict[str, Any] | None = None) -> np.ndarray:
        """Return ``fills`` unchanged; no censoring correction is applied."""
        return fills
@dataclass
class LogitConfig:
    """Settings for the logit discrete-choice execution model."""
    # Intercept of the utility (base utility).
    beta_0: float = 0.5
    # Coefficient on the price ratio (typically negative).
    beta_price: float = -1.5
    # Coefficient on the instrument's quality attribute.
    beta_quality: float = 0.3
class LogitExecutionModel:
    """Discrete-choice logit model for purchase probability.

    Utility: ``U = beta_0 + beta_price * (price / ref) + beta_quality * quality``
    and ``P(buy) = sigmoid(U)``.
    """

    # Quality assumed when an instrument carries no 'quality' attribute.
    DEFAULT_QUALITY = 0.5

    def __init__(self, cfg: LogitConfig | None = None):
        self.cfg = cfg or LogitConfig()

    def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet,
             market: MarketState | None, rng: np.random.Generator) -> float:
        """Return the purchase probability for one opportunity.

        Args:
            opp: Opportunity; ``instrument_id`` selects the instrument.
            quote: Agent quote; ``quote.prices`` is indexed by instrument.
            instruments: Supplies reference prices and the per-instrument
                ``attrs['quality']`` (default ``DEFAULT_QUALITY``).
            market: Unused by this model.
            rng: Unused; kept for interface compatibility.
        """
        idx = int(opp.instrument_id)
        price = quote.prices[idx]
        ref = instruments.refs[idx]
        quality = instruments.instruments[idx].attrs.get('quality', self.DEFAULT_QUALITY)
        # utility
        u = self.cfg.beta_0 + self.cfg.beta_price * (price / ref) + self.cfg.beta_quality * quality
        # choice probability via sigmoid
        return float(sigmoid(u))

    def uncensor(self, fills: np.ndarray, instruments: InstrumentSet,
                 context: dict[str, Any] | None = None) -> np.ndarray:
        """Impute latent demand from observed fills.

        Divides fills by the baseline purchase probability, i.e. the model's
        probability at the reference price (``price / ref == 1``) and the
        default quality. The utility intercept ``beta_0`` itself is not a
        probability (it may be zero or negative), so it must not be used as
        the divisor. ``instruments`` and ``context`` are unused.
        """
        cfg = self.cfg
        baseline_utility = cfg.beta_0 + cfg.beta_price + cfg.beta_quality * self.DEFAULT_QUALITY
        baseline_prob = float(sigmoid(baseline_utility))
        return fills / (baseline_prob + EPS)

59
lab/run_example.py Normal file
View File

@@ -0,0 +1,59 @@
#!/usr/bin/env python
"""Example script demonstrating the Quote-Control platform"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
import numpy as np
from lab.config import make_retail_platform, make_market_making_platform
from lab.experiments.eval import (rollout, compare_policies, fixed_price_policy,
cost_plus_margin_policy, random_walk_policy)
def demo_retail():
    """Run the retail demo: build a platform, compare baseline policies, print a table."""
    banner = "=" * 60
    print(banner)
    print("RETAIL DYNAMIC PRICING DEMO")
    print(banner)

    platform = make_retail_platform()
    print(f"Instruments: {platform.instruments.n}")
    print(f"Reference prices: {platform.instruments.refs[:5].round(2)}...")

    # Baseline policies to compare, keyed by display name.
    policies = {}
    policies['fixed'] = fixed_price_policy(platform.instruments.refs)
    policies['cost_plus_30%'] = cost_plus_margin_policy(platform.instruments.costs, 0.3)
    policies['cost_plus_50%'] = cost_plus_margin_policy(platform.instruments.costs, 0.5)
    policies['random_walk'] = random_walk_policy(platform.instruments.refs, 0.03)

    results = compare_policies(platform, policies, n_steps=100, n_runs=3)

    print("\nPolicy Comparison (100 steps, 3 runs):")
    print("-" * 50)

    def by_descending_pnl(entry):
        # Negated so that the best mean PnL is listed first.
        return -entry[1]['mean_pnl']

    for name, stats in sorted(results.items(), key=by_descending_pnl):
        line = (f"{name:20s} PnL={stats['mean_pnl']:8.1f} +/- {stats['std_reward']:6.1f} "
                f"conv={stats['mean_conversion']:.3f}")
        print(line)
def demo_market_making():
    """Run the market-making demo: one seeded rollout of a fixed-spread policy."""
    banner = "=" * 60
    print("\n" + banner)
    print("MARKET MAKING DEMO")
    print(banner)

    platform = make_market_making_platform()
    print(f"Instruments: {platform.instruments.n}")
    print(f"Initial mids: {platform.instruments.refs.round(2)}")

    def mm_policy(obs: np.ndarray, t: int):
        """Quote at the reference mids with the constant 1.0 as second value."""
        # A real policy would derive the mids from `obs`.
        return platform.instruments.refs, 1.0

    result = rollout(platform, mm_policy, n_steps=200, seed=42)
    total_spread = sum(m.spread_capture for m in result.metrics)

    print("\nRollout (200 steps):")
    print(f" Total PnL: {result.total_pnl:.2f}")
    print(f" Avg conversion: {result.avg_conversion:.3f}")
    print(f" Total spread capture: {total_spread:.2f}")
# Script entry point: run the retail demo, then the market-making demo.
if __name__ == '__main__':
    demo_retail()
    demo_market_making()