diff --git a/lab/README.md b/lab/README.md deleted file mode 100644 index b5226aa..0000000 --- a/lab/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# MOS (Money Operating System) - -Research-grade quote-control simulator for studying dynamic pricing and market making policies. -The system models pricing as a closed loop of **Quote → Arrival → Execution → Position**, enabling -controlled experimentation with demand models, inventory constraints, and reward shaping. - -## Core Loop - -1. **Quote** – the policy posts prices (one-sided or two-sided depending on the mechanism). -2. **Arrival** – a population model generates purchase opportunities or market orders. -3. **Execution** – an execution model decides whether an arrival converts at the quoted price. -4. **Position** – inventory/position limits censor fills and generate holding/shortage costs. -5. **Observation & Reward** – censored fills and aggregate metrics are exposed to the agent, while - objectives turn metrics into a scalar reward. - -Each stage is pluggable via light-weight protocols so you can swap in alternative mechanisms, -demand models, or objectives without rewriting the rest of the simulator. - -## Package Layout - -| Module | Purpose | -|-------------------|---------| -| `lab.outlet` | Core simulation engine, domain types, pricing mechanisms, objectives. | -| `lab.population` | Demand arrival models, execution probability models, competitor/market dynamics. | -| `lab.experiments` | Rollout utilities, baseline policies, and off-policy evaluation helpers. | -| `lab.config` | Convenience factories for preconfigured retail and market-making environments. | - -## Preconfigured Scenarios - -### Retail Dynamic Pricing -- Mechanism: posted prices with margin and delta constraints. -- Arrivals: browsing sessions with contamination support (scrapers). -- Execution: elasticity model with competitor cross-effects. -- Position: inventory tracking with holding and shortage costs. -- Market: reactive competitor that can trigger price wars. -- Objective: PnL minus volatility, holding cost, and lost opportunity penalties. - -```python -from lab.config import make_retail_platform -from lab.experiments import rollout, fixed_price_policy - -platform = make_retail_platform() -policy = fixed_price_policy(platform.instruments.refs) -result = rollout(platform, policy, n_steps=100) -print(result.total_pnl) -``` - -### Market Making -- Mechanism: two-sided quoting with bid/ask spreads. -- Arrivals: Hawkes order flow for clustered demand. -- Execution: Avellaneda–Stoikov style intensity model. -- Position: inventory risk limits and quadratic penalty objective. -- Market: geometric Brownian motion mid-price process. -- Objective: PnL plus spread capture minus inventory risk. - -```python -from lab.config import make_market_making_platform -from lab.experiments import rollout - -platform = make_market_making_platform() -mm_policy = lambda obs, t: (platform.instruments.refs, 1.0) -result = rollout(platform, mm_policy, n_steps=200, seed=42) -print(result.total_pnl) -``` - -## Extending the Simulator - -- Implement `lab.outlet.protocols.Mechanism` or `ArrivalModel` to introduce new pricing -domains or demand processes. -- Compose objectives with `lab.outlet.objectives.factory.make_composite` to study alternate -reward formulations. -- Use `lab.experiments.compare_policies` to benchmark candidate policies across multiple -random seeds. - -Comprehensive API documentation lives in `lab/docs` (build with `make html`). diff --git a/lab/__init__.py b/lab/__init__.py deleted file mode 100644 index cc6df0c..0000000 --- a/lab/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -""" -Quote-Control Simulator: Research-grade platform for dynamic pricing and market making - -The platform abstracts pricing as: Quote -> Arrival -> Execution -> Position -Supports multiple mechanisms: - - PostedPrice: retail dynamic pricing - - TwoSided: market making with bid-ask spreads - - Auction: reserve/shading for auction settings - -Example usage: - from lab.config import make_retail_platform - from lab.experiments import rollout, fixed_price_policy - - platform = make_retail_platform() - policy = fixed_price_policy(platform.instruments.refs) - result = rollout(platform, policy, n_steps=100) - print(f"Total PnL: {result.total_pnl:.2f}") -""" - -from .config import make_retail_platform, make_market_making_platform, RetailConfig, MarketMakingConfig -from .outlet import Platform, PlatformConfig, Quote, Observation, StepResult - -__all__ = [ - 'make_retail_platform', 'make_market_making_platform', - 'RetailConfig', 'MarketMakingConfig', - 'Platform', 'PlatformConfig', 'Quote', 'Observation', 'StepResult', -] diff --git a/lab/case/__init__.py b/lab/case/__init__.py deleted file mode 100644 index 44fbf8c..0000000 --- a/lab/case/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -""" -Case studies implementing specific research scenarios. - -Available cases: -- thesis: PHANTOM thesis implementation with contaminated demand and DR-RL -""" diff --git a/lab/case/thesis/__init__.py b/lab/case/thesis/__init__.py deleted file mode 100644 index 31db465..0000000 --- a/lab/case/thesis/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Thesis-specific implementation of the PHANTOM pricing defense framework. - -This module implements the mathematical models from the thesis: -- ContaminatedArrivalModel: Mixture demand Q(p) = (1-α)d_H + αd_A (Eq 3) -- HybridExecutionModel: Divergent H/A behavior with separability (Section 2.1) -- RobustStackelbergObjective: Maximin objective with COI penalty (Eq 23) -- COIMetrics: Cost of Information tracking (Definition 1) - -The platform configuration creates a research environment that directly -maps to the thesis mathematical framework for DR-RL experiments. -""" -from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig -from .execution import HybridExecutionModel, HybridExecutionConfig -from .objectives import RobustStackelbergObjective, COIObjective -from .platform import make_thesis_platform, ThesisConfig -from .metrics import COIMetrics, compute_coi, compute_separability - -__all__ = [ - 'ContaminatedArrivalModel', 'ContaminatedArrivalConfig', - 'HybridExecutionModel', 'HybridExecutionConfig', - 'RobustStackelbergObjective', 'COIObjective', - 'make_thesis_platform', 'ThesisConfig', - 'COIMetrics', 'compute_coi', 'compute_separability', -] diff --git a/lab/case/thesis/arrivals.py b/lab/case/thesis/arrivals.py deleted file mode 100644 index 909cab5..0000000 --- a/lab/case/thesis/arrivals.py +++ /dev/null @@ -1,327 +0,0 @@ -"""Contaminated arrivals using learned MDP kernels from behavior_loader. - -Implements thesis demand model (Section 3.1): -- Aggregate demand Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t (Eq 3) -- Demand proxy q̂_{t,i} = Σ_s Σ_k ω(a_{s,k}) · 1[i_{s,k} = i] (Eq 2) -- Per-session separability via KL divergence Δ_H, Δ_A (Eq 20-21) - -The arrival model samples sessions from a mixture of human/agent behavioral profiles, -each session produces a trajectory τ_s and associated demand computation q(τ'). -""" -from __future__ import annotations -from dataclasses import dataclass, field -from types import SimpleNamespace -from typing import Dict, List, Tuple, Optional -import numpy as np -from ...outlet.types import Opportunity, InstrumentSet, MarketState, HiddenState -from ...outlet.constants import Side, OpportunityType -from ...outlet.math_util import poisson_arrivals - -try: - import sys - from pathlib import Path - sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) - from sim.rl.behavior_loader.models import ( - BehaviorModel, AgentBehaviorModel, aggregate_event_transitions, kl_divergence - ) - REAL_MDP = True -except ImportError: - REAL_MDP = False - kl_divergence = None - -EVENT_PAGE = {"session_start": "/", "view_item_page": "/products", "learn_more_about_item": "/products/details", - "add_item_to_cart": "/cart", "purchase_complete": "/checkout", "session_end": "/checkout/success"} -EVENT_CANON = {"page_view": "session_start", "hover_over_paragraph": "view_item_page", "hover_over_title": "view_item_page", - "view_item_page": "view_item_page", "learn_more_about_item": "learn_more_about_item", - "add_item_to_cart": "add_item_to_cart", "checkout_start": "purchase_complete", "remove_item": "view_item_page"} - -# action space partition A = A_nav ∪ A_cart ∪ A_filter ∪ A_dwell with signal weights ω (Table 1) -ACTION_WEIGHTS: Dict[str, float] = { - "add_item_to_cart": 0.8, "remove_item": 0.6, "checkout_start": 0.9, "purchase_complete": 1.0, # A_cart - "hover_over_title": 0.3, "hover_over_paragraph": 0.35, "hover_over_link": 0.25, # A_dwell - "page_view": 0.1, "session_start": 0.05, "view_item_page": 0.15, "learn_more_about_item": 0.2, # A_nav - "search": 0.05, "filter_date": 0.05, "filter_price": 0.08, "sort": 0.03, "session_end": 0.0, # A_filter -} - - -@dataclass -class SessionDemand: - """Per-session demand computation per thesis formulation (Section 3.1). - - Each session s ∈ S produces trajectory τ_s and demand proxy q̂. The platform uses - divergence signals Δ_H, Δ_A to estimate per-session contamination α̂(τ'). - """ - session_id: str - q: Dict[int, float] # q̂_i demand proxy per product (Eq 2) - trajectory: List[Dict] # τ_s = (e_{s,1}, ..., e_{s,L_s}) - delta_h: float = 0.0 # D_KL(T̂' || T̄_H) (Eq 20) - delta_a: float = 0.0 # D_KL(T̂' || T̄_A) (Eq 21) - alpha_hat: float = 0.0 # per-session contamination estimate - actor_class: str = "H" # ground truth Y_s ∈ {H, A} - theta: Dict[str, float] = field(default_factory=dict) - - -def compute_demand_proxy(events: List[Dict], n_products: int) -> Dict[int, float]: - """Compute q̂_{t,i} = Σ_k ω(a_{s,k}) · 1[i_{s,k} = i] per Eq 2.""" - q = {i: 0.0 for i in range(n_products)} - for e in events: - action, pidx = e.get("eventName", ""), e.get("product_idx") - if pidx is not None and 0 <= pidx < n_products: - q[pidx] += ACTION_WEIGHTS.get(action, 0.1) - return q - - -def compute_session_divergence(events: List[Dict], ref_h: Dict, ref_a: Dict) -> Tuple[float, float]: - """Compute Δ_H, Δ_A divergence signals from trajectory (Eq 20-21).""" - if not events or kl_divergence is None: - return 0.0, 0.0 - # build empirical transition kernel from trajectory - trans: Dict[str, Dict[str, int]] = {} - prev = "session_start" - for e in events: - curr = e.get("eventName", "session_end") - trans.setdefault(prev, {}) - trans[prev][curr] = trans[prev].get(curr, 0) + 1 - prev = curr - # normalize to probabilities - kernel = {} - for s, dests in trans.items(): - total = sum(dests.values()) - kernel[s] = {d: c / total for d, c in dests.items()} if total > 0 else {} - # aggregate to event-level and compute KL divergence against reference kernels - delta_h = sum(kl_divergence(kernel.get(s, {}), ref_h.get(s, {})) for s in kernel) / max(len(kernel), 1) - delta_a = sum(kl_divergence(kernel.get(s, {}), ref_a.get(s, {})) for s in kernel) / max(len(kernel), 1) - return delta_h, delta_a - -def _canonicalize(raw: Dict) -> Dict: - out = {} - for src, dsts in raw.items(): - sc = EVENT_CANON.get(src, src) - out.setdefault(sc, {}) - for dst, p in dsts.items(): - dc = EVENT_CANON.get(dst, dst) - out[sc][dc] = out[sc].get(dc, 0.0) + p - return {s: {k: v/sum(d.values()) for k, v in d.items()} for s, d in out.items() if sum(d.values()) > 0} - - -class BehavioralProfile: - """Markov profile from learned MDP kernels (Section 3.5.2). - - Transition kernel T̂_Y estimated via MLE: P̂(s'|s) = N(s,s') / Σ_k N(s,k) (Eq 19) - """ - STATES = ["session_start", "view_item_page", "learn_more_about_item", "add_item_to_cart", "purchase_complete", "session_end"] - # fallback kernels T̄_H, T̄_A when real data unavailable - FALLBACK_H = {"session_start": {"view_item_page": 0.85, "session_end": 0.15}, - "view_item_page": {"learn_more_about_item": 0.4, "add_item_to_cart": 0.3, "view_item_page": 0.2, "session_end": 0.1}, - "learn_more_about_item": {"add_item_to_cart": 0.5, "view_item_page": 0.3, "session_end": 0.2}, - "add_item_to_cart": {"purchase_complete": 0.6, "view_item_page": 0.25, "session_end": 0.15}, - "purchase_complete": {"session_end": 1.0}} - FALLBACK_A = {"session_start": {"view_item_page": 0.95, "session_end": 0.05}, - "view_item_page": {"learn_more_about_item": 0.6, "view_item_page": 0.25, "add_item_to_cart": 0.1, "session_end": 0.05}, - "learn_more_about_item": {"view_item_page": 0.5, "add_item_to_cart": 0.15, "learn_more_about_item": 0.3, "session_end": 0.05}, - "add_item_to_cart": {"view_item_page": 0.4, "purchase_complete": 0.2, "session_end": 0.4}, - "purchase_complete": {"session_end": 1.0}} - - def __init__(self, actor: str, pprobs: np.ndarray, data_dir: str = ""): - self.actor, self.pprobs = actor, np.clip(pprobs, 0.0, 0.95) - self.trans = self._load(data_dir) # T̂_Y transition kernel - self._ensure_terminal() - self.dwell = {s: (1.2, 0.5) if actor == "agents" else (2.0, 1.2) for s in self.STATES} - - def _load(self, data_dir: str) -> Dict: - if not REAL_MDP or not data_dir: - print("using fallback") - return dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H) - try: - mdp = (AgentBehaviorModel if self.actor == "agents" else BehaviorModel)(data_dir).build_MDP() - raw = aggregate_event_transitions(mdp) if mdp.get("transitions") else {} - return _canonicalize(raw) if raw else dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H) - except Exception: - print("using fallback") - return dict(self.FALLBACK_A if self.actor == "agents" else self.FALLBACK_H) - - def _ensure_terminal(self): - self.trans.setdefault("purchase_complete", {})["session_end"] = self.trans.get("purchase_complete", {}).get("session_end", 1.0) - self.trans.setdefault("session_start", {"view_item_page": 0.7, "learn_more_about_item": 0.2, "session_end": 0.1}) - - def _tprobs(self, state: str, pidx: int) -> Dict[str, float]: - probs = dict(self.trans.get(state, {"session_end": 1.0})) - if state == "add_item_to_cart": - base = probs.get("purchase_complete", 0.0) - df = float(self.pprobs[pidx]) * (0.3 if self.actor == "agents" else 1.0) - adj = np.clip(base * 0.5 + df * 0.5, 0.0, 0.95) - rem = max(1e-6, 1.0 - adj) - other = sum(v for k, v in probs.items() if k != "purchase_complete") - probs = {k: (adj if k == "purchase_complete" else v * rem / max(other, 1e-6)) for k, v in probs.items()} - total = sum(probs.values()) - return {k: v/total for k, v in probs.items()} if total > 0 else {"session_end": 1.0} - - def sample(self, rng: np.random.Generator, sid: str, prices: np.ndarray, costs: np.ndarray) -> Tuple[List[Dict], List[SimpleNamespace]]: - events, fevts = [], [] - state, t, pidx = "session_start", 0.0, int(rng.integers(0, len(prices))) - cost, cprice = float(costs[pidx]), max(float(prices[pidx]), float(costs[pidx]) * 1.05) - - while state != "session_end" and len(events) < 40: - if state != "session_start": - row = {"session_id": sid, "actor": "agent" if self.actor == "agents" else "human", - "eventName": state, "product_idx": pidx, "productId": f"product-{pidx:04d}", - "price_offered": cprice, "price_paid": 0.0, "page": EVENT_PAGE.get(state, "/"), - "ts": t, "unit_cost": cost, "base_price": float(prices[pidx])} - if state == "purchase_complete": - row["price_paid"] = max(cprice * (1.0 + rng.normal(0.0, 0.015)), cost) - events.append(row) - fevts.append(SimpleNamespace(eventName=state, page=row["page"], productId=row["productId"], ts=t)) - - probs = self._tprobs(state, pidx) - state = rng.choice(list(probs.keys()), p=list(probs.values())) - sh, sc = self.dwell.get(state, (2.0, 1.0)) - t += max(0.3, rng.gamma(shape=sh, scale=sc)) - return events, fevts - - -@dataclass -class ContaminatedArrivalConfig: - base_rate: float = 20.0 - alpha_contamination: float = 0.2 - alpha_drift: float = 0.0 - alpha_bounds: tuple[float, float] = (0.0, 0.5) - human_views_range: tuple[int, int] = (1, 4) - agent_views_range: tuple[int, int] = (3, 10) - agent_systematic: bool = True - use_real_behavior: bool = True - human_data_dir: str = "" - agent_data_dir: str = "" - - -class ContaminatedArrivalModel: - """Mixture model Q(p) = (1-α)E[d(p;θ_H)] + αE[d(p;θ_A)] + ε_t (Eq 3). - - Samples sessions from human/agent behavioral profiles, computes per-session - demand proxy q̂ and divergence signals Δ_H, Δ_A for separability. - """ - - def __init__(self, cfg: ContaminatedArrivalConfig | None = None): - self.cfg = cfg or ContaminatedArrivalConfig() - self._alpha = self.cfg.alpha_contamination - self._scount = 0 - self._profiles: Dict[str, BehavioralProfile] = {} - self._ref_kernels: Dict[str, Dict] = {} # T̄_H, T̄_A reference kernels - self._session_demands: List[SessionDemand] = [] # collected session demands - - @property - def alpha(self) -> float: - return self._alpha - - def _profile(self, actor: str, pprobs: np.ndarray) -> BehavioralProfile: - key = actor - if key not in self._profiles: - ddir = self.cfg.agent_data_dir if actor == "agents" else self.cfg.human_data_dir - if not ddir and self.cfg.use_real_behavior: - base = Path(__file__).parent.parent.parent.parent / "experiments" - ddir = str(base / ("agents/collected_data" if actor == "agents" else "collected_data")) - profile = BehavioralProfile(actor, pprobs, ddir if self.cfg.use_real_behavior else "") - self._profiles[key] = profile - self._ref_kernels[key] = profile.trans # cache T̄_Y for divergence - return self._profiles[key] - - def get_ref_kernels(self) -> Tuple[Dict, Dict]: - """Return reference transition kernels T̄_H, T̄_A for divergence computation.""" - return (self._ref_kernels.get("humans", BehavioralProfile.FALLBACK_H), - self._ref_kernels.get("agents", BehavioralProfile.FALLBACK_A)) - - def get_session_demands(self) -> List[SessionDemand]: - """Return collected session demands for downstream analysis.""" - return self._session_demands - - def sample(self, t: float, dt: float, instruments: InstrumentSet, - market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]: - """Sample arrivals as per Eq 3: mixture of human/agent demand distributions. - - For each session s, computes: - - Trajectory τ_s from behavioral profile sampling - - Demand proxy q̂ via weighted action aggregation (Eq 2) - - Divergence signals Δ_H, Δ_A for separability (Eq 20-21) - - Per-session contamination estimate α̂(τ') - """ - cfg = self.cfg - if cfg.alpha_drift != 0: - self._alpha = np.clip(self._alpha + cfg.alpha_drift * rng.normal(), *cfg.alpha_bounds) - hidden.contamination = self._alpha - - n_sess = poisson_arrivals(cfg.base_rate * hidden.true_demand_intensity, dt, rng) - prices, costs = instruments.refs, instruments.costs - margin = np.clip((prices - costs) / np.maximum(costs, 1e-3), -0.9, 2.0) - hprob, aprob = 0.08 * np.exp(-1.2 * margin), 0.05 * np.exp(-0.6 * margin) - ref_h, ref_a = self.get_ref_kernels() - - opps = [] - for _ in range(n_sess): - self._scount += 1 - sid = f"s{self._scount:06d}" - is_agent = rng.random() < self._alpha - actor, probs = ("agents", aprob) if is_agent else ("humans", hprob) - profile = self._profile(actor, probs) - events, fevts = profile.sample(rng, sid, prices, costs) - - # compute demand proxy q̂ per Eq 2 - q = compute_demand_proxy(events, instruments.n) - - # compute divergence signals Δ_H, Δ_A per Eq 20-21 - delta_h, delta_a = compute_session_divergence(events, ref_h, ref_a) - # per-session contamination estimate α̂(τ') = σ(β(Δ_H - Δ_A)) - alpha_hat = 1.0 / (1.0 + np.exp(-2.0 * (delta_h - delta_a))) if (delta_h + delta_a) > 0 else 0.5 - - theta = ({'price_sensitivity': rng.uniform(0.05, 0.2), 'base_conversion': 0.01, 'info_value': 1.0} if is_agent - else {'price_sensitivity': rng.uniform(1.5, 4.0), 'base_conversion': rng.uniform(0.2, 0.5), 'info_value': 0.0}) - - # store session demand for downstream analysis - self._session_demands.append(SessionDemand( - session_id=sid, q=q, trajectory=events, delta_h=delta_h, delta_a=delta_a, - alpha_hat=alpha_hat, actor_class="A" if is_agent else "H", theta=theta)) - - viewed = list({e["product_idx"] for e in events if "product_idx" in e}) - if not viewed: - vr = cfg.agent_views_range if is_agent else cfg.human_views_range - viewed = list(rng.choice(instruments.n, size=min(rng.integers(*vr), instruments.n), replace=False)) - - for vi, iid in enumerate(viewed): - opps.append(Opportunity( - id=f"{sid}-{iid}", type=OpportunityType.SESSION, side=Side.BUY, - instrument_id=int(iid), size=1.0, t=t + rng.uniform(0, dt), - context={'session_id': sid, 'actor_class': 'AGENT' if is_agent else 'HUMAN', 'is_agent': is_agent, - 'reconnaissance_intent': is_agent, 'view_index': vi, 'total_views': len(viewed), - 'theta': theta, 'trajectory_events': fevts, 'mdp_trajectory': events, - 'demand_proxy': q, 'alpha_hat': alpha_hat, 'delta_h': delta_h, 'delta_a': delta_a})) - return opps - - -@dataclass -class AdversarialArrivalConfig: - base_rate: float = 5.0 - n_parallel_agents: int = 3 - query_all_products: bool = True - - -class AdversarialArrivalModel: - """Adversarial coordination (Theorem 1): as N->inf, COI->0.""" - - def __init__(self, cfg: AdversarialArrivalConfig | None = None): - self.cfg = cfg or AdversarialArrivalConfig() - self._qcount = 0 - - def sample(self, t: float, dt: float, instruments: InstrumentSet, - market: MarketState | None, hidden: HiddenState, rng: np.random.Generator) -> list[Opportunity]: - cfg, opps = self.cfg, [] - for _ in range(poisson_arrivals(cfg.base_rate, dt, rng)): - self._qcount += 1 - for ai in range(cfg.n_parallel_agents): - sid = f"adv{self._qcount:06d}-{ai}" - prods = np.arange(instruments.n) if cfg.query_all_products else rng.choice(instruments.n, size=1) - for iid in prods: - opps.append(Opportunity( - id=f"{sid}-{iid}", type=OpportunityType.SESSION, side=Side.BUY, - instrument_id=int(iid), size=1.0, t=t, - context={'session_id': sid, 'actor_class': 'AGENT', 'is_agent': True, 'adversarial': True, - 'agent_index': ai, 'query_group': self._qcount, - 'theta': {'price_sensitivity': 0.0, 'base_conversion': 0.0, 'info_value': 1.0}})) - return opps diff --git a/lab/case/thesis/execution.py b/lab/case/thesis/execution.py deleted file mode 100644 index 5d2aa37..0000000 --- a/lab/case/thesis/execution.py +++ /dev/null @@ -1,91 +0,0 @@ -"""Execution models with divergent H/A behavior using ground truth labels.""" -from __future__ import annotations -from dataclasses import dataclass -from typing import Any, Dict -import numpy as np -from ...outlet.types import Opportunity, Quote, InstrumentSet, MarketState -from ...outlet.math_util import sigmoid, safe_log, EPS - - -@dataclass -class HybridExecutionConfig: - human_base_prob: float = 0.3 - human_elasticity: float = 2.5 - agent_conversion: float = 0.01 - cross_elasticity: float = 0.4 - quality_weight: float = 0.2 - use_separability: bool = False - - -class HybridExecutionModel: - """Execution with divergent H/A behavior using ground truth labels.""" - - def __init__(self, cfg: HybridExecutionConfig | None = None): - self.cfg = cfg or HybridExecutionConfig() - - def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet, - market: MarketState | None, rng: np.random.Generator) -> float: - cfg, idx = self.cfg, int(opp.instrument_id) - price, ref, cost = float(quote.prices[idx]), float(instruments.refs[idx]), float(instruments.costs[idx]) - ctx = opp.context - theta = ctx.get('theta', {}) - is_agent = ctx.get('is_agent', False) - - if is_agent: - return cfg.agent_conversion * theta.get('base_conversion', 1.0) - - # human logit discrete choice - sens = theta.get('price_sensitivity', cfg.human_elasticity) - base = theta.get('base_conversion', cfg.human_base_prob) - u_price = -sens * safe_log(price / (ref + EPS)) - quality = instruments.instruments[idx].attrs.get('quality', 0.5) - u_quality = cfg.quality_weight * quality - - u_comp = 0.0 - if market and market.competitor_quotes is not None: - cp = market.competitor_quotes[idx] - if cp < price: - u_comp = -cfg.cross_elasticity * (price - cp) / ref - - utility = safe_log(base / (1 - base + EPS)) + u_price + u_quality + u_comp - return float(sigmoid(utility)) - - def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray: - if context is None: - return fills / (self.cfg.human_base_prob + EPS) - agent_frac = context.get('contamination', 0.0) - return fills / (self.cfg.human_base_prob * (1 - agent_frac) + EPS) - - -@dataclass -class SeparableExecutionConfig: - human_funnel: Dict[str, float] = None - agent_funnel: Dict[str, float] = None - - def __post_init__(self): - self.human_funnel = self.human_funnel or {'view_to_detail': 0.4, 'detail_to_cart': 0.3, 'cart_to_purchase': 0.6} - self.agent_funnel = self.agent_funnel or {'view_to_detail': 0.8, 'detail_to_cart': 0.05, 'cart_to_purchase': 0.1} - - -class SeparableExecutionModel: - """Execution with Markov funnel kernels using ground truth labels.""" - - def __init__(self, cfg: SeparableExecutionConfig | None = None): - self.cfg = cfg or SeparableExecutionConfig() - - def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet, - market: MarketState | None, rng: np.random.Generator) -> float: - is_agent = opp.context.get('is_agent', False) - probs = self.cfg.agent_funnel if is_agent else self.cfg.human_funnel - p = probs['view_to_detail'] * probs['detail_to_cart'] * probs['cart_to_purchase'] - - if not is_agent: - idx = int(opp.instrument_id) - price_ratio = quote.prices[idx] / (instruments.refs[idx] + EPS) - p *= np.exp(-0.5 * (price_ratio - 1.0)) - return float(np.clip(p, 0, 1)) - - def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, context: dict[str, Any] | None = None) -> np.ndarray: - h = self.cfg.human_funnel - exp_conv = h['view_to_detail'] * h['detail_to_cart'] * h['cart_to_purchase'] - return fills / (exp_conv + EPS) diff --git a/lab/case/thesis/metrics.py b/lab/case/thesis/metrics.py deleted file mode 100644 index 0cd9680..0000000 --- a/lab/case/thesis/metrics.py +++ /dev/null @@ -1,102 +0,0 @@ -"""Thesis metrics for COI and behavioral analysis using ground truth labels.""" -from __future__ import annotations -from dataclasses import dataclass, field -from typing import Dict -import numpy as np -from ...outlet.types import StepLogs, StepMetrics, Quote, InstrumentSet -from ...outlet.math_util import safe_log, EPS - - -@dataclass -class COIMetrics: - coi_level: float = 0.0 - coi_leakage: float = 0.0 - realized_premium: float = 0.0 - theoretical_max: float = 0.0 - erosion_rate: float = 0.0 - - def to_dict(self) -> dict[str, float]: - return {k: getattr(self, k) for k in ['coi_level', 'coi_leakage', 'realized_premium', 'theoretical_max', 'erosion_rate']} - - -def compute_coi(quote: Quote, instruments: InstrumentSet, metrics: StepMetrics, contamination: float) -> COIMetrics: - prices, costs, refs = quote.prices, instruments.costs, instruments.refs - margins = prices - costs - coi_level = float(np.mean(margins)) - theoretical_max = float(np.mean(costs)) - realized_premium = (metrics.revenue - metrics.cost) / metrics.units_traded if metrics.units_traded > 0 else 0.0 - price_var = float(np.var(prices / refs)) - coi_leakage = contamination * (coi_level + price_var) - erosion_rate = contamination * coi_level / (theoretical_max + EPS) - return COIMetrics(coi_level=coi_level, coi_leakage=coi_leakage, realized_premium=realized_premium, - theoretical_max=theoretical_max, erosion_rate=erosion_rate) - - -@dataclass -class SeparabilityMetrics: - classification_accuracy: float = 0.0 - estimated_alpha: float = 0.0 - n_human_sessions: int = 0 - n_agent_sessions: int = 0 - - -def compute_separability(logs: StepLogs, true_alpha: float) -> SeparabilityMetrics: - """Compute separability using ground truth labels only.""" - if logs.events is None or len(logs.events) == 0: - return SeparabilityMetrics(estimated_alpha=true_alpha) - - sessions: Dict[str, bool] = {} - for evt in logs.events: - sid = evt.metadata.get('session_id', evt.opportunity_id) - if sid not in sessions: - sessions[sid] = evt.metadata.get('is_agent', False) - - n_agent = sum(1 for is_agent in sessions.values() if is_agent) - n_human = len(sessions) - n_agent - est_alpha = n_agent / len(sessions) if sessions else 0.0 - - return SeparabilityMetrics( - classification_accuracy=1.0, # ground truth is always correct - estimated_alpha=est_alpha, - n_human_sessions=n_human, - n_agent_sessions=n_agent) - - -@dataclass -class RevenueAttribution: - total_revenue: float = 0.0 - human_revenue: float = 0.0 - agent_revenue: float = 0.0 - human_conversion: float = 0.0 - agent_conversion: float = 0.0 - - -def compute_attribution(logs: StepLogs, metrics: StepMetrics) -> RevenueAttribution: - if logs.executions is None: - return RevenueAttribution(total_revenue=metrics.revenue) - - human_rev, agent_rev, human_cnt, agent_cnt = 0.0, 0.0, 0, 0 - for exe in logs.executions: - if exe.propensity < 0.05: - agent_rev += exe.price * exe.size_filled - agent_cnt += 1 - else: - human_rev += exe.price * exe.size_filled - human_cnt += 1 - - total_exp = logs.aggregates.get('n_arrivals', 1) - return RevenueAttribution( - total_revenue=metrics.revenue, human_revenue=human_rev, agent_revenue=agent_rev, - human_conversion=human_cnt / (total_exp * 0.8 + EPS), - agent_conversion=agent_cnt / (total_exp * 0.2 + EPS)) - - -def order_statistic_erosion(n_agents: int, price_variance: float) -> float: - """COI erosion from Theorem 1: as N->inf, min(p_1..p_N)->p_min.""" - if n_agents <= 1: - return 0.0 - sigma, log_n = np.sqrt(price_variance), safe_log(n_agents) - if log_n < 1: - return 0.0 - shift = sigma * (np.sqrt(2 * log_n) - (safe_log(log_n) + safe_log(4 * np.pi)) / (2 * np.sqrt(2 * log_n) + EPS)) - return float(min(shift / (sigma * 2 + EPS), 1.0)) diff --git a/lab/case/thesis/objectives.py b/lab/case/thesis/objectives.py deleted file mode 100644 index ba70320..0000000 --- a/lab/case/thesis/objectives.py +++ /dev/null @@ -1,228 +0,0 @@ -""" -Thesis-specific objectives implementing robust pricing under contamination. - -Implements the Maximin objective from Eq 23: -π* = argmax_π min_{Q ∈ U_ε} E_d~Q[R(p,d) - λ·COI(p)] - -Key components: -- COIObjective: Cost of Information penalty (Definition 1) -- RobustStackelbergObjective: Full maximin objective with Wasserstein robustness -- UXPenalty: User experience degradation from volatility -""" -from __future__ import annotations -from dataclasses import dataclass -import numpy as np -from ...outlet.objectives.base import BaseObjective, CompositeObjective -from ...outlet.types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation -from ...outlet.math_util import safe_log, EPS - -class COIObjective(BaseObjective): - """Cost of Information penalty from Definition 1. - - COI(π) = E[P] - p_min - - The expected price premium over marginal cost represents the platform's - pricing power. Agent reconnaissance erodes this by revealing price - distribution to buyers. - - We implement COI_leakage = f(τ') · InfoValue(p, τ') - where f(τ') is the estimated agent probability. - """ - - def __init__(self, lambda_coi: float = 1.0, use_revelation: bool = False): - """ - Args: - lambda_coi: Weight on COI penalty - use_revelation: If True, use -log(π(p)) as info value (penalizes rare prices) - """ - self.lambda_coi = lambda_coi - self.use_revelation = use_revelation - - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: - # COI_leakage = α · InfoValue - alpha = hidden.contamination - - if self.use_revelation: - # revelation surrogate: rare prices reveal more about policy - # InfoValue = -log(π(p|τ')) ≈ surprise of the price - price_surprise = np.mean(np.abs(quote.prices - instruments.refs) / (instruments.refs + EPS)) - info_value = price_surprise - else: - # query-tax surrogate: each agent query incurs constant leakage - info_value = 1.0 - - leakage = alpha * info_value - return -self.lambda_coi * leakage - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: - alpha = hidden.contamination - margins = (quote.prices - instruments.costs) / (instruments.costs + EPS) - return { - 'coi_penalty': self.reward(quote, instruments, metrics, hidden, obs), - 'contamination': alpha, - 'avg_margin': float(np.mean(margins)), - } - -@dataclass -class RobustObjectiveConfig: - """Configuration for robust Stackelberg objective. - - Attributes: - lambda_coi: Weight on COI penalty (λ in Eq 23) - lambda_ux: Weight on UX penalty - lambda_volatility: Weight on price volatility penalty - gamma_inventory: Inventory risk aversion - wasserstein_epsilon: Ambiguity set radius (ε in Eq 21) - """ - lambda_coi: float = 0.5 - lambda_ux: float = 0.1 - lambda_volatility: float = 0.2 - gamma_inventory: float = 0.1 - wasserstein_epsilon: float = 0.1 - -class RobustStackelbergObjective(BaseObjective): - """Implements the Maximin Objective from thesis Eq 23. - - π* = argmax_π min_{Q ∈ U_ε(P̂_N)} E_d~Q[R(p,d) - λ·COI(p)] - - The objective balances: - 1. Revenue R(p,d) from human purchases - 2. COI penalty for information leakage to agents - 3. UX penalty for price volatility - 4. Inventory/holding costs - - The min over ambiguity set U_ε is approximated by penalizing - high contamination scenarios more heavily. - """ - - def __init__(self, cfg: RobustObjectiveConfig | None = None): - self.cfg = cfg or RobustObjectiveConfig() - - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: - cfg = self.cfg - - # 1. base revenue (R(p,d)) - revenue = metrics.revenue - cost = metrics.cost - profit = revenue - cost - - # 2. COI penalty: scales with contamination and margin extraction - # high margins + high contamination = high leakage - alpha = hidden.contamination - margins = quote.prices - instruments.costs - avg_margin = float(np.mean(margins)) - coi_penalty = cfg.lambda_coi * avg_margin * alpha - - # 3. UX penalty: price volatility harms legitimate users - volatility_penalty = cfg.lambda_volatility * metrics.volatility - - # 4. inventory/position cost - position_penalty = cfg.gamma_inventory * metrics.position_cost - - # 5. lost opportunity cost (stockouts) - lost_penalty = 0.1 * metrics.lost_opportunity - - # robust adjustment: under adversarial distribution Q, - # expect lower revenue and higher costs - # approximate via worst-case contamination within ε-ball - worst_case_alpha = min(alpha + cfg.wasserstein_epsilon, 1.0) - robustness_penalty = cfg.wasserstein_epsilon * avg_margin * worst_case_alpha - - total = profit - coi_penalty - volatility_penalty - position_penalty - lost_penalty - robustness_penalty - - return total - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: - cfg = self.cfg - alpha = hidden.contamination - margins = quote.prices - instruments.costs - avg_margin = float(np.mean(margins)) - - return { - 'revenue': metrics.revenue, - 'cost': metrics.cost, - 'profit': metrics.revenue - metrics.cost, - 'coi_penalty': -cfg.lambda_coi * avg_margin * alpha, - 'volatility_penalty': -cfg.lambda_volatility * metrics.volatility, - 'position_penalty': -cfg.gamma_inventory * metrics.position_cost, - 'lost_penalty': -0.1 * metrics.lost_opportunity, - 'robustness_penalty': -cfg.wasserstein_epsilon * avg_margin * min(alpha + cfg.wasserstein_epsilon, 1.0), - 'contamination': alpha, - 'avg_margin_pct': avg_margin / (float(np.mean(instruments.costs)) + EPS), - } - -class UXPenalty(BaseObjective): - """User experience penalty from price volatility. - - High price volatility degrades UX for legitimate human users. - This term ensures the defense doesn't harm real customers while - protecting against agent reconnaissance. - """ - - def __init__(self, scale: float = 1.0, max_acceptable_volatility: float = 0.1): - self.scale = scale - self.max_vol = max_acceptable_volatility - - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: - # penalty increases quadratically beyond threshold - excess_vol = max(0, metrics.volatility - self.max_vol) - return -self.scale * (excess_vol ** 2) - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: - return { - 'ux_penalty': self.reward(quote, instruments, metrics, hidden, obs), - 'volatility': metrics.volatility, - } - -class AdaptiveObjective(BaseObjective): - """Objective that adapts weights based on estimated contamination. - - When contamination is low, focus on revenue maximization. - When contamination is high, increase COI defense weight. - """ - - def __init__(self, base_lambda_coi: float = 0.3, max_lambda_coi: float = 2.0, - adaptation_rate: float = 2.0): - self.base_lambda = base_lambda_coi - self.max_lambda = max_lambda_coi - self.rate = adaptation_rate - - def _adaptive_lambda(self, alpha: float) -> float: - # sigmoid scaling: λ(α) = base + (max-base) * sigmoid(rate*(α-0.5)) - from ...outlet.math_util import sigmoid - scale = sigmoid(self.rate * (alpha - 0.3)) - return self.base_lambda + (self.max_lambda - self.base_lambda) * scale - - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: - alpha = hidden.contamination - lambda_coi = self._adaptive_lambda(alpha) - - profit = metrics.revenue - metrics.cost - margins = quote.prices - instruments.costs - coi_penalty = lambda_coi * float(np.mean(margins)) * alpha - - return profit - coi_penalty - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: - alpha = hidden.contamination - return { - 'profit': metrics.revenue - metrics.cost, - 'adaptive_lambda': self._adaptive_lambda(alpha), - 'contamination': alpha, - } - -def make_thesis_objective(lambda_coi: float = 0.5, lambda_ux: float = 0.1, - lambda_vol: float = 0.2) -> CompositeObjective: - """Create the standard thesis objective composition.""" - return CompositeObjective([ - (RobustStackelbergObjective(RobustObjectiveConfig( - lambda_coi=lambda_coi, lambda_ux=lambda_ux, lambda_volatility=lambda_vol)), 1.0), - ]) diff --git a/lab/case/thesis/platform.py b/lab/case/thesis/platform.py deleted file mode 100644 index ec00da5..0000000 --- a/lab/case/thesis/platform.py +++ /dev/null @@ -1,176 +0,0 @@ -"""Thesis platform with real MDP behavioral models and separability scoring.""" -from __future__ import annotations -from dataclasses import dataclass -from pathlib import Path -import numpy as np -from ...outlet import (Platform, PlatformConfig, PositionModel, PositionConfig, - PostedPriceMechanism, make_instruments, InstrumentType, LogLevel) -from ...outlet.mechanisms.posted_price import PostedPriceConfig -from ...outlet.observation import DefaultObservationBuilder, ObservationConfig -from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig -from .execution import HybridExecutionModel, HybridExecutionConfig -from .objectives import RobustStackelbergObjective, RobustObjectiveConfig - - -@dataclass -class ThesisConfig: - # instruments - n_instruments: int = 10 - cost_range: tuple[float, float] = (5.0, 50.0) - margin_range: tuple[float, float] = (0.2, 0.5) - - # contamination (Section 3.1) - alpha_contamination: float = 0.2 - alpha_drift: float = 0.0 - alpha_bounds: tuple[float, float] = (0.0, 0.5) - - # objectives (Eq 23) - lambda_coi: float = 0.5 - lambda_ux: float = 0.1 - lambda_volatility: float = 0.2 - wasserstein_epsilon: float = 0.1 - - # arrivals - sessions_per_step: int = 30 - human_views_range: tuple[int, int] = (1, 4) - agent_views_range: tuple[int, int] = (3, 10) - - # inventory - initial_inventory: float = 100.0 - holding_cost_rate: float = 0.002 - - # real behavioral models (from sim.rl) - use_real_behavior: bool = True - use_separability: bool = False # disabled until classifier trained - human_data_dir: str = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data" - agent_data_dir: str = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data" - - # simulation - max_steps: int = 500 - seed: int | None = 24 - log_level: LogLevel = LogLevel.AGG_ONLY - - -def _resolve_data_dirs(cfg: ThesisConfig) -> tuple[str, str]: - """Resolve data directories for behavioral models.""" - base = Path(__file__).parent.parent.parent.parent / "experiments" - human = cfg.human_data_dir or str(base / "collected_data") - agent = cfg.agent_data_dir or str(base / "agents/collected_data") - return human, agent - - -def make_thesis_platform(cfg: ThesisConfig | None = None) -> Platform: - """Create platform with real MDP behavioral models. - - Implements: - - Contaminated arrivals using learned MDP kernels from behavior_loader - - Hybrid execution with real separability scoring from lib.separability - - Robust Stackelberg objective (Eq 23) - """ - cfg = cfg or ThesisConfig() - rng = np.random.default_rng(cfg.seed) - human_dir, agent_dir = _resolve_data_dirs(cfg) - - instruments = make_instruments( - n=cfg.n_instruments, cost_range=cfg.cost_range, margin_range=cfg.margin_range, - inst_type=InstrumentType.SKU, rng=rng) - instruments.position = np.full(cfg.n_instruments, cfg.initial_inventory) - - arrival = ContaminatedArrivalModel(ContaminatedArrivalConfig( - base_rate=cfg.sessions_per_step, - alpha_contamination=cfg.alpha_contamination, - alpha_drift=cfg.alpha_drift, - alpha_bounds=cfg.alpha_bounds, - human_views_range=cfg.human_views_range, - agent_views_range=cfg.agent_views_range, - use_real_behavior=cfg.use_real_behavior, - human_data_dir=human_dir, - agent_data_dir=agent_dir, - )) - - execution = HybridExecutionModel(HybridExecutionConfig( - use_separability=cfg.use_separability, - )) - - mechanism = PostedPriceMechanism(PostedPriceConfig(max_delta_pct=0.15, min_margin_pct=0.05)) - position = PositionModel(PositionConfig(initial_position=cfg.initial_inventory, holding_cost_rate=cfg.holding_cost_rate)) - - market = None - objective = RobustStackelbergObjective(RobustObjectiveConfig( - lambda_coi=cfg.lambda_coi, lambda_ux=cfg.lambda_ux, - lambda_volatility=cfg.lambda_volatility, wasserstein_epsilon=cfg.wasserstein_epsilon)) - - obs_builder = DefaultObservationBuilder(ObservationConfig(mask_true_demand=True)) - platform_cfg = PlatformConfig(n_instruments=cfg.n_instruments, max_steps=cfg.max_steps, - seed=cfg.seed, log_level=cfg.log_level, mask_demand=True) - - return Platform(instruments=instruments, mechanism=mechanism, arrival=arrival, execution=execution, - position=position, market=market, obs_builder=obs_builder, objective=objective, cfg=platform_cfg) - - -@dataclass -class AblationConfig(ThesisConfig): - disable_coi_penalty: bool = False - disable_ux_penalty: bool = False - disable_contamination: bool = False - disable_real_behavior: bool = False - - -def make_ablation_platform(cfg: AblationConfig) -> Platform: - if cfg.disable_coi_penalty: - cfg.lambda_coi = 0.0 - if cfg.disable_ux_penalty: - cfg.lambda_ux = 0.0 - if cfg.disable_contamination: - cfg.alpha_contamination = 0.0 - if cfg.disable_real_behavior: - cfg.use_real_behavior = False - cfg.use_separability = False - return make_thesis_platform(cfg) - - -def sweep_contamination(alpha_values: list[float], base_cfg: ThesisConfig | None = None, - n_steps: int = 100, seed: int = 42) -> dict[float, dict]: - """Test performance across contamination levels (Theorem 1 validation).""" - from ...experiments.eval import rollout, fixed_price_policy - - results = {} - base_cfg = base_cfg or ThesisConfig() - - for alpha in alpha_values: - cfg = ThesisConfig(**{k: v for k, v in base_cfg.__dict__.items() if k != 'alpha_contamination'}, - alpha_contamination=alpha) - platform = make_thesis_platform(cfg) - policy = fixed_price_policy(platform.instruments.refs) - result = rollout(platform, policy, n_steps, seed=seed) - results[alpha] = { - 'total_reward': result.total_reward, - 'total_pnl': result.total_pnl, - 'avg_conversion': result.avg_conversion, - 'final_contamination': platform._hidden.contamination, - } - return results - - -def sweep_behavior_modes(base_cfg: ThesisConfig | None = None, n_steps: int = 100, seed: int = 42) -> dict[str, dict]: - """Compare real vs synthetic behavioral models.""" - from ...experiments.eval import rollout, fixed_price_policy - - base_cfg = base_cfg or ThesisConfig() - modes = { - 'real_mdp': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': True, 'use_separability': True}), - 'synthetic': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': False, 'use_separability': False}), - 'real_mdp_no_sep': ThesisConfig(**{**base_cfg.__dict__, 'use_real_behavior': True, 'use_separability': False}), - } - - results = {} - for name, cfg in modes.items(): - platform = make_thesis_platform(cfg) - policy = fixed_price_policy(platform.instruments.refs) - result = rollout(platform, policy, n_steps, seed=seed) - results[name] = { - 'total_reward': result.total_reward, - 'total_pnl': result.total_pnl, - 'avg_conversion': result.avg_conversion, - } - return results diff --git a/lab/case/thesis/run_experiment.py b/lab/case/thesis/run_experiment.py deleted file mode 100644 index 962db4f..0000000 --- a/lab/case/thesis/run_experiment.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python -"""Thesis simulation experiments with real MDP behavioral models.""" -from __future__ import annotations -import sys -from pathlib import Path - -if __name__ == '__main__': - sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) - -from lab.case.thesis.platform import make_thesis_platform, ThesisConfig -from lab.case.thesis.metrics import compute_coi, compute_separability -from lab.experiments.eval import compare_policies -import numpy as np - - -def demo_basic_simulation(): - print("=" * 70) - print("THESIS SIMULATION: Contaminated Dynamic Pricing (Real MDP Kernels)") - print("=" * 70) - - cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, lambda_coi=0.5, - max_steps=100, seed=42, use_real_behavior=True) - platform = make_thesis_platform(cfg) - - print(f"\nInstruments: {platform.instruments.n}") - print(f"Reference prices: {platform.instruments.refs.round(2)}") - print(f"Costs: {platform.instruments.costs.round(2)}") - print(f"Initial contamination alpha={cfg.alpha_contamination}") - print(f"Using real behavior: {cfg.use_real_behavior}") - - result = platform.reset(seed=42) - total_reward, coi_history = 0, [] - - print(f"\n{'Step':>5} {'Reward':>10} {'PnL':>10} {'COI':>8} {'alpha':>6} {'Conv':>8}") - print("-" * 55) - - for t in range(cfg.max_steps): - action = platform.instruments.refs * np.random.uniform(0.95, 1.15, size=platform.instruments.n) - result = platform.step(action) - total_reward += result.reward - coi = compute_coi(platform._quote, platform.instruments, result.metrics, result.hidden.contamination) - coi_history.append(coi.coi_level) - - if t % 20 == 0: - print(f"{t:5d} {result.reward:10.2f} {result.metrics.pnl:10.2f} " - f"{coi.coi_level:8.2f} {result.hidden.contamination:6.2f} {result.metrics.conversion:8.3f}") - - print("-" * 55) - print(f"Total Reward: {total_reward:.2f}") - print(f"Average COI: {np.mean(coi_history):.2f}") - print(f"COI Trend: {coi_history[-1] - coi_history[0]:+.2f}") - - -def demo_contamination_sweep(): - print("\n" + "=" * 70) - print("EXPERIMENT: COI Erosion vs Contamination (Theorem 1)") - print("=" * 70) - - from lab.case.thesis.platform import sweep_contamination - trials = 20 - alpha_values = [i/trials for i in range(trials)] - results = sweep_contamination(alpha_values, n_steps=100, seed=42) - - print(f"\n{'alpha':>6} {'Reward':>12} {'PnL':>12} {'Conv':>10}") - print("-" * 45) - for alpha, m in sorted(results.items()): - print(f"{alpha:6.2f} {m['total_reward']:12.2f} {m['total_pnl']:12.2f} {m['avg_conversion']:10.3f}") - - rewards = [results[a]['total_reward'] for a in sorted(results.keys())] - dataset = np.array([[a, r] for a, r in zip(alpha_values, rewards)]) - trend = np.corrcoef(dataset[:, 0], dataset[:, 1])[0, 1] - print(f"Trend (alpha~reward correlation): {trend:.3f}") - - -def demo_policy_comparison(): - print("\n" + "=" * 70) - print("EXPERIMENT: Policy Comparison under Contamination") - print("=" * 70) - - cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.25, max_steps=100, seed=42) - platform = make_thesis_platform(cfg) - - def fixed_policy(obs, t): return platform.instruments.refs.copy(), 1.0 - def aggressive_policy(obs, t): return platform.instruments.refs * 1.3, 1.0 - def conservative_policy(obs, t): return platform.instruments.refs * 1.05, 1.0 - def adaptive_policy(obs, t): - fills = obs[platform.instruments.n:2*platform.instruments.n] - exp = obs[2*platform.instruments.n:3*platform.instruments.n] - conv = np.sum(fills) / (np.sum(exp) + 1e-8) - return platform.instruments.refs * (1.0 + 0.2 * conv), 1.0 - - policies = {'fixed': fixed_policy, 'aggressive': aggressive_policy, - 'conservative': conservative_policy, 'adaptive': adaptive_policy} - results = compare_policies(platform, policies, n_steps=100, n_runs=3, seed=42) - - print(f"\n{'Policy':>15} {'Reward':>12} {'Std':>10} {'PnL':>12} {'Conv':>10}") - print("-" * 65) - for name, r in sorted(results.items(), key=lambda x: -x[1]['mean_reward']): - print(f"{name:>15} {r['mean_reward']:12.2f} {r['std_reward']:10.2f} " - f"{r['mean_pnl']:12.2f} {r['mean_conversion']:10.3f}") - - -def demo_session_analysis(): - """Analyze session-level behavior from MDP trajectories.""" - print("\n" + "=" * 70) - print("EXPERIMENT: Session Analysis (Ground Truth)") - print("=" * 70) - - from lab.outlet.constants import LogLevel - cfg = ThesisConfig(n_instruments=5, alpha_contamination=0.3, max_steps=50, - log_level=LogLevel.FULL, seed=42, use_real_behavior=True) - platform = make_thesis_platform(cfg) - - result = platform.reset(seed=42) - human_sessions, agent_sessions = 0, 0 - - for t in range(cfg.max_steps): - action = platform.instruments.refs * 1.1 - result = platform.step(action) - sep = compute_separability(result.logs, result.hidden.contamination) - human_sessions += sep.n_human_sessions - agent_sessions += sep.n_agent_sessions - - total = human_sessions + agent_sessions - print(f"\nTotal sessions: {total}") - print(f"Human sessions: {human_sessions} ({100*human_sessions/total:.1f}%)") - print(f"Agent sessions: {agent_sessions} ({100*agent_sessions/total:.1f}%)") - print(f"True contamination: {cfg.alpha_contamination:.1%}") - print(f"Observed contamination: {agent_sessions/total:.1%}") - - -if __name__ == '__main__': - demo_basic_simulation() - demo_contamination_sweep() - # demo_policy_comparison() - # demo_session_analysis() diff --git a/lab/config.py b/lab/config.py deleted file mode 100644 index 441085d..0000000 --- a/lab/config.py +++ /dev/null @@ -1,156 +0,0 @@ -""" -Configuration and factory functions for creating pre-configured platforms. - -This module provides: -- RetailConfig, MarketMakingConfig: Configuration dataclasses -- make_retail_platform: Factory for retail dynamic pricing scenarios -- make_market_making_platform: Factory for market making scenarios - -Example: - >>> from lab.config import make_retail_platform - >>> platform = make_retail_platform(RetailConfig(n_instruments=5)) - >>> result = platform.reset(seed=42) -""" -from __future__ import annotations -from dataclasses import dataclass -import numpy as np -from .outlet import (Platform, PlatformConfig, PositionModel, PositionConfig, - PostedPriceMechanism, TwoSidedMechanism, make_instruments, - InstrumentType, LogLevel) -from .outlet.mechanisms.posted_price import PostedPriceConfig -from .outlet.mechanisms.two_sided import TwoSidedConfig -from .population import (SessionArrivalModel, PoissonArrivalModel, HawkesArrivalModel, - ElasticityExecutionModel, IntensityExecutionModel, - ReactiveCompetitorModel, GBMMarketModel) -from .population.arrivals import SessionArrivalConfig, PoissonArrivalConfig, HawkesArrivalConfig -from .population.execution import ElasticityConfig, IntensityConfig -from .population.competitors import ReactiveCompetitorConfig, GBMMarketConfig -from .outlet.objectives.factory import retail_objective, market_making_objective - -@dataclass -class RetailConfig: - """Configuration for retail dynamic pricing scenario. - - Attributes: - n_instruments: Number of products to price - cost_range: (min, max) for random product costs - margin_range: (min, max) for random initial margins - initial_inventory: Starting inventory per product - holding_cost_rate: Cost per unit per step for holding - sessions_per_step: Number of browsing sessions per step - contamination: Fraction of sessions that are scrapers - max_steps: Maximum episode length - seed: Random seed for reproducibility - """ - n_instruments: int = 10 - cost_range: tuple[float, float] = (5.0, 50.0) - margin_range: tuple[float, float] = (0.2, 0.5) - initial_inventory: float = 100.0 - holding_cost_rate: float = 0.002 - sessions_per_step: int = 30 - contamination: float = 0.1 - max_steps: int = 500 - seed: int | None = None - -def make_retail_platform(cfg: RetailConfig | None = None) -> Platform: - """Create a pre-configured retail dynamic pricing platform. - - Components: - - Mechanism: PostedPriceMechanism (single price per product) - - Arrivals: SessionArrivalModel (browsing sessions with views) - - Execution: ElasticityExecutionModel (price sensitivity) - - Market: ReactiveCompetitorModel (can trigger price wars) - - Objective: PnL - holding_cost - volatility - lost_opportunity - - Args: - cfg: Configuration (uses defaults if None) - - Returns: - Configured Platform instance - """ - cfg = cfg or RetailConfig() - rng = np.random.default_rng(cfg.seed) - - instruments = make_instruments(cfg.n_instruments, cfg.cost_range, cfg.margin_range, - InstrumentType.SKU, rng) - instruments.position = np.full(cfg.n_instruments, cfg.initial_inventory) - - mechanism = PostedPriceMechanism(PostedPriceConfig()) - arrival = SessionArrivalModel(SessionArrivalConfig( - sessions_per_step=cfg.sessions_per_step, contamination=cfg.contamination)) - execution = ElasticityExecutionModel(ElasticityConfig()) - position = PositionModel(PositionConfig( - initial_position=cfg.initial_inventory, - holding_cost_rate=cfg.holding_cost_rate)) - market = ReactiveCompetitorModel(ReactiveCompetitorConfig(), refs=instruments.refs) - objective = retail_objective() - - return Platform( - instruments=instruments, mechanism=mechanism, arrival=arrival, - execution=execution, position=position, market=market, objective=objective, - cfg=PlatformConfig(n_instruments=cfg.n_instruments, max_steps=cfg.max_steps, - seed=cfg.seed, log_level=LogLevel.AGG_ONLY) - ) - -@dataclass -class MarketMakingConfig: - """Configuration for market making scenario. - - Attributes: - n_instruments: Number of assets to quote - initial_mid: Initial mid-price for assets - mu: Price drift (expected return) - sigma: Price volatility - gamma: Inventory risk aversion parameter - base_arrival_rate: Order arrival rate (Hawkes baseline) - max_steps: Maximum episode length - seed: Random seed for reproducibility - """ - n_instruments: int = 5 - initial_mid: float = 100.0 - mu: float = 0.0 - sigma: float = 0.02 - gamma: float = 0.1 - base_arrival_rate: float = 20.0 - max_steps: int = 1000 - seed: int | None = None - -def make_market_making_platform(cfg: MarketMakingConfig | None = None) -> Platform: - """Create a pre-configured market making platform. - - Components: - - Mechanism: TwoSidedMechanism (bid-ask spread quoting) - - Arrivals: HawkesArrivalModel (clustered order flow) - - Execution: IntensityExecutionModel (distance-based fills) - - Market: GBMMarketModel (geometric Brownian motion mid-prices) - - Objective: PnL + spread_capture - inventory_risk - - Args: - cfg: Configuration (uses defaults if None) - - Returns: - Configured Platform instance - """ - cfg = cfg or MarketMakingConfig() - rng = np.random.default_rng(cfg.seed) - - instruments = make_instruments(cfg.n_instruments, (cfg.initial_mid*0.9, cfg.initial_mid*1.1), - (0.0, 0.0), InstrumentType.ASSET, rng) - instruments.position = np.zeros(cfg.n_instruments) - - mechanism = TwoSidedMechanism(TwoSidedConfig()) - arrival = HawkesArrivalModel(HawkesArrivalConfig(base_rate=cfg.base_arrival_rate)) - execution = IntensityExecutionModel(IntensityConfig()) - position = PositionModel(PositionConfig( - initial_position=0.0, min_position=-500, max_position=500, - holding_cost_rate=0.0)) # use inventory risk penalty instead - market = GBMMarketModel(GBMMarketConfig(mu=cfg.mu, sigma=cfg.sigma), - initial=instruments.refs) - objective = market_making_objective(gamma=cfg.gamma, sigma=cfg.sigma) - - return Platform( - instruments=instruments, mechanism=mechanism, arrival=arrival, - execution=execution, position=position, market=market, objective=objective, - cfg=PlatformConfig(n_instruments=cfg.n_instruments, max_steps=cfg.max_steps, - seed=cfg.seed, log_level=LogLevel.AGG_ONLY) - ) diff --git a/lab/docs/Makefile b/lab/docs/Makefile deleted file mode 100644 index fe8e88c..0000000 --- a/lab/docs/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/lab/docs/conf.py b/lab/docs/conf.py deleted file mode 100644 index 0e39351..0000000 --- a/lab/docs/conf.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import sys -sys.path.insert(0, os.path.abspath('../..')) - -project = 'Quote-Control Simulator' -copyright = '2025, PHANTOM Research' -author = 'PHANTOM Research' -release = '0.1.0' - -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'sphinx.ext.intersphinx', - 'sphinx.ext.autosummary', -] - -templates_path = ['_templates'] -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] - -html_theme = 'alabaster' -html_static_path = ['_static'] - -autodoc_default_options = { - 'members': True, - 'undoc-members': True, - 'show-inheritance': True, -} - -napoleon_google_docstring = True -napoleon_numpy_docstring = True -napoleon_include_init_with_doc = True - -intersphinx_mapping = { - 'python': ('https://docs.python.org/3', None), - 'numpy': ('https://numpy.org/doc/stable/', None), -} - -autosummary_generate = True diff --git a/lab/docs/index.rst b/lab/docs/index.rst deleted file mode 100644 index bd36ecd..0000000 --- a/lab/docs/index.rst +++ /dev/null @@ -1,40 +0,0 @@ -Quote-Control Simulator -======================= - -Research-grade platform for dynamic pricing and market making experiments. - -The platform abstracts pricing as: **Quote → Arrival → Execution → Position** - -Supports multiple mechanisms: - -* **PostedPrice**: retail dynamic pricing -* **TwoSided**: market making with bid-ask spreads -* **Auction**: reserve/shading for auction settings - -Quick Start ------------ - -.. code-block:: python - - from lab.config import make_retail_platform - from lab.experiments import rollout, fixed_price_policy - - platform = make_retail_platform() - policy = fixed_price_policy(platform.instruments.refs) - result = rollout(platform, policy, n_steps=100) - print(f"Total PnL: {result.total_pnl:.2f}") - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - system_overview - modules/outlet - modules/population - modules/experiments - -Indices -------- - -* :ref:`genindex` -* :ref:`modindex` diff --git a/lab/docs/modules/experiments.rst b/lab/docs/modules/experiments.rst deleted file mode 100644 index c71ee36..0000000 --- a/lab/docs/modules/experiments.rst +++ /dev/null @@ -1,14 +0,0 @@ -Experiments -=========== - -Evaluation & OPE ----------------- - -.. automodule:: lab.experiments.eval - :members: - -Configuration -------------- - -.. automodule:: lab.config - :members: diff --git a/lab/docs/modules/outlet.rst b/lab/docs/modules/outlet.rst deleted file mode 100644 index 9f3b8c3..0000000 --- a/lab/docs/modules/outlet.rst +++ /dev/null @@ -1,77 +0,0 @@ -Outlet (Core Simulator) -======================= - -Types ------ - -.. automodule:: lab.outlet.types - :members: - -Constants ---------- - -.. automodule:: lab.outlet.constants - :members: - -Protocols ---------- - -.. automodule:: lab.outlet.protocols - :members: - -Platform --------- - -.. automodule:: lab.outlet.platform - :members: - -Stock & Position ----------------- - -.. automodule:: lab.outlet.stock - :members: - -Observation ------------ - -.. automodule:: lab.outlet.observation - :members: - -Mechanisms ----------- - -Posted Price -~~~~~~~~~~~~ - -.. automodule:: lab.outlet.mechanisms.posted_price - :members: - -Two-Sided (Market Making) -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. automodule:: lab.outlet.mechanisms.two_sided - :members: - -Auction -~~~~~~~ - -.. automodule:: lab.outlet.mechanisms.auction - :members: - -Objectives ----------- - -.. automodule:: lab.outlet.objectives.base - :members: - -.. automodule:: lab.outlet.objectives.penalties - :members: - -.. automodule:: lab.outlet.objectives.factory - :members: - -Math Utilities --------------- - -.. automodule:: lab.outlet.math_util - :members: diff --git a/lab/docs/modules/population.rst b/lab/docs/modules/population.rst deleted file mode 100644 index 0b7ef75..0000000 --- a/lab/docs/modules/population.rst +++ /dev/null @@ -1,20 +0,0 @@ -Population Models -================= - -Arrival Models --------------- - -.. automodule:: lab.population.arrivals - :members: - -Execution Models ----------------- - -.. automodule:: lab.population.execution - :members: - -Competitor / Market Models --------------------------- - -.. automodule:: lab.population.competitors - :members: diff --git a/lab/docs/system_overview.rst b/lab/docs/system_overview.rst deleted file mode 100644 index 3fda8ad..0000000 --- a/lab/docs/system_overview.rst +++ /dev/null @@ -1,97 +0,0 @@ -System Overview -=============== - -The simulator organises dynamic pricing and market-making experiments as a -closed loop with the following stages: - -* **Quote** – a policy or agent emits a :class:`lab.outlet.types.Quote`. The - quote is normalised and validated by a concrete - :class:`lab.outlet.protocols.Mechanism` implementation - (posted-price, two-sided, auction). -* **Arrival** – a :class:`lab.outlet.protocols.ArrivalModel` samples a stream of - :class:`lab.outlet.types.Opportunity` objects given the current time, - instrument catalogue, and market state. -* **Execution** – the :class:`lab.outlet.protocols.ExecutionModel` converts an - opportunity into a probabilistic fill using the active quote, optional - competitor prices, and demand-side context. -* **Position** – a :class:`lab.outlet.protocols.PositionModel` enforces - inventory or position constraints, censors oversized fills, and accrues - holding and shortage costs. -* **Observation & Reward** – the - :class:`lab.outlet.protocols.ObservationBuilder` constructs the censored view - exposed to the agent, while a :class:`lab.outlet.protocols.Objective` - transforms :class:`lab.outlet.types.StepMetrics` into a scalar reward with an - optional breakdown per term. - -These components are orchestrated by :class:`lab.outlet.platform.Platform`, -which manages internal hidden state, deterministic seeding, and logging. - -Component Matrix ----------------- - -=============================== ============================================== -Layer Responsibilities / Examples -=============================== ============================================== -Mechanisms Quote normalisation, execution semantics - (`posted_price`, `two_sided`, `auction`). -Population models Arrivals (:mod:`lab.population.arrivals`), - execution probability models - (:mod:`lab.population.execution`), and - competitor or market dynamics - (:mod:`lab.population.competitors`). -Position management Inventory limits, replenishment, holding and - shortage costs (:mod:`lab.outlet.stock`). -Observation & logging Censored observations and optional event logs - (:mod:`lab.outlet.observation`). -Objectives Reward composition utilities - (:mod:`lab.outlet.objectives`). -Experiments Rollout helpers, baseline policies, off-policy - evaluation (:mod:`lab.experiments.eval`). -=============================== ============================================== - -Preconfigured Platforms ------------------------ - -Two high-level factories in :mod:`lab.config` wire common combinations of the -building blocks: - -* **Retail dynamic pricing** – posted-price mechanism, session arrivals with - contamination, elasticity-based executions, reactive competitor model, and a - composite objective that penalises volatility, holding costs, and lost - opportunities. -* **Market making** – two-sided quoting, Hawkes order flow, intensity-based - executions, geometric Brownian motion mid-prices, and an objective combining - PnL, spread capture, and quadratic inventory risk. - -State & Reset Behaviour ------------------------ - -When you call :meth:`lab.outlet.platform.Platform.reset`, the platform resets -instrument positions, quotes, and hidden state, but component implementations -may maintain their own internal buffers. For reproducible experiments: - -* Reuse freshly instantiated arrival/market models per episode, or add explicit - ``reset`` methods if the model keeps history (for example, - :class:`lab.population.arrivals.HawkesArrivalModel` maintains an event - history, while :class:`lab.population.competitors.ReactiveCompetitorModel` - tracks prior competitor quotes). -* Seed randomness through the factory configuration (``RetailConfig.seed`` or - ``MarketMakingConfig.seed``) or pass a seed to ``Platform.reset`` for - deterministic rollouts. - -Extending the Platform ----------------------- - -To support a new domain: - -1. Create custom Mechanism/Arrival/Execution/Market/Observation components by - implementing the respective protocol in :mod:`lab.outlet.protocols`. -2. Compose a new objective with - :func:`lab.outlet.objectives.factory.make_composite` or write a bespoke - :class:`lab.outlet.objectives.base.BaseObjective`. -3. Wire everything together via :class:`lab.outlet.platform.Platform` directly - or expose a helper factory in :mod:`lab.config`. - -Use :func:`lab.experiments.rollout` and -:func:`lab.experiments.compare_policies` to benchmark candidate policies under -multiple random seeds, collecting per-step logs for analysis or OPE. diff --git a/lab/experiments/__init__.py b/lab/experiments/__init__.py deleted file mode 100644 index ac427f3..0000000 --- a/lab/experiments/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .eval import (rollout, RolloutResult, compare_policies, compute_ips, OPEResult, - fixed_price_policy, cost_plus_margin_policy, random_walk_policy, epsilon_greedy_policy) - -__all__ = [ - 'rollout', 'RolloutResult', 'compare_policies', 'compute_ips', 'OPEResult', - 'fixed_price_policy', 'cost_plus_margin_policy', 'random_walk_policy', 'epsilon_greedy_policy', -] diff --git a/lab/experiments/eval.py b/lab/experiments/eval.py deleted file mode 100644 index 8bc9330..0000000 --- a/lab/experiments/eval.py +++ /dev/null @@ -1,213 +0,0 @@ -""" -Evaluation utilities for policy testing and off-policy evaluation. - -This module provides: -- rollout: Run a policy on the platform for multiple steps -- compare_policies: Compare multiple policies with statistics -- Baseline policies: fixed_price, cost_plus_margin, random_walk, epsilon_greedy -- OPE estimators: IPS and SNIPS for off-policy evaluation - -Example: - >>> from lab.config import make_retail_platform - >>> from lab.experiments.eval import rollout, fixed_price_policy - >>> platform = make_retail_platform() - >>> policy = fixed_price_policy(platform.instruments.refs) - >>> result = rollout(platform, policy, n_steps=100) - >>> print(f"Total PnL: {result.total_pnl:.2f}") -""" -from __future__ import annotations -from dataclasses import dataclass -from typing import Callable, Any -import numpy as np -from ..outlet.platform import Platform -from ..outlet.types import StepResult, StepLogs, Quote - -# Policy signature: takes (observation_flat, timestep) -> (action_prices, propensity) -Policy = Callable[[np.ndarray, int], tuple[np.ndarray, float]] - -@dataclass -class RolloutResult: - """Results from a policy rollout. - - Attributes: - rewards: Per-step rewards - metrics: Per-step StepMetrics objects - logs: Per-step StepLogs objects - total_reward: Sum of rewards - total_pnl: Sum of PnL from metrics - avg_conversion: Average conversion rate - """ - rewards: list[float] - metrics: list[Any] - logs: list[StepLogs] - total_reward: float - total_pnl: float - avg_conversion: float - -def rollout(platform: Platform, policy: Policy, n_steps: int, seed: int | None = None) -> RolloutResult: - """Execute a policy on the platform for n_steps. - - Args: - platform: The simulation platform - policy: Function (obs, t) -> (action, propensity) - n_steps: Number of steps to run - seed: Random seed for reproducibility - - Returns: - RolloutResult with rewards, metrics, and summary statistics - """ - result = platform.reset(seed) - rewards, metrics, logs = [], [], [] - - for t in range(n_steps): - obs_flat = result.obs.to_flat() - action, propensity = policy(obs_flat, t) - result = platform.step(action, propensity) - rewards.append(result.reward) - metrics.append(result.metrics) - logs.append(result.logs) - if result.terminated or result.truncated: - break - - return RolloutResult( - rewards=rewards, metrics=metrics, logs=logs, - total_reward=sum(rewards), - total_pnl=sum(m.pnl for m in metrics), - avg_conversion=np.mean([m.conversion for m in metrics]) - ) - -# Baseline policies for comparison - -def fixed_price_policy(refs: np.ndarray) -> Policy: - """Policy that always quotes at reference prices.""" - def policy(obs: np.ndarray, t: int) -> tuple[np.ndarray, float]: - return refs.copy(), 1.0 - return policy - -def cost_plus_margin_policy(costs: np.ndarray, margin: float = 0.3) -> Policy: - """Policy that quotes at cost * (1 + margin).""" - prices = costs * (1 + margin) - def policy(obs: np.ndarray, t: int) -> tuple[np.ndarray, float]: - return prices.copy(), 1.0 - return policy - -def random_walk_policy(refs: np.ndarray, volatility: float = 0.05, - rng: np.random.Generator | None = None) -> Policy: - """Policy that performs a random walk around reference prices.""" - rng = rng or np.random.default_rng() - prices = refs.copy() - def policy(obs: np.ndarray, t: int) -> tuple[np.ndarray, float]: - nonlocal prices - delta = rng.normal(0, volatility, len(prices)) - prices = prices * (1 + delta) - prices = np.clip(prices, refs * 0.5, refs * 2.0) - return prices.copy(), 1.0 - return policy - -def epsilon_greedy_policy(base_policy: Policy, refs: np.ndarray, - epsilon: float = 0.1, rng: np.random.Generator | None = None) -> Policy: - """Wrap a policy with epsilon-greedy exploration.""" - rng = rng or np.random.default_rng() - def policy(obs: np.ndarray, t: int) -> tuple[np.ndarray, float]: - if rng.random() < epsilon: - action = refs * rng.uniform(0.8, 1.2, len(refs)) - return action, epsilon / len(refs) - else: - action, _ = base_policy(obs, t) - return action, 1 - epsilon - return policy - -# Off-Policy Evaluation (OPE) - -@dataclass -class OPEResult: - """Results from off-policy evaluation. - - Attributes: - ips_estimate: Inverse Propensity Scoring estimate - snips_estimate: Self-normalized IPS estimate (more stable) - n_samples: Number of samples used - effective_samples: Effective sample size (accounts for variance) - """ - ips_estimate: float - snips_estimate: float - n_samples: int - effective_samples: float - -def compute_ips(logs: list[StepLogs], rewards: list[float], - target_policy: Policy, behavior_propensities: list[float] | None = None) -> OPEResult: - """Compute IPS and SNIPS estimators for off-policy evaluation. - - Uses logged propensities to estimate expected reward under a target - policy from data collected under a behavior policy. - - Args: - logs: Step logs containing propensities - rewards: Observed rewards from behavior policy - target_policy: Policy to evaluate (not currently used, assumes deterministic) - behavior_propensities: Override propensities if not in logs - - Returns: - OPEResult with IPS, SNIPS estimates and sample statistics - """ - if behavior_propensities is None: - # extract from logs - behavior_propensities = [] - for log in logs: - if log.executions: - avg_prop = np.mean([e.propensity for e in log.executions]) - else: - avg_prop = 1.0 - behavior_propensities.append(avg_prop) - - # compute importance weights - weights = [] - for i, (log, bp) in enumerate(zip(logs, behavior_propensities)): - # target propensity would need obs reconstruction - simplified here - tp = 1.0 # assume deterministic target - w = tp / (bp + 1e-8) - weights.append(w) - - weights = np.array(weights) - rewards = np.array(rewards) - - # IPS estimate - ips = np.sum(weights * rewards) / len(rewards) - - # SNIPS (self-normalized) - snips = np.sum(weights * rewards) / (np.sum(weights) + 1e-8) - - # effective sample size - ess = (np.sum(weights) ** 2) / (np.sum(weights ** 2) + 1e-8) - - return OPEResult(ips_estimate=ips, snips_estimate=snips, - n_samples=len(rewards), effective_samples=ess) - -def compare_policies(platform: Platform, policies: dict[str, Policy], - n_steps: int = 100, n_runs: int = 5, seed: int = 42) -> dict[str, dict]: - """Compare multiple policies with statistical summary. - - Args: - platform: Simulation platform - policies: Dict mapping policy names to policy functions - n_steps: Steps per rollout - n_runs: Number of rollouts per policy (different seeds) - seed: Base random seed - - Returns: - Dict mapping policy names to result dicts with mean/std statistics - """ - results = {} - for name, policy in policies.items(): - run_results = [] - for i in range(n_runs): - r = rollout(platform, policy, n_steps, seed=seed + i) - run_results.append(r) - - results[name] = { - 'mean_reward': np.mean([r.total_reward for r in run_results]), - 'std_reward': np.std([r.total_reward for r in run_results]), - 'mean_pnl': np.mean([r.total_pnl for r in run_results]), - 'mean_conversion': np.mean([r.avg_conversion for r in run_results]), - } - return results diff --git a/lab/outlet/__init__.py b/lab/outlet/__init__.py deleted file mode 100644 index 11a8d76..0000000 --- a/lab/outlet/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -from .constants import Side, MechanismType, InstrumentType, OpportunityType, EventType, LogLevel -from .types import (Instrument, InstrumentSet, Quote, Opportunity, Execution, - StepEvent, StepLogs, StepMetrics, MarketState, HiddenState, Observation, StepResult) -from .stock import PositionModel, PositionConfig, make_instruments -from .platform import Platform, PlatformConfig -from .observation import DefaultObservationBuilder, ObservationConfig -from .mechanisms import PostedPriceMechanism, TwoSidedMechanism, AuctionMechanism - -__all__ = [ - 'Side', 'MechanismType', 'InstrumentType', 'OpportunityType', 'EventType', 'LogLevel', - 'Instrument', 'InstrumentSet', 'Quote', 'Opportunity', 'Execution', - 'StepEvent', 'StepLogs', 'StepMetrics', 'MarketState', 'HiddenState', 'Observation', 'StepResult', - 'PositionModel', 'PositionConfig', 'make_instruments', - 'Platform', 'PlatformConfig', - 'DefaultObservationBuilder', 'ObservationConfig', - 'PostedPriceMechanism', 'TwoSidedMechanism', 'AuctionMechanism', -] diff --git a/lab/outlet/constants.py b/lab/outlet/constants.py deleted file mode 100644 index 27c7da2..0000000 --- a/lab/outlet/constants.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -Constants and enumerations for the Quote-Control simulator. - -This module defines the core enums used throughout the platform to ensure -type safety and consistent semantics across different pricing mechanisms. -""" -from enum import Enum, auto - -class Side(Enum): - """Transaction side indicator. - - Attributes: - BUY: Buyer-initiated transaction (customer purchases, market buy order) - SELL: Seller-initiated transaction (market sell order, short sale) - """ - BUY = auto() - SELL = auto() - -class MechanismType(Enum): - """Pricing mechanism type defining how quotes translate to executions. - - Attributes: - POSTED_PRICE: Single posted price per instrument (retail dynamic pricing) - TWO_SIDED_QUOTE: Bid-ask spread quoting (market making, liquidity provision) - AUCTION: Reserve price or bid shading (ad auctions, marketplaces) - """ - POSTED_PRICE = auto() - TWO_SIDED_QUOTE = auto() - AUCTION = auto() - -class InstrumentType(Enum): - """Type of instrument being priced. - - Attributes: - SKU: Retail product with inventory constraints - ASSET: Financial instrument with position limits - LOAN: Credit product with interest rate pricing - SUBSCRIPTION: Recurring service with periodic fees - """ - SKU = auto() - ASSET = auto() - LOAN = auto() - SUBSCRIPTION = auto() - -class OpportunityType(Enum): - """Type of arrival opportunity. - - Attributes: - SESSION: Retail browsing session with potential purchase intent - MARKET_ORDER: Financial market order arrival (buy or sell) - REQUEST: Service or credit request requiring quote response - """ - SESSION = auto() - MARKET_ORDER = auto() - REQUEST = auto() - -class EventType(Enum): - """Type of logged event during simulation. - - Attributes: - ARRIVAL: New opportunity arrived in the system - EXPOSURE: Quote was shown to an arrival - EXECUTION: Transaction was executed - ABANDON: Opportunity abandoned without execution - CANCEL: Pending order was cancelled - """ - ARRIVAL = auto() - EXPOSURE = auto() - EXECUTION = auto() - ABANDON = auto() - CANCEL = auto() - -class LogLevel(Enum): - """Verbosity level for step logging. - - Attributes: - NONE: No logging, fastest execution - AGG_ONLY: Only aggregate statistics per step - FULL: Full event-level logging with propensities for OPE - """ - NONE = auto() - AGG_ONLY = auto() - FULL = auto() diff --git a/lab/outlet/gym_wrapper.py b/lab/outlet/gym_wrapper.py deleted file mode 100644 index 790adcf..0000000 --- a/lab/outlet/gym_wrapper.py +++ /dev/null @@ -1,86 +0,0 @@ -""" -Gymnasium-compatible wrapper for the Quote-Control platform. - -Provides a standard Gym interface for RL training: -- observation_space: Box space with flattened observation -- action_space: Box space with price multipliers [0.5, 2.0] -- reset(), step(), render(), close() methods - -Example: - >>> from lab.config import make_retail_platform - >>> from lab.outlet.gym_wrapper import QuoteGymEnv - >>> env = QuoteGymEnv(make_retail_platform()) - >>> obs, info = env.reset() - >>> obs, reward, done, truncated, info = env.step(env.action_space.sample()) -""" -from __future__ import annotations -from typing import Any -import numpy as np - -try: - import gymnasium as gym - from gymnasium import spaces - HAS_GYM = True -except ImportError: - HAS_GYM = False - -from .platform import Platform, PlatformConfig -from .types import Quote, InstrumentSet, StepResult - -class QuoteGymEnv: - """Gymnasium-compatible environment wrapper. - - Wraps a Platform instance with standard Gym interface. - Actions are price multipliers in [0.5, 2.0] applied to reference prices. - Observations are flattened numpy arrays containing quotes, fills, exposures. - """ - - def __init__(self, platform: Platform): - if not HAS_GYM: - raise ImportError("gymnasium required for QuoteGymEnv") - self.platform = platform - self.n = platform.instruments.n - self._last_result: StepResult | None = None - - # action space: price adjustments as multipliers [0.5, 2.0] - self.action_space = spaces.Box(low=0.5, high=2.0, shape=(self.n,), dtype=np.float32) - - # observation space - obs_dim = self.n * 4 # quotes + fills + exposures + position - if platform.market: - obs_dim += self.n # competitor quotes - self.observation_space = spaces.Box(low=-np.inf, high=np.inf, - shape=(obs_dim,), dtype=np.float32) - - def reset(self, seed: int | None = None, options: dict | None = None) -> tuple[np.ndarray, dict]: - result = self.platform.reset(seed) - self._last_result = result - return result.obs.to_flat().astype(np.float32), result.info - - def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]: - # convert action (multipliers) to absolute prices - refs = self.platform.instruments.refs - prices = refs * action - result = self.platform.step(prices) - self._last_result = result - return (result.obs.to_flat().astype(np.float32), result.reward, - result.terminated, result.truncated, result.info) - - def render(self) -> None: - if self._last_result: - m = self._last_result.metrics - print(f"t={self.platform._t} pnl={m.pnl:.2f} units={m.units_traded:.0f} " - f"conv={m.conversion:.3f} vol={m.volatility:.3f}") - - def close(self) -> None: - pass - -def make_env(platform: Platform) -> QuoteGymEnv: - return QuoteGymEnv(platform) - -if HAS_GYM: - # register if gymnasium available - try: - gym.register(id='QuoteControl-v0', entry_point='outlet.gym_wrapper:QuoteGymEnv') - except: - pass # already registered or other issue diff --git a/lab/outlet/math_util.py b/lab/outlet/math_util.py deleted file mode 100644 index da78745..0000000 --- a/lab/outlet/math_util.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Numerical utilities for stable computation. - -This module provides numerically stable implementations of common operations: -- safe_exp, safe_log: Avoid overflow/underflow -- softmax: Numerically stable softmax -- sigmoid, clamp: Standard transformations -- intensity_decay: Avellaneda-Stoikov fill intensity -- inventory_penalty: Quadratic inventory risk -- poisson_arrivals, hawkes_intensity: Arrival process helpers - -All functions accept both scalars and numpy arrays. -""" -import numpy as np - -EPS = 1e-8 # small constant to avoid division by zero -MAX_EXP = 700.0 # maximum safe exponent to avoid overflow - -def safe_exp(x: np.ndarray | float) -> np.ndarray | float: - return np.exp(np.clip(x, -MAX_EXP, MAX_EXP)) - -def safe_log(x: np.ndarray | float) -> np.ndarray | float: - return np.log(np.maximum(x, EPS)) - -def clamp(x: np.ndarray | float, lo: float, hi: float) -> np.ndarray | float: - return np.clip(x, lo, hi) - -def sigmoid(x: np.ndarray | float) -> np.ndarray | float: - return 1.0 / (1.0 + safe_exp(-x)) - -def softmax(x: np.ndarray, axis: int = -1) -> np.ndarray: - x_max = np.max(x, axis=axis, keepdims=True) - exp_x = safe_exp(x - x_max) - return exp_x / (np.sum(exp_x, axis=axis, keepdims=True) + EPS) - -def geometric_series(base: float, ratio: float, n: int) -> np.ndarray: - return base * (ratio ** np.arange(n)) - -def ema(old: float, new: float, alpha: float = 0.1) -> float: - return alpha * new + (1 - alpha) * old - -def intensity_decay(distance: float, kappa: float = 1.0) -> float: - """Avellaneda-Stoikov style fill intensity decay with quote distance""" - return safe_exp(-kappa * distance) - -def inventory_penalty(q: float, gamma: float = 0.1, sigma: float = 1.0) -> float: - """Quadratic inventory risk penalty""" - return gamma * sigma**2 * q**2 / 2 - -def poisson_arrivals(rate: float, dt: float, rng: np.random.Generator) -> int: - return rng.poisson(rate * dt) - -def hawkes_intensity(base: float, history: np.ndarray, alpha: float, beta: float, t: float) -> float: - """Self-exciting Hawkes process intensity""" - if len(history) == 0: return base - decays = safe_exp(-beta * (t - history[history < t])) - return base + alpha * np.sum(decays) diff --git a/lab/outlet/mechanisms/__init__.py b/lab/outlet/mechanisms/__init__.py deleted file mode 100644 index 3c3c36e..0000000 --- a/lab/outlet/mechanisms/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .posted_price import PostedPriceMechanism -from .two_sided import TwoSidedMechanism -from .auction import AuctionMechanism - -__all__ = ['PostedPriceMechanism', 'TwoSidedMechanism', 'AuctionMechanism'] diff --git a/lab/outlet/mechanisms/auction.py b/lab/outlet/mechanisms/auction.py deleted file mode 100644 index 2260aef..0000000 --- a/lab/outlet/mechanisms/auction.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -Auction mechanism for reserve pricing and bid shading. - -In this mechanism, the agent sets reserve prices that affect -win probability and clearing prices. Used for ad auctions, -marketplace auctions, and similar settings. -""" -from __future__ import annotations -from dataclasses import dataclass -import numpy as np -from ..types import Quote, Opportunity, Execution, InstrumentSet, MarketState -from ..constants import Side -from ..math_util import clamp, sigmoid - -@dataclass -class AuctionConfig: - """Configuration for auction mechanism. - - Attributes: - min_reserve: Minimum reserve price - max_reserve: Maximum reserve price - base_win_prob: Baseline win probability at reference reserve - sensitivity: How much higher reserves reduce win probability - """ - min_reserve: float = 0.0 - max_reserve: float = 100.0 - base_win_prob: float = 0.3 - sensitivity: float = 2.0 - -class AuctionMechanism: - """Auction mechanism for reserve pricing. - - The agent sets reserve prices that affect: - - Win probability: higher reserves reduce chance of winning - - Clearing price: bounded between reserve and simulated max bid - - Win probability: base_prob * sigmoid(-sensitivity * (reserve - ref) / ref) - Clearing price: max(reserve, min(max_bid, reserve + random_increment)) - - Only BUY-side opportunities are processed (auction wins). - """ - - def __init__(self, cfg: AuctionConfig | None = None): - self.cfg = cfg or AuctionConfig() - - def apply_quote(self, quote: Quote, instruments: InstrumentSet, - rng: np.random.Generator) -> Quote: - reserves = clamp(quote.prices, self.cfg.min_reserve, self.cfg.max_reserve) - return Quote(prices=reserves, propensity=quote.propensity, metadata=quote.metadata) - - def process_opportunity(self, opp: Opportunity, quote: Quote, - instruments: InstrumentSet, market: MarketState | None, - rng: np.random.Generator) -> Execution | None: - if opp.side != Side.BUY: return None - idx = int(opp.instrument_id) - reserve = float(quote.prices[idx]) - ref = instruments.refs[idx] - - # win probability decreases with higher reserve - relative_reserve = (reserve - ref) / (ref + 1e-8) - win_prob = self.cfg.base_win_prob * sigmoid(-self.cfg.sensitivity * relative_reserve) - - if rng.random() > win_prob: return None - - # clearing price is between reserve and some max bid (simulated) - max_bid = ref * (1 + rng.exponential(0.2)) - clearing = max(reserve, min(max_bid, reserve + rng.exponential(0.1) * ref)) - - return Execution( - opportunity_id=opp.id, instrument_id=opp.instrument_id, - side=opp.side, size_requested=opp.size, size_filled=opp.size, - price=clearing, propensity=quote.propensity * win_prob, t=opp.t - ) diff --git a/lab/outlet/mechanisms/posted_price.py b/lab/outlet/mechanisms/posted_price.py deleted file mode 100644 index 92bac12..0000000 --- a/lab/outlet/mechanisms/posted_price.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -Posted price mechanism for retail dynamic pricing. - -In this mechanism, the agent posts a single price per instrument. -Buyers decide whether to purchase based on the posted price. -This is the standard e-commerce dynamic pricing model. -""" -from __future__ import annotations -from dataclasses import dataclass -import numpy as np -from ..types import Quote, Opportunity, Execution, InstrumentSet, MarketState -from ..constants import Side -from ..math_util import clamp - -@dataclass -class PostedPriceConfig: - """Configuration for posted price mechanism. - - Attributes: - min_price: Absolute minimum price - max_price: Absolute maximum price - max_delta_pct: Maximum price change per step as fraction of previous - min_margin_pct: Minimum margin over cost basis - round_to: Price rounding granularity (None = no rounding) - """ - min_price: float = 0.01 - max_price: float = 1000.0 - max_delta_pct: float = 0.2 - min_margin_pct: float = 0.05 - round_to: float | None = 0.01 - -class PostedPriceMechanism: - """Posted price mechanism for retail dynamic pricing. - - The agent posts a single price per product. Constraints enforced: - - Prices within [min_price, max_price] - - Margin at least min_margin_pct above cost - - Price changes limited to max_delta_pct per step - - Prices rounded to round_to granularity - - Only BUY-side opportunities are processed (customers purchasing). - """ - - def __init__(self, cfg: PostedPriceConfig | None = None): - self.cfg = cfg or PostedPriceConfig() - - def apply_quote(self, quote: Quote, instruments: InstrumentSet, - rng: np.random.Generator) -> Quote: - prices = quote.prices.copy() - costs = instruments.costs - refs = instruments.refs - c = self.cfg - - # enforce min margin - min_prices = costs * (1 + c.min_margin_pct) - prices = np.maximum(prices, min_prices) - - # enforce absolute bounds - prices = clamp(prices, c.min_price, c.max_price) - - # enforce max delta if we have history - if 'prev_prices' in quote.metadata: - prev = quote.metadata['prev_prices'] - max_change = prev * c.max_delta_pct - prices = clamp(prices, prev - max_change, prev + max_change) - - # round prices - if c.round_to: - prices = np.round(prices / c.round_to) * c.round_to - - return Quote(prices=prices, propensity=quote.propensity, - metadata={**quote.metadata, 'prev_prices': prices}) - - def process_opportunity(self, opp: Opportunity, quote: Quote, - instruments: InstrumentSet, market: MarketState | None, - rng: np.random.Generator) -> Execution | None: - if opp.side != Side.BUY: return None # posted price is buy-only - idx = int(opp.instrument_id) - price = float(quote.prices[idx]) - return Execution( - opportunity_id=opp.id, instrument_id=opp.instrument_id, - side=opp.side, size_requested=opp.size, size_filled=opp.size, - price=price, propensity=quote.propensity, t=opp.t - ) diff --git a/lab/outlet/mechanisms/two_sided.py b/lab/outlet/mechanisms/two_sided.py deleted file mode 100644 index 166f4d9..0000000 --- a/lab/outlet/mechanisms/two_sided.py +++ /dev/null @@ -1,89 +0,0 @@ -""" -Two-sided quoting mechanism for market making. - -In this mechanism, the agent posts both bid and ask prices. -Execution depends on the distance from the market mid-price. -This models liquidity provision in financial markets. -""" -from __future__ import annotations -from dataclasses import dataclass -import numpy as np -from ..types import Quote, Opportunity, Execution, InstrumentSet, MarketState -from ..constants import Side -from ..math_util import clamp, intensity_decay - -@dataclass -class TwoSidedConfig: - """Configuration for two-sided quoting mechanism. - - Attributes: - min_spread: Minimum bid-ask spread - max_spread: Maximum bid-ask spread - min_price: Absolute minimum price - max_price: Absolute maximum price - fill_kappa: Intensity decay parameter (higher = faster decay with distance) - """ - min_spread: float = 0.01 - max_spread: float = 0.5 - min_price: float = 0.01 - max_price: float = 10000.0 - fill_kappa: float = 1.5 - -class TwoSidedMechanism: - """Two-sided quoting mechanism for market making. - - The agent posts bid (buy) and ask (sell) prices around a mid-point. - Fill probability decays exponentially with distance from mid-price, - following the Avellaneda-Stoikov intensity model. - - Both BUY and SELL opportunities are processed: - - BUY: customer buys at agent's ask price - - SELL: customer sells at agent's bid price - """ - - def __init__(self, cfg: TwoSidedConfig | None = None): - self.cfg = cfg or TwoSidedConfig() - - def apply_quote(self, quote: Quote, instruments: InstrumentSet, - rng: np.random.Generator) -> Quote: - prices = quote.prices.copy() - spreads = quote.spreads.copy() if quote.spreads is not None else np.full_like(prices, 0.02) - c = self.cfg - - prices = clamp(prices, c.min_price, c.max_price) - spreads = clamp(spreads, c.min_spread, c.max_spread) - - # ensure bids < asks - half_spread = spreads / 2 - bids = prices - half_spread - asks = prices + half_spread - bids = np.maximum(bids, c.min_price) - asks = np.minimum(asks, c.max_price) - spreads = asks - bids - prices = (bids + asks) / 2 - - return Quote(prices=prices, spreads=spreads, propensity=quote.propensity, - metadata=quote.metadata) - - def process_opportunity(self, opp: Opportunity, quote: Quote, - instruments: InstrumentSet, market: MarketState | None, - rng: np.random.Generator) -> Execution | None: - idx = int(opp.instrument_id) - mid = market.mid_prices[idx] if market and market.mid_prices is not None else quote.prices[idx] - - if opp.side == Side.BUY: - price = float(quote.asks[idx]) if quote.asks is not None else float(quote.prices[idx]) - distance = price - mid - else: - price = float(quote.bids[idx]) if quote.bids is not None else float(quote.prices[idx]) - distance = mid - price - - # probabilistic fill based on distance from mid - fill_prob = intensity_decay(abs(distance), self.cfg.fill_kappa) - if rng.random() > fill_prob: return None - - return Execution( - opportunity_id=opp.id, instrument_id=opp.instrument_id, - side=opp.side, size_requested=opp.size, size_filled=opp.size, - price=price, propensity=quote.propensity * fill_prob, t=opp.t - ) diff --git a/lab/outlet/objectives/__init__.py b/lab/outlet/objectives/__init__.py deleted file mode 100644 index 063b7a5..0000000 --- a/lab/outlet/objectives/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from .base import BaseObjective, CompositeObjective -from .penalties import (PnLObjective, VolatilityPenalty, HoldingCostPenalty, - LostOpportunityCostPenalty, InventoryRiskPenalty, SpreadCaptureReward) -from .factory import make_objective, make_composite, retail_objective, market_making_objective - -__all__ = [ - 'BaseObjective', 'CompositeObjective', - 'PnLObjective', 'VolatilityPenalty', 'HoldingCostPenalty', - 'LostOpportunityCostPenalty', 'InventoryRiskPenalty', 'SpreadCaptureReward', - 'make_objective', 'make_composite', 'retail_objective', 'market_making_objective', -] diff --git a/lab/outlet/objectives/base.py b/lab/outlet/objectives/base.py deleted file mode 100644 index 49847aa..0000000 --- a/lab/outlet/objectives/base.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -Base classes for reward objectives. - -Objectives compute scalar rewards from step metrics. The CompositeObjective -allows combining multiple objectives with weights for multi-objective optimization. -""" -from __future__ import annotations -from abc import ABC, abstractmethod -from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation - -class BaseObjective(ABC): - """Abstract base class for reward objectives. - - Subclasses must implement reward() and breakdown() methods. - """ - - @abstractmethod - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: ... - - @abstractmethod - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: ... - -class CompositeObjective(BaseObjective): - """Weighted sum of multiple objectives. - - Allows combining multiple reward terms (e.g., PnL - holding_cost - volatility). - - Args: - objectives: List of (objective, weight) tuples - """ - - def __init__(self, objectives: list[tuple[BaseObjective, float]]): - self.objectives = objectives - - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: - return sum(w * obj.reward(quote, instruments, metrics, hidden, obs) - for obj, w in self.objectives) - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: - bd = {} - for obj, w in self.objectives: - for k, v in obj.breakdown(quote, instruments, metrics, hidden, obs).items(): - bd[k] = w * v - return bd diff --git a/lab/outlet/objectives/factory.py b/lab/outlet/objectives/factory.py deleted file mode 100644 index 6e75294..0000000 --- a/lab/outlet/objectives/factory.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Factory functions for creating objectives. - -Provides: -- make_objective: Create single objective by name -- make_composite: Create weighted combination of objectives -- retail_objective: Default objective for retail pricing -- market_making_objective: Default objective for market making -""" -from __future__ import annotations -from .base import BaseObjective, CompositeObjective -from .penalties import (PnLObjective, VolatilityPenalty, HoldingCostPenalty, - LostOpportunityCostPenalty, InventoryRiskPenalty, SpreadCaptureReward) - -REGISTRY: dict[str, type[BaseObjective]] = { - 'pnl': PnLObjective, - 'volatility': VolatilityPenalty, - 'holding_cost': HoldingCostPenalty, - 'lost_opportunity': LostOpportunityCostPenalty, - 'inventory_risk': InventoryRiskPenalty, - 'spread_capture': SpreadCaptureReward, -} - -def make_objective(name: str, **kwargs) -> BaseObjective: - """Create an objective by name. - - Args: - name: Objective name (pnl, volatility, holding_cost, lost_opportunity, - inventory_risk, spread_capture) - **kwargs: Passed to objective constructor - - Returns: - Instantiated objective - """ - if name not in REGISTRY: - raise ValueError(f"Unknown objective: {name}. Available: {list(REGISTRY.keys())}") - return REGISTRY[name](**kwargs) - -def make_composite(spec: list[tuple[str, float, dict]] | dict[str, float]) -> CompositeObjective: - """Create composite objective from specification. - - Args: - spec: Either: - - list of (name, weight, kwargs) tuples for full control - - dict of {name: weight} for simple cases - - Returns: - CompositeObjective with specified components - """ - objectives = [] - if isinstance(spec, dict): - for name, weight in spec.items(): - objectives.append((make_objective(name), weight)) - else: - for name, weight, kwargs in spec: - objectives.append((make_objective(name, **kwargs), weight)) - return CompositeObjective(objectives) - -def retail_objective(volatility_weight: float = 0.1, holding_weight: float = 0.5, - stockout_weight: float = 0.3) -> CompositeObjective: - """Default objective for retail dynamic pricing. - - Reward = PnL - volatility_weight*volatility - holding_weight*holding_cost - - stockout_weight*lost_opportunity - """ - return make_composite({ - 'pnl': 1.0, - 'volatility': volatility_weight, - 'holding_cost': holding_weight, - 'lost_opportunity': stockout_weight, - }) - -def market_making_objective(gamma: float = 0.1, sigma: float = 1.0) -> CompositeObjective: - """Default objective for market making. - - Reward = PnL + 0.5*spread_capture - inventory_risk(gamma, sigma) - """ - return CompositeObjective([ - (PnLObjective(), 1.0), - (SpreadCaptureReward(), 0.5), - (InventoryRiskPenalty(gamma=gamma, sigma=sigma), 1.0), - ]) diff --git a/lab/outlet/objectives/penalties.py b/lab/outlet/objectives/penalties.py deleted file mode 100644 index 916e0e2..0000000 --- a/lab/outlet/objectives/penalties.py +++ /dev/null @@ -1,101 +0,0 @@ -""" -Standard objective components and penalties. - -This module provides common reward terms: -- PnLObjective: Basic profit and loss -- VolatilityPenalty: Penalize price volatility for UX -- HoldingCostPenalty: Inventory holding cost -- LostOpportunityCostPenalty: Stockout/missed fill cost -- InventoryRiskPenalty: Quadratic inventory risk (market making) -- SpreadCaptureReward: Bid-ask spread capture (market making) -""" -from __future__ import annotations -import numpy as np -from .base import BaseObjective -from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation -from ..math_util import inventory_penalty - -class PnLObjective(BaseObjective): - """Profit and loss reward (revenue - cost).""" - - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: - return metrics.pnl - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: - return {'pnl': metrics.pnl, 'revenue': metrics.revenue, 'cost': metrics.cost} - -class VolatilityPenalty(BaseObjective): - """Penalize price volatility for user experience.""" - - def __init__(self, scale: float = 1.0): - self.scale = scale - - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: - return -self.scale * metrics.volatility - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: - return {'volatility_penalty': -self.scale * metrics.volatility} - -class HoldingCostPenalty(BaseObjective): - """Penalty for inventory holding costs.""" - - def __init__(self, scale: float = 1.0): - self.scale = scale - - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: - return -self.scale * metrics.position_cost - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: - return {'holding_cost_penalty': -self.scale * metrics.position_cost} - -class LostOpportunityCostPenalty(BaseObjective): - """Penalty for lost sales due to stockouts or missed fills.""" - - def __init__(self, scale: float = 1.0): - self.scale = scale - - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: - return -self.scale * metrics.lost_opportunity - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: - return {'lost_opportunity_penalty': -self.scale * metrics.lost_opportunity} - -class InventoryRiskPenalty(BaseObjective): - """Quadratic inventory risk penalty (Avellaneda-Stoikov style). - - Penalty = gamma * sigma^2 * q^2 / 2, where q is total position. - Encourages market makers to keep inventory near zero. - """ - - def __init__(self, gamma: float = 0.1, sigma: float = 1.0): - self.gamma = gamma - self.sigma = sigma - - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: - if obs.position is None: return 0.0 - q = np.sum(obs.position) - return -inventory_penalty(q, self.gamma, self.sigma) - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: - return {'inventory_risk_penalty': self.reward(quote, instruments, metrics, hidden, obs)} - -class SpreadCaptureReward(BaseObjective): - """Reward for capturing bid-ask spread in market making.""" - - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float: - return metrics.spread_capture - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]: - return {'spread_capture': metrics.spread_capture} diff --git a/lab/outlet/observation.py b/lab/outlet/observation.py deleted file mode 100644 index cffc71b..0000000 --- a/lab/outlet/observation.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -Observation construction with demand censoring. - -This module provides the ObservationBuilder that constructs agent observations -from step data. The key invariant is that observations only contain censored -data (fills) and never true demand, ensuring proper research conditions. - -The ObservationConfig controls what is included in observations: -- Position visibility -- Market/competitor visibility -- Demand proxy method -""" -from __future__ import annotations -from dataclasses import dataclass -import numpy as np -from .types import Quote, InstrumentSet, StepLogs, StepMetrics, MarketState, HiddenState, Observation - -@dataclass -class ObservationConfig: - """Configuration for observation construction. - - Attributes: - include_position: Include current position in observation - include_market: Include market/competitor state in observation - mask_true_demand: If True, observation excludes true demand (research mode) - demand_proxy: Method for demand proxy ('fills', 'exposures', 'weighted') - exposure_weights: Weights for weighted demand proxy - """ - include_position: bool = True - include_market: bool = True - mask_true_demand: bool = True - demand_proxy: str = 'fills' - exposure_weights: dict[str, float] | None = None - -class DefaultObservationBuilder: - """Constructs censored observations for the agent. - - Ensures the key research invariant: observations contain only - censored fills (realized sales), never true demand. True demand - is placed in the info dict for research analysis only. - """ - - def __init__(self, cfg: ObservationConfig | None = None): - self.cfg = cfg or ObservationConfig() - - def build(self, quote: Quote, instruments: InstrumentSet, logs: StepLogs, - metrics: StepMetrics, market: MarketState | None, - hidden: HiddenState, mask_demand: bool, t: int) -> Observation: - n = instruments.n - cfg = self.cfg - - # always show censored fills - fills = logs.censored_fills if logs.censored_fills is not None else np.zeros(n) - - # compute exposures from logs - if logs.events: - exposures = np.zeros(n) - for e in logs.events: - if e.instrument_id is not None: - exposures[e.instrument_id] += 1 - else: - exposures = logs.aggregates.get('exposures', np.zeros(n)) - - # position - only if configured and available - position = None - if cfg.include_position and instruments.position is not None: - position = instruments.position.copy() - - # market state - only if configured - obs_market = market if cfg.include_market else None - - return Observation( - quotes=quote.prices.copy(), - position=position, - fills=fills, - exposures=exposures, - market=obs_market, - t=t - ) - - def make_space(self, n_instruments: int, include_market: bool = True) -> dict: - """Returns dict describing observation space for gym""" - space = { - 'quotes': {'shape': (n_instruments,), 'low': 0, 'high': np.inf}, - 'fills': {'shape': (n_instruments,), 'low': 0, 'high': np.inf}, - 'exposures': {'shape': (n_instruments,), 'low': 0, 'high': np.inf}, - } - if self.cfg.include_position: - space['position'] = {'shape': (n_instruments,), 'low': -np.inf, 'high': np.inf} - if include_market: - space['competitor_quotes'] = {'shape': (n_instruments,), 'low': 0, 'high': np.inf} - return space diff --git a/lab/outlet/platform.py b/lab/outlet/platform.py deleted file mode 100644 index eabb69a..0000000 --- a/lab/outlet/platform.py +++ /dev/null @@ -1,285 +0,0 @@ -""" -Main simulation platform orchestrating the Quote-Control loop. - -The Platform class is the central coordinator that: -1. Receives pricing actions (quotes) from the agent -2. Generates arrivals via the ArrivalModel -3. Processes executions via Mechanism and ExecutionModel -4. Applies position censorship via PositionModel -5. Computes metrics and reward via Objective -6. Returns censored observations - -Example: - >>> from lab.config import make_retail_platform - >>> platform = make_retail_platform() - >>> result = platform.reset(seed=42) - >>> result = platform.step(platform.instruments.refs * 1.1) - >>> print(f"PnL: {result.metrics.pnl:.2f}") -""" -from __future__ import annotations -from dataclasses import dataclass, field -from typing import Any -import numpy as np -from .types import (Quote, Opportunity, Execution, InstrumentSet, StepLogs, StepMetrics, - StepEvent, MarketState, HiddenState, Observation, StepResult) -from .constants import LogLevel, EventType, Side -from .protocols import Mechanism, ArrivalModel, ExecutionModel, PositionModel, MarketModel, ObservationBuilder, Objective -from .stock import PositionModel as DefaultPositionModel, PositionConfig -from .observation import DefaultObservationBuilder, ObservationConfig -from .objectives.factory import retail_objective - -@dataclass -class PlatformConfig: - """Configuration for the simulation platform. - - Attributes: - n_instruments: Number of instruments in the simulation - max_steps: Maximum steps before episode terminates - dt: Time duration per step (affects arrival rates) - log_level: Verbosity of logging (NONE, AGG_ONLY, FULL) - mask_demand: If True, observations exclude true demand (research mode) - seed: Random seed for reproducibility - """ - n_instruments: int = 10 - max_steps: int = 1000 - dt: float = 1.0 - log_level: LogLevel = LogLevel.AGG_ONLY - mask_demand: bool = True - seed: int | None = None - -class Platform: - """Main simulation orchestrator implementing Quote -> Arrival -> Execution -> Position. - - The Platform coordinates all components to simulate a pricing environment: - - Mechanism: validates quotes and determines execution logic - - ArrivalModel: generates demand opportunities - - ExecutionModel: computes acceptance probabilities - - PositionModel: manages inventory/position and censorship - - MarketModel: updates competitor/market state - - ObservationBuilder: constructs censored observations - - Objective: computes reward from metrics - - Attributes: - instruments: The instrument set being priced - mechanism: Quote validation and execution mechanism - arrival: Demand arrival generator - execution: Acceptance probability model - position: Inventory/position manager - market: Competitor/market dynamics (optional) - obs_builder: Observation constructor - objective: Reward function - cfg: Platform configuration - """ - - def __init__(self, instruments: InstrumentSet, mechanism: Mechanism, - arrival: ArrivalModel, execution: ExecutionModel, - position: PositionModel | None = None, - market: MarketModel | None = None, - obs_builder: ObservationBuilder | None = None, - objective: Objective | None = None, - cfg: PlatformConfig | None = None): - self.instruments = instruments - self.mechanism = mechanism - self.arrival = arrival - self.execution = execution - self.position = position or DefaultPositionModel(PositionConfig()) - self.market = market - self.obs_builder = obs_builder or DefaultObservationBuilder() - self.objective = objective or retail_objective() - self.cfg = cfg or PlatformConfig(n_instruments=instruments.n) - - self._t: int = 0 - self._rng: np.random.Generator = np.random.default_rng(self.cfg.seed) - self._quote: Quote | None = None - self._market_state: MarketState | None = None - self._hidden: HiddenState = HiddenState() - self._prev_prices: np.ndarray | None = None - - def reset(self, seed: int | None = None) -> StepResult: - """Reset the platform to initial state. - - Args: - seed: Random seed (overrides config seed if provided) - - Returns: - Initial StepResult with zeroed metrics and initial observation - """ - self._t = 0 - self._rng = np.random.default_rng(seed or self.cfg.seed) - self._hidden = HiddenState() - self._prev_prices = self.instruments.refs.copy() - - # reset position - self.position.reset(self.instruments, self._rng) - self.instruments.position = self.position.position - - # initial quote at reference prices - self._quote = Quote(prices=self.instruments.refs.copy(), propensity=1.0, - metadata={'prev_prices': self._prev_prices}) - self._quote = self.mechanism.apply_quote(self._quote, self.instruments, self._rng) - - # initial market state - if self.market: - self._market_state = self.market.step(0, self._quote, self._hidden, self._rng) - - # build initial observation - logs = StepLogs(aggregates={'reset': True}, - true_demand=np.zeros(self.instruments.n), - censored_fills=np.zeros(self.instruments.n)) - metrics = StepMetrics() - obs = self.obs_builder.build(self._quote, self.instruments, logs, metrics, - self._market_state, self._hidden, self.cfg.mask_demand, 0) - - return StepResult(obs=obs, reward=0.0, terminated=False, truncated=False, - info={'true_demand': logs.true_demand}, metrics=metrics, - logs=logs, hidden=self._hidden) - - def step(self, action: np.ndarray, propensity: float = 1.0) -> StepResult: - """Execute one simulation step with the given pricing action. - - The step proceeds as follows: - 1. Apply quote constraints via mechanism - 2. Update market/competitor state - 3. Generate arrivals - 4. Process arrivals -> executions with acceptance check - 5. Apply position censorship to executions - 6. Update position state - 7. Compute metrics (PnL, costs, etc.) - 8. Build logs with propensities - 9. Construct censored observation - 10. Compute reward - - Args: - action: Price vector for all instruments - propensity: P(action | behavior policy) for OPE logging - - Returns: - StepResult containing observation, reward, metrics, logs, and hidden state - """ - self._t += 1 - cfg = self.cfg - - # 1. apply quote from action - self._quote = Quote(prices=action, propensity=propensity, - metadata={'prev_prices': self._prev_prices}) - self._quote = self.mechanism.apply_quote(self._quote, self.instruments, self._rng) - self._prev_prices = self._quote.prices.copy() - self._hidden.quote_history.append(self._quote.prices.copy()) - - # 2. update market/competitors - if self.market: - self._market_state = self.market.step(self._t, self._quote, self._hidden, self._rng) - self._hidden.market_history.append(self._market_state) - - # 3. generate arrivals - opps = self.arrival.sample(self._t, cfg.dt, self.instruments, - self._market_state, self._hidden, self._rng) - - # 4. process opportunities -> executions - executions: list[Execution] = [] - events: list[StepEvent] = [] - true_demand = np.zeros(self.instruments.n) - - for opp in opps: - # log exposure - if cfg.log_level == LogLevel.FULL: - events.append(StepEvent(t=opp.t, type=EventType.EXPOSURE, - instrument_id=opp.instrument_id, - opportunity_id=opp.id, - price=float(self._quote.prices[opp.instrument_id]), - propensity=self._quote.propensity)) - - # check acceptance - prob = self.execution.prob(opp, self._quote, self.instruments, - self._market_state, self._rng) - if self._rng.random() < prob: - # create execution - exe = self.mechanism.process_opportunity(opp, self._quote, self.instruments, - self._market_state, self._rng) - if exe: - true_demand[exe.instrument_id] += exe.size_requested - # apply position censorship - exe = self.position.apply_execution(exe) - executions.append(exe) - if cfg.log_level == LogLevel.FULL: - events.append(StepEvent(t=exe.t, type=EventType.EXECUTION, - instrument_id=exe.instrument_id, - opportunity_id=exe.opportunity_id, - price=exe.price, size=exe.size_filled, - propensity=exe.propensity)) - - # 5. update position state - self.position.step(self._t) - self.instruments.position = self.position.position - - # 6. compute metrics - censored_fills = np.zeros(self.instruments.n) - revenue = 0.0 - cost = 0.0 - spread_capture = 0.0 - - for exe in executions: - censored_fills[exe.instrument_id] += exe.size_filled - if exe.side == Side.BUY: - revenue += exe.price * exe.size_filled - cost += self.instruments.costs[exe.instrument_id] * exe.size_filled - else: - revenue -= exe.price * exe.size_filled - cost -= self.instruments.costs[exe.instrument_id] * exe.size_filled - # spread capture for market making - if self._quote.spreads is not None and self._market_state and self._market_state.mid_prices is not None: - mid = self._market_state.mid_prices[exe.instrument_id] - if exe.side == Side.BUY: - spread_capture += (exe.price - mid) * exe.size_filled - else: - spread_capture += (mid - exe.price) * exe.size_filled - - pnl = revenue - cost - units = float(np.sum(censored_fills)) - lost = float(np.sum(true_demand - censored_fills)) - - # volatility - volatility = 0.0 - if len(self._hidden.quote_history) > 1: - prev = self._hidden.quote_history[-2] - volatility = float(np.mean(np.abs(self._quote.prices - prev) / (prev + 1e-8))) - - metrics = StepMetrics( - pnl=pnl, revenue=revenue, cost=cost, units_traded=units, - position_cost=self.position.holding_cost, - lost_opportunity=self.position.shortage_cost + lost * np.mean(self._quote.prices) * 0.1, - spread_capture=spread_capture, volatility=volatility, - conversion=units / (len(opps) + 1e-8), - per_instrument={'fills': censored_fills, 'demand': true_demand} - ) - - # 7. build logs - logs = StepLogs( - events=events if cfg.log_level == LogLevel.FULL else None, - executions=executions if cfg.log_level == LogLevel.FULL else None, - aggregates={'n_arrivals': len(opps), 'n_executions': len(executions), - 'exposures': np.bincount([o.instrument_id for o in opps], - minlength=self.instruments.n).astype(float)}, - true_demand=true_demand, - censored_fills=censored_fills - ) - - # 8. build observation - obs = self.obs_builder.build(self._quote, self.instruments, logs, metrics, - self._market_state, self._hidden, cfg.mask_demand, self._t) - - # 9. compute reward - reward = self.objective.reward(self._quote, self.instruments, metrics, self._hidden, obs) - breakdown = self.objective.breakdown(self._quote, self.instruments, metrics, self._hidden, obs) - # print(f"Step {self._t}: Reward={reward:.2f}, Breakdown={breakdown}") - - - # 10. check termination - terminated = self._t >= cfg.max_steps - truncated = False - - info = {'true_demand': true_demand, 'breakdown': self.objective.breakdown( - self._quote, self.instruments, metrics, self._hidden, obs)} - - return StepResult(obs=obs, reward=reward, terminated=terminated, truncated=truncated, - info=info, metrics=metrics, logs=logs, hidden=self._hidden) diff --git a/lab/outlet/protocols.py b/lab/outlet/protocols.py deleted file mode 100644 index 13bf967..0000000 --- a/lab/outlet/protocols.py +++ /dev/null @@ -1,297 +0,0 @@ -""" -Protocol definitions for pluggable simulator components. - -This module defines the interfaces (Protocols) that allow swapping different -implementations for each stage of the Quote -> Arrival -> Execution -> Position -pipeline. All protocols use structural subtyping (duck typing). - -Protocols: - Mechanism: How quotes translate to executions (posted price, two-sided, auction) - ArrivalModel: How opportunities arrive (Poisson, Hawkes, sessions) - ExecutionModel: Acceptance probability given quote (elasticity, intensity) - PositionModel: Inventory/position management and censorship - MarketModel: Competitor/market dynamics - ObservationBuilder: Constructs agent observations with censoring - Objective: Computes reward from metrics -""" -from __future__ import annotations -from typing import Protocol, Any, TYPE_CHECKING -import numpy as np -if TYPE_CHECKING: - from .types import (Quote, Opportunity, Execution, InstrumentSet, StepLogs, - StepMetrics, HiddenState, Observation, MarketState) - from .constants import LogLevel - -class Mechanism(Protocol): - """Defines how quotes translate to executions. - - The Mechanism is the core abstraction that differentiates pricing domains: - - PostedPrice: single price, buyer decides to purchase or not - - TwoSided: bid/ask spread, execution depends on distance from mid - - Auction: reserve price affects win probability and clearing price - - Methods: - apply_quote: Enforce constraints and return valid quote - process_opportunity: Determine execution given opportunity and quote - """ - def apply_quote(self, quote: Quote, instruments: InstrumentSet, - rng: np.random.Generator) -> Quote: - """Apply mechanism-specific constraints to a quote. - - Args: - quote: Raw quote from policy - instruments: Current instrument set with costs/refs - rng: Random generator for stochastic constraints - - Returns: - Constrained quote satisfying mechanism rules (min margin, max delta, etc.) - """ - ... - - def process_opportunity(self, opp: Opportunity, quote: Quote, - instruments: InstrumentSet, market: MarketState | None, - rng: np.random.Generator) -> Execution | None: - """Process an opportunity against the current quote. - - Args: - opp: Incoming opportunity (session, order, request) - quote: Current posted quote - instruments: Instrument set - market: Current market state (competitor prices, mid-prices) - rng: Random generator - - Returns: - Execution if opportunity converts, None otherwise - """ - ... - -class ArrivalModel(Protocol): - """Generates opportunities (demand arrivals) for each step. - - Different arrival models capture different demand dynamics: - - Poisson: constant rate, memoryless - - Hawkes: self-exciting, clustered arrivals - - Session: retail browsing with multi-product views - - Methods: - sample: Generate opportunities for a time interval - """ - def sample(self, t: float, dt: float, instruments: InstrumentSet, - market: MarketState | None, hidden: HiddenState, - rng: np.random.Generator) -> list[Opportunity]: - """Sample opportunities for time interval [t, t+dt). - - Args: - t: Current time - dt: Time interval length - instruments: Available instruments - market: Current market state - hidden: Hidden state (contains demand intensity, contamination) - rng: Random generator - - Returns: - List of opportunities arriving in this interval - """ - ... - -class ExecutionModel(Protocol): - """Computes acceptance/execution probability given quote and context. - - Different models capture different demand responses: - - Elasticity: price sensitivity with competitor cross-effects - - Intensity: distance-based fill probability (market making) - - Logit: discrete choice model - - Methods: - prob: Compute acceptance probability - uncensor: Estimate true demand from censored fills - """ - def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet, - market: MarketState | None, rng: np.random.Generator) -> float: - """Compute probability that opportunity accepts the quote. - - Args: - opp: Opportunity to evaluate - quote: Current quote - instruments: Instrument set - market: Market state (competitor prices affect cross-elasticity) - rng: Random generator - - Returns: - Probability in [0, 1] that opportunity executes - """ - ... - - def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, - context: dict[str, Any] | None = None) -> np.ndarray: - """Estimate true demand from censored fills. - - Used for demand estimation research under inventory censorship. - - Args: - fills: Observed (censored) fill counts - instruments: Instrument set - context: Additional context (exposures, prices shown) - - Returns: - Estimated true demand counts - """ - ... - -class PositionModel(Protocol): - """Manages inventory (retail) or position (finance). - - Handles: - - Position constraints and censorship - - Holding costs (retail) or inventory risk (finance) - - Replenishment and order receipt - - Methods: - reset: Initialize position state - available: Query available capacity for a trade - apply_execution: Censor execution by available position - step: Process time-based updates (replenishment, holding cost) - - Properties: - position: Current position vector - holding_cost: Cost incurred this step from holding position - """ - def reset(self, instruments: InstrumentSet, rng: np.random.Generator) -> None: - """Initialize position state for new episode.""" - ... - - def available(self, instrument_id: int, side: Any) -> float: - """Query available capacity for a trade. - - Args: - instrument_id: Which instrument - side: BUY or SELL - - Returns: - Maximum tradeable size given current position - """ - ... - - def apply_execution(self, exe: Execution) -> Execution: - """Apply position constraints to an execution. - - Args: - exe: Proposed execution with size_requested - - Returns: - Censored execution with size_filled <= available capacity - """ - ... - - def step(self, t: float) -> None: - """Process time-based position updates. - - Handles replenishment receipt, holding cost calculation, etc. - """ - ... - - @property - def position(self) -> np.ndarray: - """Current position vector (positive=long/inventory, negative=short).""" - ... - - @property - def holding_cost(self) -> float: - """Holding cost incurred this step.""" - ... - -class MarketModel(Protocol): - """Models external market dynamics and competitor behavior. - - For retail: competitor price dynamics (static, reactive, stochastic) - For finance: mid-price process (GBM, mean-reverting) - - Methods: - step: Update market state given agent's quotes - """ - def step(self, t: float, self_quotes: Quote, hidden: HiddenState, - rng: np.random.Generator) -> MarketState: - """Update market state for this timestep. - - Args: - t: Current time - self_quotes: Agent's current quotes (competitors may react) - hidden: Hidden state (regime info) - rng: Random generator - - Returns: - Updated market state with competitor prices, mid-prices, volatility - """ - ... - -class ObservationBuilder(Protocol): - """Constructs agent observations with appropriate censoring. - - Critical for research: ensures agent only sees censored fills, - never true demand (which goes in info dict). - - Methods: - build: Construct observation from step data - """ - def build(self, quote: Quote, instruments: InstrumentSet, logs: StepLogs, - metrics: StepMetrics, market: MarketState | None, - hidden: HiddenState, mask_demand: bool, t: int) -> Observation: - """Build observation for agent. - - Args: - quote: Current quote - instruments: Instrument set with positions - logs: Step logs with true_demand and censored_fills - metrics: Computed metrics - market: Market state - hidden: Hidden state (not included in obs) - mask_demand: If True, exclude true demand from observation - t: Current timestep - - Returns: - Observation containing only observable quantities - """ - ... - -class Objective(Protocol): - """Computes reward from step metrics. - - Supports composite objectives with weighted terms: - - PnL (profit) - - Position costs (holding, inventory risk) - - Lost opportunity (stockouts) - - Volatility penalty (UX) - - Spread capture (market making) - - Methods: - reward: Compute scalar reward - breakdown: Get per-term contribution for analysis - """ - def reward(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, - obs: Observation) -> float: - """Compute scalar reward for this step. - - Args: - quote: Current quote - instruments: Instrument set - metrics: Step metrics (pnl, costs, etc.) - hidden: Hidden state - obs: Agent observation - - Returns: - Scalar reward value - """ - ... - - def breakdown(self, quote: Quote, instruments: InstrumentSet, - metrics: StepMetrics, hidden: HiddenState, - obs: Observation) -> dict[str, float]: - """Get reward breakdown by component. - - Useful for analyzing which terms dominate the reward. - - Returns: - Dict mapping term names to their contributions - """ - ... diff --git a/lab/outlet/stock.py b/lab/outlet/stock.py deleted file mode 100644 index b2c88a2..0000000 --- a/lab/outlet/stock.py +++ /dev/null @@ -1,151 +0,0 @@ -""" -Inventory/position management and instrument factories. - -This module provides: -- PositionConfig: Configuration for position constraints and costs -- PositionModel: Manages inventory (retail) or position (finance) -- make_instruments: Factory for creating instrument sets - -The PositionModel handles demand censorship by limiting executions -to available inventory, computing holding costs, and managing replenishment. -""" -from __future__ import annotations -from dataclasses import dataclass, field -import numpy as np -from .types import Instrument, InstrumentSet, Execution -from .constants import Side, InstrumentType - -@dataclass -class PositionConfig: - """Configuration for position/inventory management. - - Attributes: - initial_position: Starting inventory (None = unlimited, float = same for all) - max_position: Maximum long position per instrument - min_position: Maximum short position (negative, for finance) - holding_cost_rate: Cost per unit per step for holding inventory - shortage_cost_rate: Opportunity cost rate for stockouts - lead_time: Steps until replenishment orders arrive - """ - initial_position: np.ndarray | float | None = None - max_position: float = 1000.0 - min_position: float = -1000.0 - holding_cost_rate: float = 0.001 - shortage_cost_rate: float = 0.05 - lead_time: int = 0 - -@dataclass -class PositionModel: - """Manages inventory (retail) or position (finance) with censorship. - - Key responsibilities: - - Track current position per instrument - - Censor executions when position is insufficient - - Compute holding costs per step - - Track shortage/stockout costs - - Handle replenishment orders with lead time - - For retail: position is inventory (positive), selling reduces it - For finance: position can be positive (long) or negative (short) - """ - cfg: PositionConfig - n: int = 0 - _position: np.ndarray = field(default_factory=lambda: np.array([])) - _pending_orders: list[tuple[int, np.ndarray]] = field(default_factory=list) - _step_holding_cost: float = 0.0 - _step_shortage_cost: float = 0.0 - - def reset(self, instruments: InstrumentSet, rng: np.random.Generator) -> None: - self.n = instruments.n - if self.cfg.initial_position is None: - self._position = np.full(self.n, np.inf) # unlimited - elif isinstance(self.cfg.initial_position, (int, float)): - self._position = np.full(self.n, float(self.cfg.initial_position)) - else: - self._position = self.cfg.initial_position.copy().astype(np.float64) - self._pending_orders = [] - self._step_holding_cost = 0.0 - self._step_shortage_cost = 0.0 - - def available(self, instrument_id: int, side: Side) -> float: - pos = self._position[instrument_id] - if np.isinf(pos): return np.inf - if side == Side.BUY: - return max(0, pos) # can sell up to current inventory - else: - return max(0, self.cfg.max_position - pos) # can buy up to max - - def apply_execution(self, exe: Execution) -> Execution: - idx = int(exe.instrument_id) - avail = self.available(idx, exe.side) - filled = min(exe.size_requested, avail) - shortage = exe.size_requested - filled - - if exe.side == Side.BUY: - self._position[idx] -= filled # sold from inventory - else: - self._position[idx] += filled # bought into inventory - - if shortage > 0: - self._step_shortage_cost += shortage * exe.price * self.cfg.shortage_cost_rate - - return Execution( - opportunity_id=exe.opportunity_id, instrument_id=exe.instrument_id, - side=exe.side, size_requested=exe.size_requested, - size_filled=filled, price=exe.price, propensity=exe.propensity, t=exe.t - ) - - def order(self, quantity: np.ndarray) -> None: - if self.cfg.lead_time > 0: - self._pending_orders.append((self.cfg.lead_time, quantity.copy())) - else: - self._position += quantity - - def step(self, t: float) -> None: - # compute holding cost - pos = np.where(np.isinf(self._position), 0, self._position) - self._step_holding_cost = float(np.sum(np.abs(pos)) * self.cfg.holding_cost_rate) - - # receive pending orders - new_pending = [] - for (remaining, qty) in self._pending_orders: - if remaining <= 1: - self._position += qty - else: - new_pending.append((remaining - 1, qty)) - self._pending_orders = new_pending - - @property - def position(self) -> np.ndarray: - return np.where(np.isinf(self._position), -1, self._position) - - @property - def holding_cost(self) -> float: - return self._step_holding_cost - - @property - def shortage_cost(self) -> float: - return self._step_shortage_cost - -def make_instruments(n: int, cost_range: tuple[float, float] = (1.0, 10.0), - margin_range: tuple[float, float] = (0.2, 0.5), - inst_type: InstrumentType = InstrumentType.SKU, - rng: np.random.Generator | None = None) -> InstrumentSet: - """Factory function to create a random instrument set. - - Args: - n: Number of instruments to create - cost_range: (min, max) for uniform cost sampling - margin_range: (min, max) for uniform margin sampling - inst_type: Type of instruments (SKU, ASSET, etc.) - rng: Random generator (uses default if None) - - Returns: - InstrumentSet with n instruments having random costs and margins - """ - rng = rng or np.random.default_rng() - costs = rng.uniform(*cost_range, n) - margins = rng.uniform(*margin_range, n) - items = [Instrument(id=i, type=inst_type, cost_basis=c, reference_price=c*(1+m)) - for i, (c, m) in enumerate(zip(costs, margins))] - return InstrumentSet(instruments=items) diff --git a/lab/outlet/types.py b/lab/outlet/types.py deleted file mode 100644 index db49117..0000000 --- a/lab/outlet/types.py +++ /dev/null @@ -1,318 +0,0 @@ -""" -Core data types for the Quote-Control simulator. - -This module defines the fundamental data structures used throughout the platform: -- Identifiers (InstrumentId, OpportunityId, AgentId) -- Domain objects (Instrument, Quote, Opportunity, Execution) -- Logging structures (StepEvent, StepLogs, StepMetrics) -- State containers (MarketState, HiddenState, Observation, StepResult) - -All dataclasses are designed to be serializable and numpy-compatible. -""" -from __future__ import annotations -from dataclasses import dataclass, field -from typing import Any, NewType -import numpy as np -from .constants import Side, InstrumentType, OpportunityType, EventType - -InstrumentId = NewType('InstrumentId', int) # unique instrument index -OpportunityId = NewType('OpportunityId', str) # unique opportunity/session ID -AgentId = NewType('AgentId', str) # unique agent/actor ID - -@dataclass -class Instrument: - """Represents a priceable entity in the simulation. - - An instrument can be a retail SKU, financial asset, loan product, or subscription. - The cost_basis represents the fundamental value (marginal cost for retail, - mid-price for assets, funding rate for loans). - - Attributes: - id: Unique identifier for this instrument - type: Category of instrument (SKU, ASSET, LOAN, SUBSCRIPTION) - cost_basis: Fundamental cost or value (marginal cost, mid-price, funding rate) - reference_price: Base or fair price used for action scaling - attrs: Additional attributes (quality score, category, volatility, etc.) - """ - id: InstrumentId - type: InstrumentType - cost_basis: float - reference_price: float - attrs: dict[str, Any] = field(default_factory=dict) - -@dataclass -class InstrumentSet: - """Collection of instruments with optional position tracking. - - Provides vectorized access to instrument properties for efficient computation. - Position can be positive (long/inventory) or negative (short) for financial assets. - - Attributes: - instruments: List of Instrument objects - position: Current position per instrument (None = unlimited capacity) - - Properties: - n: Number of instruments - costs: Vector of cost bases - refs: Vector of reference prices - """ - instruments: list[Instrument] - position: np.ndarray | None = None - - @property - def n(self) -> int: return len(self.instruments) - @property - def costs(self) -> np.ndarray: return np.array([i.cost_basis for i in self.instruments], np.float32) - @property - def refs(self) -> np.ndarray: return np.array([i.reference_price for i in self.instruments], np.float32) - -@dataclass -class Quote: - """Price quote set by the policy - the action in the MDP. - - Supports multiple quoting mechanisms: - - Posted price: only `prices` field used - - Two-sided: `prices` as mid, `spreads` for bid-ask width - - Auction: `prices` as reserve prices - - The propensity field is critical for off-policy evaluation (OPE). - - Attributes: - prices: Posted prices (retail) or mid-quotes (market making) - spreads: Bid-ask spread width for two-sided quoting (None for posted price) - propensity: P(this quote | behavior policy) for importance sampling - metadata: Additional info (prev_prices for delta constraints, etc.) - - Properties: - bids: Computed bid prices (mid - spread/2) - asks: Computed ask prices (mid + spread/2) - """ - prices: np.ndarray - spreads: np.ndarray | None = None - propensity: float = 1.0 - metadata: dict[str, Any] = field(default_factory=dict) - - @property - def bids(self) -> np.ndarray | None: - return self.prices - self.spreads/2 if self.spreads is not None else None - @property - def asks(self) -> np.ndarray | None: - return self.prices + self.spreads/2 if self.spreads is not None else None - -@dataclass -class Opportunity: - """An arrival event that may result in a transaction. - - Opportunities are the demand side of the simulation: - - Retail: browsing session with purchase intent - - Market making: incoming market order - - Lending: loan application - - The context dict carries segment/type information used by execution models. - - Attributes: - id: Unique identifier for this opportunity - type: Category (SESSION, MARKET_ORDER, REQUEST) - side: BUY or SELL intent - instrument_id: Which instrument the opportunity targets - size: Requested transaction size (units, shares, principal) - t: Arrival timestamp - context: Segment info (is_scraper, credit_score, urgency, etc.) - """ - id: OpportunityId - type: OpportunityType - side: Side - instrument_id: InstrumentId - size: float = 1.0 - t: float = 0.0 - context: dict[str, Any] = field(default_factory=dict) - -@dataclass -class Execution: - """A realized transaction after acceptance and position censorship. - - The difference between size_requested and size_filled represents - censored demand due to inventory/position constraints. - - Attributes: - opportunity_id: Links back to the originating Opportunity - instrument_id: Which instrument was traded - side: BUY or SELL - size_requested: Original requested size (true demand) - size_filled: Actual filled size after censorship - price: Execution price - propensity: Combined propensity for OPE (quote * acceptance) - t: Execution timestamp - """ - opportunity_id: OpportunityId - instrument_id: InstrumentId - side: Side - size_requested: float - size_filled: float - price: float - propensity: float = 1.0 - t: float = 0.0 - -@dataclass -class StepEvent: - """Generic logged event""" - t: float - type: EventType - instrument_id: InstrumentId | None = None - opportunity_id: OpportunityId | None = None - price: float | None = None - size: float | None = None - propensity: float = 1.0 - metadata: dict[str, Any] = field(default_factory=dict) - -@dataclass -class StepLogs: - """Container for all logging data from a simulation step. - - Supports both detailed event logging (for OPE) and aggregate-only mode - (for fast simulation). The true_demand vs censored_fills distinction - is critical for research on demand estimation under censorship. - - Attributes: - events: Detailed event log (None if LogLevel != FULL) - executions: List of executed transactions (None if LogLevel != FULL) - aggregates: Always-available aggregate statistics - true_demand: Oracle demand before censorship (for research, not in obs) - censored_fills: Realized fills after position constraints (observable) - """ - events: list[StepEvent] | None = None - executions: list[Execution] | None = None - aggregates: dict[str, Any] = field(default_factory=dict) - true_demand: np.ndarray | None = None - censored_fills: np.ndarray | None = None - -@dataclass -class StepMetrics: - """Computed metrics for a single simulation step. - - Metrics are domain-aware: retail uses revenue/cost/holding_cost, - market making uses spread_capture and inventory risk. - - Attributes: - pnl: Profit and loss (revenue - cost for retail, mark-to-market for finance) - revenue: Gross revenue from sales/executions - cost: Cost of goods sold or position acquisition cost - units_traded: Total units/shares transacted - position_cost: Holding cost (retail) or inventory risk penalty (finance) - lost_opportunity: Cost of stockouts or missed fills - spread_capture: Bid-ask spread captured (market making) - volatility: Price volatility metric for UX consideration - conversion: Fill rate (executions / opportunities) - per_instrument: Per-instrument breakdowns (fills, demand, etc.) - """ - pnl: float = 0.0 - revenue: float = 0.0 - cost: float = 0.0 - units_traded: float = 0.0 - position_cost: float = 0.0 - lost_opportunity: float = 0.0 - spread_capture: float = 0.0 - volatility: float = 0.0 - conversion: float = 0.0 - per_instrument: dict[str, np.ndarray] = field(default_factory=dict) - -@dataclass -class MarketState: - """External market conditions and competitor state. - - For retail: competitor_quotes drives cross-elasticity effects. - For finance: mid_prices and volatility drive execution dynamics. - - Attributes: - competitor_quotes: Competitor posted prices (retail) - mid_prices: Market mid-prices for assets (finance) - volatility: Per-instrument volatility estimate - regime: Market regime identifier (normal, price_war, high_vol, etc.) - t: Timestamp of this market state - """ - competitor_quotes: np.ndarray | None = None - mid_prices: np.ndarray | None = None - volatility: np.ndarray | None = None - regime: str = 'normal' - t: float = 0.0 - -@dataclass -class HiddenState: - """Internal simulator state not exposed to the agent. - - Contains oracle information for research analysis and - history needed for non-stationary dynamics. - - Attributes: - true_demand_intensity: Latent demand multiplier - contamination: Fraction of arrivals that are adversarial/scraper - regime: Current market/competitor regime - quote_history: History of agent quotes for volatility calculation - market_history: History of market states for analysis - """ - true_demand_intensity: float = 1.0 - contamination: float = 0.0 - regime: str = 'normal' - quote_history: list[np.ndarray] = field(default_factory=list) - market_history: list[MarketState] = field(default_factory=list) - -@dataclass -class Observation: - """Observable state provided to the agent - censored view only. - - Critical invariant: Observation never contains true_demand, only - censored fills. This enforces the censorship research setting. - - Attributes: - quotes: Current posted quotes (the agent's last action) - position: Current inventory/position state - fills: Censored execution counts per instrument - exposures: Opportunity exposure counts per instrument - market: Observable market state (competitor prices, volatility) - t: Current timestep - extra: Additional observable features - - Methods: - to_flat: Flatten to numpy array for gym compatibility - """ - quotes: np.ndarray - position: np.ndarray | None - fills: np.ndarray - exposures: np.ndarray - market: MarketState | None - t: int - extra: dict[str, Any] = field(default_factory=dict) - - def to_flat(self) -> np.ndarray: - """Flatten observation to 1D numpy array for gym environments.""" - parts = [self.quotes, self.fills, self.exposures] - if self.position is not None: parts.append(self.position) - if self.market and self.market.competitor_quotes is not None: - parts.append(self.market.competitor_quotes) - return np.concatenate([p.flatten() for p in parts]) - -@dataclass -class StepResult: - """Complete result from a simulation step. - - Follows gymnasium convention for obs, reward, terminated, truncated, info. - Additionally provides metrics, logs, and hidden state for research. - - Attributes: - obs: Observable state (censored) - reward: Scalar reward from objective function - terminated: Episode ended naturally (max_steps reached) - truncated: Episode ended early (bankruptcy, constraint violation) - info: Additional info dict (contains true_demand for research) - metrics: Computed metrics for this step - logs: Event logs and aggregates - hidden: Internal simulator state (oracle info) - """ - obs: Observation - reward: float - terminated: bool - truncated: bool - info: dict[str, Any] - metrics: StepMetrics - logs: StepLogs - hidden: HiddenState diff --git a/lab/population/__init__.py b/lab/population/__init__.py deleted file mode 100644 index 081dbd0..0000000 --- a/lab/population/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .arrivals import PoissonArrivalModel, HawkesArrivalModel, SessionArrivalModel -from .execution import ElasticityExecutionModel, IntensityExecutionModel, LogitExecutionModel -from .competitors import (StaticCompetitorModel, ReactiveCompetitorModel, - StochasticCompetitorModel, GBMMarketModel) - -__all__ = [ - 'PoissonArrivalModel', 'HawkesArrivalModel', 'SessionArrivalModel', - 'ElasticityExecutionModel', 'IntensityExecutionModel', 'LogitExecutionModel', - 'StaticCompetitorModel', 'ReactiveCompetitorModel', 'StochasticCompetitorModel', 'GBMMarketModel', -] diff --git a/lab/population/arrivals.py b/lab/population/arrivals.py deleted file mode 100644 index b7e7ed6..0000000 --- a/lab/population/arrivals.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -Arrival models for generating demand opportunities. - -This module provides different arrival processes: -- PoissonArrivalModel: Constant-rate memoryless arrivals -- HawkesArrivalModel: Self-exciting clustered arrivals (market orders) -- SessionArrivalModel: Retail browsing sessions with multi-product views - -Each model implements the ArrivalModel protocol and generates Opportunity objects -that flow through the execution pipeline. -""" -from __future__ import annotations -from dataclasses import dataclass -from typing import Callable -import numpy as np -from uuid import uuid4 -from ..outlet.types import Opportunity, InstrumentSet, MarketState, HiddenState -from ..outlet.constants import Side, OpportunityType -from ..outlet.math_util import poisson_arrivals, hawkes_intensity - -@dataclass -class PoissonArrivalConfig: - """Configuration for Poisson arrival process. - - Attributes: - base_rate: Expected arrivals per unit time (scaled by hidden.true_demand_intensity) - side_probs: Probability distribution over BUY/SELL sides - """ - base_rate: float = 10.0 - side_probs: dict[Side, float] = None - - def __post_init__(self): - if self.side_probs is None: - self.side_probs = {Side.BUY: 1.0} - -class PoissonArrivalModel: - """Homogeneous Poisson arrival process. - - Generates arrivals at a constant rate (modulated by demand intensity). - Suitable for stationary demand or as a baseline model. - - The actual arrival count follows Poisson(rate * dt * intensity). - """ - - def __init__(self, cfg: PoissonArrivalConfig | None = None): - self.cfg = cfg or PoissonArrivalConfig() - - def sample(self, t: float, dt: float, instruments: InstrumentSet, - market: MarketState | None, hidden: HiddenState, - rng: np.random.Generator) -> list[Opportunity]: - n_arrivals = poisson_arrivals(self.cfg.base_rate * hidden.true_demand_intensity, dt, rng) - opps = [] - for _ in range(n_arrivals): - inst_id = rng.integers(0, instruments.n) - side = rng.choice(list(self.cfg.side_probs.keys()), - p=list(self.cfg.side_probs.values())) - opps.append(Opportunity( - id=str(uuid4())[:8], type=OpportunityType.SESSION, - side=side, instrument_id=inst_id, size=1.0, t=t, - context={'segment': 'default'} - )) - return opps - -@dataclass -class HawkesArrivalConfig: - """Configuration for Hawkes self-exciting process. - - Attributes: - base_rate: Baseline arrival intensity - alpha: Excitation strength (how much each arrival increases intensity) - beta: Decay rate (how quickly excitation fades) - side_probs: Probability distribution over BUY/SELL sides - """ - base_rate: float = 5.0 - alpha: float = 0.5 - beta: float = 1.0 - side_probs: dict[Side, float] = None - - def __post_init__(self): - if self.side_probs is None: - self.side_probs = {Side.BUY: 0.5, Side.SELL: 0.5} - -class HawkesArrivalModel: - """Self-exciting Hawkes point process for clustered arrivals. - - Models order flow where arrivals cluster in time (momentum, herding). - Intensity: lambda(t) = base + alpha * sum(exp(-beta * (t - t_i))) - - Used for market making scenarios where orders arrive in bursts. - """ - - def __init__(self, cfg: HawkesArrivalConfig | None = None): - self.cfg = cfg or HawkesArrivalConfig() - self._history: np.ndarray = np.array([]) - - def sample(self, t: float, dt: float, instruments: InstrumentSet, - market: MarketState | None, hidden: HiddenState, - rng: np.random.Generator) -> list[Opportunity]: - intensity = hawkes_intensity( - self.cfg.base_rate * hidden.true_demand_intensity, - self._history, self.cfg.alpha, self.cfg.beta, t - ) - n_arrivals = poisson_arrivals(intensity, dt, rng) - opps = [] - for i in range(n_arrivals): - arr_t = t + rng.uniform(0, dt) - self._history = np.append(self._history, arr_t) - inst_id = rng.integers(0, instruments.n) - side = rng.choice(list(self.cfg.side_probs.keys()), - p=list(self.cfg.side_probs.values())) - opps.append(Opportunity( - id=str(uuid4())[:8], type=OpportunityType.MARKET_ORDER, - side=side, instrument_id=inst_id, - size=rng.exponential(1.0), t=arr_t, - context={'intensity': intensity} - )) - # decay old history - self._history = self._history[self._history > t - 10] - return opps - -@dataclass -class SessionArrivalConfig: - """Configuration for retail session arrivals. - - Attributes: - sessions_per_step: Number of browsing sessions per step - views_per_session: (min, max) product views per session - contamination: Fraction of sessions that are scrapers/bots - """ - sessions_per_step: int = 20 - views_per_session: tuple[int, int] = (1, 5) - contamination: float = 0.0 - -class SessionArrivalModel: - """Retail browsing session model with multi-product views. - - Each session views multiple products, generating one opportunity per view. - Scraper sessions (controlled by contamination) view more products - but convert at lower rates (handled by ExecutionModel). - """ - - def __init__(self, cfg: SessionArrivalConfig | None = None): - self.cfg = cfg or SessionArrivalConfig() - - def sample(self, t: float, dt: float, instruments: InstrumentSet, - market: MarketState | None, hidden: HiddenState, - rng: np.random.Generator) -> list[Opportunity]: - n_sessions = self.cfg.sessions_per_step - contamination = hidden.contamination if hidden else self.cfg.contamination - opps = [] - - for _ in range(n_sessions): - is_scraper = rng.random() < contamination - n_views = rng.integers(*self.cfg.views_per_session) - sid = str(uuid4())[:8] - - # scrapers view more products - if is_scraper: - n_views = min(instruments.n, n_views * 3) - - viewed = rng.choice(instruments.n, size=min(n_views, instruments.n), replace=False) - for inst_id in viewed: - opps.append(Opportunity( - id=f"{sid}-{inst_id}", type=OpportunityType.SESSION, - side=Side.BUY, instrument_id=int(inst_id), size=1.0, t=t, - context={'session_id': sid, 'is_scraper': is_scraper, 'n_views': n_views} - )) - return opps diff --git a/lab/population/competitors.py b/lab/population/competitors.py deleted file mode 100644 index 9417709..0000000 --- a/lab/population/competitors.py +++ /dev/null @@ -1,189 +0,0 @@ -""" -Market and competitor models for external dynamics. - -This module provides models for competitor pricing (retail) and market dynamics (finance): -- StaticCompetitorModel: Fixed competitor prices -- ReactiveCompetitorModel: Competitor reacts to agent's prices, can trigger price wars -- StochasticCompetitorModel: Random walk competitor prices -- GBMMarketModel: Geometric Brownian Motion for asset mid-prices - -Each model implements the MarketModel protocol. -""" -from __future__ import annotations -from dataclasses import dataclass -import numpy as np -from ..outlet.types import Quote, MarketState, HiddenState -from ..outlet.math_util import clamp, ema - -@dataclass -class StaticCompetitorConfig: - """Configuration for static competitor. - - Attributes: - markup: Fixed percentage markup over reference prices - """ - markup: float = 0.1 - -class StaticCompetitorModel: - """Static competitor with fixed markup pricing. - - Competitor prices = reference * (1 + markup). - Useful as a baseline or for testing without competitor dynamics. - """ - - def __init__(self, cfg: StaticCompetitorConfig | None = None, refs: np.ndarray | None = None): - self.cfg = cfg or StaticCompetitorConfig() - self.refs = refs - - def step(self, t: float, self_quotes: Quote, hidden: HiddenState, - rng: np.random.Generator) -> MarketState: - refs = self.refs if self.refs is not None else self_quotes.prices - comp_prices = refs * (1 + self.cfg.markup) - return MarketState(competitor_quotes=comp_prices, regime='static', t=t) - -@dataclass -class ReactiveCompetitorConfig: - """Configuration for reactive competitor. - - Attributes: - follow_weight: Smoothing weight for price following (0=ignore, 1=instant) - band_pct: Maximum deviation from reference prices - war_threshold: Relative price diff that triggers price war - war_aggression: How much competitor cuts prices during war - """ - follow_weight: float = 0.3 - band_pct: float = 0.1 - war_threshold: float = -0.15 - war_aggression: float = 0.2 - -class ReactiveCompetitorModel: - """Competitor that reacts to agent's prices with price war dynamics. - - The competitor follows the agent's prices with smoothing. - If the agent undercuts significantly (beyond war_threshold), - a price war is triggered where the competitor becomes more aggressive. - - This creates non-stationary dynamics that test policy robustness. - """ - - def __init__(self, cfg: ReactiveCompetitorConfig | None = None, refs: np.ndarray | None = None): - self.cfg = cfg or ReactiveCompetitorConfig() - self.refs = refs - self._prices: np.ndarray | None = None - self._in_war: bool = False - - def step(self, t: float, self_quotes: Quote, hidden: HiddenState, - rng: np.random.Generator) -> MarketState: - refs = self.refs if self.refs is not None else self_quotes.prices - c = self.cfg - - if self._prices is None: - self._prices = refs.copy() - - # check for price war trigger - relative_diff = (self_quotes.prices - self._prices) / (self._prices + 1e-8) - if np.any(relative_diff < c.war_threshold): - self._in_war = True - elif np.all(relative_diff > -c.war_threshold / 2): - self._in_war = False - - # update prices - if self._in_war: - target = self_quotes.prices * (1 - c.war_aggression) - hidden.regime = 'price_war' - else: - target = self_quotes.prices * (1 + c.follow_weight * 0.05) - hidden.regime = 'normal' - - # follow with smoothing - new_prices = np.array([ema(old, new, c.follow_weight) - for old, new in zip(self._prices, target)]) - - # stay within band - new_prices = clamp(new_prices, refs * (1 - c.band_pct), refs * (1 + c.band_pct)) - self._prices = new_prices - - return MarketState(competitor_quotes=new_prices, regime=hidden.regime, t=t) - -@dataclass -class StochasticCompetitorConfig: - """Configuration for stochastic competitor. - - Attributes: - drift: Price drift per step - volatility: Price volatility (std of random shocks) - mean_revert: Mean reversion strength toward reference - """ - drift: float = 0.0 - volatility: float = 0.02 - mean_revert: float = 0.1 - -class StochasticCompetitorModel: - """Ornstein-Uhlenbeck style stochastic competitor prices. - - Prices follow: dP = drift + mean_revert*(ref - P) + volatility*P*dW - - Provides non-stationary competitor dynamics independent of agent actions. - Useful for testing robustness to market noise. - """ - - def __init__(self, cfg: StochasticCompetitorConfig | None = None, refs: np.ndarray | None = None): - self.cfg = cfg or StochasticCompetitorConfig() - self.refs = refs - self._prices: np.ndarray | None = None - - def step(self, t: float, self_quotes: Quote, hidden: HiddenState, - rng: np.random.Generator) -> MarketState: - refs = self.refs if self.refs is not None else self_quotes.prices - c = self.cfg - - if self._prices is None: - self._prices = refs.copy() - - # Ornstein-Uhlenbeck style dynamics - n = len(self._prices) - noise = rng.normal(0, c.volatility, n) - reversion = c.mean_revert * (refs - self._prices) - self._prices = self._prices + c.drift + reversion + noise * self._prices - self._prices = np.maximum(self._prices, refs * 0.5) - - return MarketState(competitor_quotes=self._prices.copy(), regime='stochastic', t=t) - -@dataclass -class GBMMarketConfig: - """Configuration for GBM market model. - - Attributes: - mu: Price drift (expected return) - sigma: Price volatility - dt: Time step size - """ - mu: float = 0.0 - sigma: float = 0.1 - dt: float = 1.0 - -class GBMMarketModel: - """Geometric Brownian Motion model for asset mid-prices. - - Standard Black-Scholes dynamics: dS = mu*S*dt + sigma*S*dW - - Used for market making scenarios where the underlying asset price - follows a random walk. The agent quotes around this moving mid-price. - """ - - def __init__(self, cfg: GBMMarketConfig | None = None, initial: np.ndarray | None = None): - self.cfg = cfg or GBMMarketConfig() - self._mids = initial - - def step(self, t: float, self_quotes: Quote, hidden: HiddenState, - rng: np.random.Generator) -> MarketState: - if self._mids is None: - self._mids = self_quotes.prices.copy() - - c = self.cfg - n = len(self._mids) - z = rng.standard_normal(n) - self._mids = self._mids * np.exp((c.mu - 0.5*c.sigma**2)*c.dt + c.sigma*np.sqrt(c.dt)*z) - - vol = np.full(n, c.sigma) - return MarketState(mid_prices=self._mids.copy(), volatility=vol, regime='gbm', t=t) diff --git a/lab/population/execution.py b/lab/population/execution.py deleted file mode 100644 index 97484b2..0000000 --- a/lab/population/execution.py +++ /dev/null @@ -1,174 +0,0 @@ -""" -Execution models for computing acceptance/fill probabilities. - -This module provides different models for how opportunities convert to executions: -- ElasticityExecutionModel: Price elasticity with competitor cross-effects (retail) -- IntensityExecutionModel: Distance-based fill intensity (market making) -- LogitExecutionModel: Discrete choice model - -Each model implements the ExecutionModel protocol. -""" -from __future__ import annotations -from dataclasses import dataclass -from typing import Any -import numpy as np -from ..outlet.types import Opportunity, Quote, InstrumentSet, MarketState -from ..outlet.constants import Side -from ..outlet.math_util import sigmoid, safe_log, intensity_decay, EPS - -@dataclass -class ElasticityConfig: - """Configuration for price elasticity execution model. - - Attributes: - base_prob: Baseline purchase probability at reference price - price_sensitivity: Own-price elasticity coefficient - cross_elasticity: Competitor price cross-elasticity - scraper_conversion: Multiplier for scraper conversion (typically << 1) - """ - base_prob: float = 0.3 - price_sensitivity: float = 2.0 - cross_elasticity: float = 0.5 - scraper_conversion: float = 0.01 - -class ElasticityExecutionModel: - """Price elasticity model for retail dynamic pricing. - - P(buy) = base_prob * exp(-sensitivity * log(price/ref)) * cross_effect * scraper_mult - - Higher prices reduce purchase probability exponentially. - Competitor undercutting shifts demand away from the platform. - Scrapers convert at a much lower rate (reconnaissance, not purchase). - """ - - def __init__(self, cfg: ElasticityConfig | None = None): - self.cfg = cfg or ElasticityConfig() - - def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet, - market: MarketState | None, rng: np.random.Generator) -> float: - idx = int(opp.instrument_id) - price = quote.prices[idx] - ref = instruments.refs[idx] - - # base probability adjusted by price ratio - log_ratio = safe_log(price / ref) - prob = self.cfg.base_prob * np.exp(-self.cfg.price_sensitivity * log_ratio) - - # cross-elasticity: competitor undercutting increases their share - if market and market.competitor_quotes is not None: - comp_price = market.competitor_quotes[idx] - if comp_price < price: - prob *= np.exp(-self.cfg.cross_elasticity * (price - comp_price) / ref) - - # scrapers convert at much lower rate - if opp.context.get('is_scraper', False): - prob *= self.cfg.scraper_conversion - - return float(np.clip(prob, 0, 1)) - - def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, - context: dict[str, Any] | None = None) -> np.ndarray: - # simple imputation: assume fills = prob * exposures, invert - exposures = context.get('exposures', fills) if context else fills - avg_prob = self.cfg.base_prob - return fills / (avg_prob + EPS) - -@dataclass -class IntensityConfig: - """Configuration for intensity-based execution model. - - Attributes: - base_intensity: Baseline fill intensity - kappa: Decay rate with distance from mid-price - vol_scale: Volatility multiplier for fill intensity - """ - base_intensity: float = 1.0 - kappa: float = 1.5 - vol_scale: float = 0.5 - -class IntensityExecutionModel: - """Avellaneda-Stoikov style fill intensity for market making. - - Fill probability decays exponentially with distance from mid-price: - P(fill) = base * exp(-kappa * |quote - mid|) * (1 + vol_scale * sigma) - - Tighter spreads (closer to mid) have higher fill probability. - Higher volatility increases fill probability (more aggressive traders). - """ - - def __init__(self, cfg: IntensityConfig | None = None): - self.cfg = cfg or IntensityConfig() - - def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet, - market: MarketState | None, rng: np.random.Generator) -> float: - idx = int(opp.instrument_id) - - # get mid price from market or use quote price - if market and market.mid_prices is not None: - mid = market.mid_prices[idx] - else: - mid = quote.prices[idx] - - # compute distance from mid - if opp.side == Side.BUY: - exec_price = quote.asks[idx] if quote.asks is not None else quote.prices[idx] - distance = exec_price - mid - else: - exec_price = quote.bids[idx] if quote.bids is not None else quote.prices[idx] - distance = mid - exec_price - - # intensity decays with distance - intensity = self.cfg.base_intensity * intensity_decay(abs(distance), self.cfg.kappa) - - # volatility increases fill probability - if market and market.volatility is not None: - vol = market.volatility[idx] - intensity *= (1 + self.cfg.vol_scale * vol) - - return float(np.clip(intensity, 0, 1)) - - def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, - context: dict[str, Any] | None = None) -> np.ndarray: - return fills # market making doesn't have same censorship concept - -@dataclass -class LogitConfig: - """Configuration for logit discrete choice model. - - Attributes: - beta_0: Intercept (base utility) - beta_price: Price coefficient (typically negative) - beta_quality: Quality attribute coefficient - """ - beta_0: float = 0.5 - beta_price: float = -1.5 - beta_quality: float = 0.3 - -class LogitExecutionModel: - """Discrete choice logit model for purchase probability. - - Utility: U = beta_0 + beta_price * (price/ref) + beta_quality * quality - P(buy) = sigmoid(U) - - Provides a theoretically grounded demand model from economics literature. - """ - - def __init__(self, cfg: LogitConfig | None = None): - self.cfg = cfg or LogitConfig() - - def prob(self, opp: Opportunity, quote: Quote, instruments: InstrumentSet, - market: MarketState | None, rng: np.random.Generator) -> float: - idx = int(opp.instrument_id) - price = quote.prices[idx] - ref = instruments.refs[idx] - quality = instruments.instruments[idx].attrs.get('quality', 0.5) - - # utility - u = self.cfg.beta_0 + self.cfg.beta_price * (price / ref) + self.cfg.beta_quality * quality - - # choice probability via sigmoid - return float(sigmoid(u)) - - def uncensor(self, fills: np.ndarray, instruments: InstrumentSet, - context: dict[str, Any] | None = None) -> np.ndarray: - return fills / (self.cfg.beta_0 + EPS) diff --git a/lab/run_example.py b/lab/run_example.py deleted file mode 100644 index ebe0f18..0000000 --- a/lab/run_example.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python -"""Example script demonstrating the Quote-Control platform""" -import sys -from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -import numpy as np -from lab.config import make_retail_platform, make_market_making_platform -from lab.experiments.eval import (rollout, compare_policies, fixed_price_policy, - cost_plus_margin_policy, random_walk_policy) - -def demo_retail(): - print("=" * 60) - print("RETAIL DYNAMIC PRICING DEMO") - print("=" * 60) - - platform = make_retail_platform() - print(f"Instruments: {platform.instruments.n}") - print(f"Reference prices: {platform.instruments.refs[:5].round(2)}...") - - # compare policies - policies = { - 'fixed': fixed_price_policy(platform.instruments.refs), - 'cost_plus_30%': cost_plus_margin_policy(platform.instruments.costs, 0.3), - 'cost_plus_50%': cost_plus_margin_policy(platform.instruments.costs, 0.5), - 'random_walk': random_walk_policy(platform.instruments.refs, 0.03), - } - - results = compare_policies(platform, policies, n_steps=100, n_runs=3) - - print("\nPolicy Comparison (100 steps, 3 runs):") - print("-" * 50) - for name, r in sorted(results.items(), key=lambda x: -x[1]['mean_pnl']): - print(f"{name:20s} PnL={r['mean_pnl']:8.1f} +/- {r['std_reward']:6.1f} " - f"conv={r['mean_conversion']:.3f}") - -def demo_market_making(): - print("\n" + "=" * 60) - print("MARKET MAKING DEMO") - print("=" * 60) - - platform = make_market_making_platform() - print(f"Instruments: {platform.instruments.n}") - print(f"Initial mids: {platform.instruments.refs.round(2)}") - - # simple policy: quote at mid with fixed spread - def mm_policy(obs: np.ndarray, t: int): - mids = platform.instruments.refs # would use obs in real policy - return mids, 1.0 - - result = rollout(platform, mm_policy, n_steps=200, seed=42) - print(f"\nRollout (200 steps):") - print(f" Total PnL: {result.total_pnl:.2f}") - print(f" Avg conversion: {result.avg_conversion:.3f}") - print(f" Total spread capture: {sum(m.spread_capture for m in result.metrics):.2f}") - -if __name__ == '__main__': - demo_retail() - demo_market_making()