shock: defining new lab environment and formulation

2026-05-31 16:43:36 +00:00 · 2026-01-23 10:37:32 +01:00
parent a033e77697
commit 4e2e41d943
41 changed files with 4175 additions and 0 deletions
--- a/lab/outlet/types.py
+++ b/lab/outlet/types.py
@@ -0,0 +1,318 @@
+"""
+Core data types for the Quote-Control simulator.
+
+This module defines the fundamental data structures used throughout the platform:
+- Identifiers (InstrumentId, OpportunityId, AgentId)
+- Domain objects (Instrument, InstrumentSet, Quote, Opportunity, Execution)
+- Logging structures (StepEvent, StepLogs, StepMetrics)
+- State containers (MarketState, HiddenState, Observation, StepResult)
+
+All dataclasses are designed to be serializable and numpy-compatible.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, NewType
+import numpy as np
+from .constants import Side, InstrumentType, OpportunityType, EventType
+
+InstrumentId = NewType('InstrumentId', int)  # int-backed alias: unique instrument index
+OpportunityId = NewType('OpportunityId', str)  # str-backed alias: unique opportunity/session ID
+AgentId = NewType('AgentId', str)  # str-backed alias: unique agent/actor ID (unused by the types in this module)
+
+@dataclass
+class Instrument:
+    """Represents a priceable entity in the simulation.
+
+    An instrument can be a retail SKU, financial asset, loan product, or subscription.
+    Plain data record: no validation or normalization is performed on construction.
+    InstrumentSet.costs and InstrumentSet.refs vectorize cost_basis and reference_price.
+
+    Attributes:
+        id: Unique identifier for this instrument
+        type: Category of instrument (SKU, ASSET, LOAN, SUBSCRIPTION)
+        cost_basis: Fundamental value (marginal cost for retail, mid-price for assets, funding rate for loans)
+        reference_price: Base or fair price used for action scaling
+        attrs: Additional attributes (quality score, category, volatility, etc.); empty by default
+    """
+    id: InstrumentId
+    type: InstrumentType
+    cost_basis: float
+    reference_price: float
+    attrs: dict[str, Any] = field(default_factory=dict)  # default_factory gives each instance its own dict
+
+@dataclass
+class InstrumentSet:
+    """Group of instruments plus, optionally, the position held in each.
+
+    Instrument properties are exposed as vectors so computation can stay vectorized.
+    For financial assets a position may be positive (long/inventory) or negative (short).
+
+    Attributes:
+        instruments: The Instrument objects in the set
+        position: Per-instrument position (None = unlimited capacity)
+    """
+    instruments: list[Instrument]
+    position: np.ndarray | None = None
+
+    @property
+    def n(self) -> int:  # number of instruments
+        return len(self.instruments)
+
+    @property
+    def costs(self) -> np.ndarray:  # float32 vector of cost bases, rebuilt on every access
+        return np.array([inst.cost_basis for inst in self.instruments], dtype=np.float32)
+
+    @property
+    def refs(self) -> np.ndarray:  # float32 vector of reference prices, rebuilt on every access
+        return np.array([inst.reference_price for inst in self.instruments], dtype=np.float32)
+
+@dataclass
+class Quote:
+    """The policy's price quote, i.e. the action in the MDP.
+
+    Quoting mechanisms supported:
+    - Posted price: only `prices` is used
+    - Two-sided: `prices` is the mid, `spreads` the bid-ask width
+    - Auction: `prices` are reserve prices
+
+    The propensity field is critical for off-policy evaluation (OPE).
+
+    Attributes:
+        prices: Posted prices (retail) or mid-quotes (market making)
+        spreads: Bid-ask spread width for two-sided quoting (None for posted price)
+        propensity: P(this quote | behavior policy) for importance sampling
+        metadata: Additional info (prev_prices for delta constraints, etc.)
+    """
+    prices: np.ndarray
+    spreads: np.ndarray | None = None
+    propensity: float = 1.0
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    @property
+    def bids(self) -> np.ndarray | None:  # mid - spread/2, or None when no spreads are set
+        if self.spreads is None:
+            return None
+        return self.prices - self.spreads / 2
+    @property
+    def asks(self) -> np.ndarray | None:  # mid + spread/2, or None when no spreads are set
+        if self.spreads is None:
+            return None
+        return self.prices + self.spreads / 2
+
+@dataclass
+class Opportunity:
+    """An arrival event that may result in a transaction.
+
+    Opportunities are the demand side of the simulation:
+    - Retail: browsing session with purchase intent
+    - Market making: incoming market order
+    - Lending: loan application
+
+    id, type, side and instrument_id are required; the remaining fields have defaults.
+
+    Attributes:
+        id: Unique identifier for this opportunity
+        type: Category (SESSION, MARKET_ORDER, REQUEST)
+        side: BUY or SELL intent
+        instrument_id: Which instrument the opportunity targets
+        size: Requested transaction size (units, shares, principal); defaults to 1.0
+        t: Arrival timestamp; defaults to 0.0
+        context: Segment info (is_scraper, credit_score, urgency, etc.) used by execution models
+    """
+    id: OpportunityId
+    type: OpportunityType
+    side: Side
+    instrument_id: InstrumentId
+    size: float = 1.0
+    t: float = 0.0
+    context: dict[str, Any] = field(default_factory=dict)  # default_factory gives each instance its own dict
+
+@dataclass
+class Execution:
+    """A realized transaction after acceptance and position censorship.
+
+    size_requested - size_filled is the demand censored by inventory/position constraints;
+    the relation between the two sizes is not validated by this class.
+
+    Attributes:
+        opportunity_id: Links back to the originating Opportunity
+        instrument_id: Which instrument was traded
+        side: BUY or SELL
+        size_requested: Original requested size (true demand)
+        size_filled: Actual filled size after censorship
+        price: Execution price
+        propensity: Combined propensity for OPE (quote * acceptance); defaults to 1.0
+        t: Execution timestamp; defaults to 0.0
+    """
+    opportunity_id: OpportunityId
+    instrument_id: InstrumentId
+    side: Side
+    size_requested: float
+    size_filled: float
+    price: float
+    propensity: float = 1.0
+    t: float = 0.0
+
+@dataclass
+class StepEvent:
+    """Generic logged event; only `t` and `type` are required, every other field has a default."""
+    t: float  # presumably the event time on the same clock as Opportunity.t - TODO confirm
+    type: EventType  # imported from .constants
+    instrument_id: InstrumentId | None = None  # None when the event is not tied to an instrument
+    opportunity_id: OpportunityId | None = None  # None when the event is not tied to an opportunity
+    price: float | None = None
+    size: float | None = None
+    propensity: float = 1.0
+    metadata: dict[str, Any] = field(default_factory=dict)  # default_factory gives each instance its own dict
+
+@dataclass
+class StepLogs:
+    """Container for all logging data from a simulation step.
+
+    Supports detailed event logging (for OPE) and an aggregate-only mode (for fast
+    simulation); every field has a default, so StepLogs() is an empty aggregate-only log.
+    true_demand vs censored_fills matters for research on demand estimation under censorship.
+
+    Attributes:
+        events: Detailed event log (None if LogLevel != FULL; LogLevel is defined outside this module)
+        executions: List of executed transactions (None if LogLevel != FULL)
+        aggregates: Always-available aggregate statistics (empty dict by default)
+        true_demand: Oracle demand before censorship (for research, not in obs)
+        censored_fills: Realized fills after position constraints (observable)
+    """
+    events: list[StepEvent] | None = None
+    executions: list[Execution] | None = None
+    aggregates: dict[str, Any] = field(default_factory=dict)
+    true_demand: np.ndarray | None = None
+    censored_fills: np.ndarray | None = None
+
+@dataclass
+class StepMetrics:
+    """Computed metrics for a single simulation step.
+
+    Metrics are domain-aware: retail uses revenue/cost/holding cost (position_cost), market
+    making uses spread_capture and inventory risk. Every field defaults to 0.0 (or an empty dict).
+
+    Attributes:
+        pnl: Profit and loss (revenue - cost for retail, mark-to-market for finance)
+        revenue: Gross revenue from sales/executions
+        cost: Cost of goods sold or position acquisition cost
+        units_traded: Total units/shares transacted
+        position_cost: Holding cost (retail) or inventory risk penalty (finance)
+        lost_opportunity: Cost of stockouts or missed fills
+        spread_capture: Bid-ask spread captured (market making)
+        volatility: Price volatility metric for UX consideration
+        conversion: Fill rate (executions / opportunities)
+        per_instrument: Per-instrument breakdowns (fills, demand, etc.)
+    """
+    pnl: float = 0.0
+    revenue: float = 0.0
+    cost: float = 0.0
+    units_traded: float = 0.0
+    position_cost: float = 0.0
+    lost_opportunity: float = 0.0
+    spread_capture: float = 0.0
+    volatility: float = 0.0
+    conversion: float = 0.0
+    per_instrument: dict[str, np.ndarray] = field(default_factory=dict)
+
+@dataclass
+class MarketState:
+    """External market conditions and competitor state.
+
+    For retail: competitor_quotes drives cross-elasticity effects.
+    For finance: mid_prices and volatility drive execution dynamics.
+
+    Attributes:
+        competitor_quotes: Competitor posted prices (retail); None by default
+        mid_prices: Market mid-prices for assets (finance); None by default
+        volatility: Per-instrument volatility estimate; None by default
+        regime: Market regime identifier (normal, price_war, high_vol, etc.); defaults to 'normal'
+        t: Timestamp of this market state; defaults to 0.0
+    """
+    competitor_quotes: np.ndarray | None = None  # the only MarketState field Observation.to_flat() reads
+    mid_prices: np.ndarray | None = None
+    volatility: np.ndarray | None = None
+    regime: str = 'normal'
+    t: float = 0.0
+
+@dataclass
+class HiddenState:
+    """Internal simulator state not exposed to the agent.
+
+    Contains oracle information for research analysis and history needed for
+    non-stationary dynamics. Every field has a default, so HiddenState() is valid.
+
+    Attributes:
+        true_demand_intensity: Latent demand multiplier; defaults to 1.0
+        contamination: Fraction of arrivals that are adversarial/scraper; defaults to 0.0
+        regime: Current market/competitor regime; defaults to 'normal'
+        quote_history: History of agent quotes for volatility calculation; starts empty
+        market_history: History of market states for analysis; starts empty
+    """
+    true_demand_intensity: float = 1.0
+    contamination: float = 0.0
+    regime: str = 'normal'  # separate field from MarketState.regime; nothing here keeps the two in sync
+    quote_history: list[np.ndarray] = field(default_factory=list)
+    market_history: list[MarketState] = field(default_factory=list)
+
+@dataclass
+class Observation:
+    """What the agent is allowed to see: the censored view of the simulator.
+
+    Invariant: an Observation never carries true_demand, only censored
+    fills, which is what enforces the censorship research setting.
+
+    Attributes:
+        quotes: Currently posted quotes (the agent's last action)
+        position: Current inventory/position state, or None
+        fills: Censored execution counts per instrument
+        exposures: Opportunity exposure counts per instrument
+        market: Observable market state (competitor prices, volatility), or None
+        t: Current timestep
+        extra: Additional observable features
+
+    Methods:
+        to_flat: Flatten to numpy array for gym compatibility
+    """
+    quotes: np.ndarray
+    position: np.ndarray | None
+    fills: np.ndarray
+    exposures: np.ndarray
+    market: MarketState | None
+    t: int
+    extra: dict[str, Any] = field(default_factory=dict)
+
+    def to_flat(self) -> np.ndarray:
+        """Concatenate the array fields into one 1D vector for gym environments."""
+        # Optional pieces follow the mandatory ones; `t` and `extra` are left out.
+        rivals = self.market.competitor_quotes if self.market else None
+        optional = [arr for arr in (self.position, rivals) if arr is not None]
+        pieces = [self.quotes, self.fills, self.exposures] + optional
+        return np.concatenate([arr.flatten() for arr in pieces])
+
+@dataclass
+class StepResult:
+    """Complete result from a simulation step.
+
+    Mirrors gymnasium's obs/reward/terminated/truncated/info and adds metrics, logs, and hidden state.
+    NOTE(review): gymnasium treats a time limit as truncation; the two flags below look swapped - confirm.
+
+    Attributes:
+        obs: Observable state (censored)
+        reward: Scalar reward from objective function
+        terminated: Episode ended naturally (max_steps reached)
+        truncated: Episode ended early (bankruptcy, constraint violation)
+        info: Additional info dict (contains true_demand for research)
+        metrics: Computed metrics for this step
+        logs: Event logs and aggregates
+        hidden: Internal simulator state (oracle info)
+    """
+    obs: Observation
+    reward: float
+    terminated: bool
+    truncated: bool
+    info: dict[str, Any]
+    metrics: StepMetrics
+    logs: StepLogs
+    hidden: HiddenState  # all eight fields are required; none has a default