"""Core data types for the Quote-Control simulator.

The building blocks shared across the platform are declared here:

- Identifiers: InstrumentId, OpportunityId, AgentId
- Domain objects: Instrument, Quote, Opportunity, Execution
- Logging structures: StepEvent, StepLogs, StepMetrics
- State containers: MarketState, HiddenState, Observation, StepResult

All dataclasses are designed to be serializable and numpy-compatible.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, NewType

import numpy as np

from .constants import Side, InstrumentType, OpportunityType, EventType

InstrumentId = NewType('InstrumentId', int)  # unique instrument index
OpportunityId = NewType('OpportunityId', str)  # unique opportunity/session ID
AgentId = NewType('AgentId', str)  # unique agent/actor ID


@dataclass
class Instrument:
    """A priceable entity in the simulation.

    The same structure covers a retail SKU, a financial asset, a loan
    product, or a subscription. ``cost_basis`` holds the fundamental value:
    marginal cost for retail, mid-price for assets, funding rate for loans.

    Attributes:
        id: Unique identifier for this instrument.
        type: Category of instrument (SKU, ASSET, LOAN, SUBSCRIPTION).
        cost_basis: Fundamental cost or value (marginal cost, mid-price,
            funding rate).
        reference_price: Base or fair price used for action scaling.
        attrs: Additional attributes (quality score, category, volatility,
            etc.). Defaults to a fresh empty dict per instance.
    """

    id: InstrumentId
    type: InstrumentType
    cost_basis: float
    reference_price: float
    attrs: dict[str, Any] = field(default_factory=dict)


@dataclass
class InstrumentSet:
    """A group of instruments, optionally paired with a position vector.

    Exposes the instruments' scalar properties as numpy vectors so that
    downstream computation can be vectorized. For financial assets the
    position may be positive (long/inventory) or negative (short).

    Attributes:
        instruments: List of Instrument objects.
        position: Current position per instrument (None = unlimited
            capacity).

    Properties:
        n: Number of instruments.
        costs: Vector of cost bases.
        refs: Vector of reference prices.
    """

    instruments: list[Instrument]
    position: np.ndarray | None = None

    @property
    def n(self) -> int:
        """Number of instruments held in the set."""
        count = len(self.instruments)
        return count

    @property
    def costs(self) -> np.ndarray:
        """Cost basis of every instrument, as a float32 vector in list order."""
        values = [item.cost_basis for item in self.instruments]
        return np.array(values, dtype=np.float32)

    @property
    def refs(self) -> np.ndarray:
        """Reference price of every instrument, as a float32 vector in list order."""
        values = [item.reference_price for item in self.instruments]
        return np.array(values, dtype=np.float32)


@dataclass
class Quote:
    """A price quote chosen by the policy, i.e. the action in the MDP.

    Several quoting mechanisms share this structure:

    - Posted price: only ``prices`` is used.
    - Two-sided: ``prices`` is the mid, ``spreads`` the bid-ask width.
    - Auction: ``prices`` holds reserve prices.

    The ``propensity`` field is critical for off-policy evaluation (OPE).

    Attributes:
        prices: Posted prices (retail) or mid-quotes (market making).
        spreads: Bid-ask spread width for two-sided quoting (None for
            posted price).
        propensity: P(this quote | behavior policy) for importance sampling.
        metadata: Additional info (prev_prices for delta constraints, etc.).

    Properties:
        bids: Computed bid prices (mid - spread/2), or None without spreads.
        asks: Computed ask prices (mid + spread/2), or None without spreads.
    """

    prices: np.ndarray
    spreads: np.ndarray | None = None
    propensity: float = 1.0
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def bids(self) -> np.ndarray | None:
        """Bid prices: ``prices`` minus half of ``spreads``; None if no spreads."""
        if self.spreads is None:
            return None
        return self.prices - self.spreads / 2

    @property
    def asks(self) -> np.ndarray | None:
        """Ask prices: ``prices`` plus half of ``spreads``; None if no spreads."""
        if self.spreads is None:
            return None
        return self.prices + self.spreads / 2


@dataclass
class Opportunity:
    """An arrival event that may turn into a transaction.

    Opportunities form the demand side of the simulation:

    - Retail: browsing session with purchase intent.
    - Market making: incoming market order.
    - Lending: loan application.

    The ``context`` dict carries segment/type information used by execution
    models.

    Attributes:
        id: Unique identifier for this opportunity.
        type: Category (SESSION, MARKET_ORDER, REQUEST).
        side: BUY or SELL intent.
        instrument_id: Which instrument the opportunity targets.
        size: Requested transaction size (units, shares, principal).
        t: Arrival timestamp.
        context: Segment info (is_scraper, credit_score, urgency, etc.).
    """

    id: OpportunityId
    type: OpportunityType
    side: Side
    instrument_id: InstrumentId
    size: float = 1.0
    t: float = 0.0
    context: dict[str, Any] = field(default_factory=dict)


@dataclass
class Execution:
    """A realized transaction, after acceptance and position censorship.

    The gap between ``size_requested`` and ``size_filled`` is the demand
    censored by inventory/position constraints.

    Attributes:
        opportunity_id: Links back to the originating Opportunity.
        instrument_id: Which instrument was traded.
        side: BUY or SELL.
        size_requested: Original requested size (true demand).
        size_filled: Actual filled size after censorship.
        price: Execution price.
        propensity: Combined propensity for OPE (quote * acceptance).
        t: Execution timestamp.
    """

    opportunity_id: OpportunityId
    instrument_id: InstrumentId
    side: Side
    size_requested: float
    size_filled: float
    price: float
    propensity: float = 1.0
    t: float = 0.0


@dataclass
class StepEvent:
    """Generic logged event.

    Only the timestamp ``t`` and the event ``type`` are required; every
    other field is optional and defaults to None (``propensity`` to 1.0,
    ``metadata`` to a fresh empty dict).
    """

    t: float
    type: EventType
    instrument_id: InstrumentId | None = None
    opportunity_id: OpportunityId | None = None
    price: float | None = None
    size: float | None = None
    propensity: float = 1.0
    metadata: dict[str, Any] = field(default_factory=dict)


@dataclass
class StepLogs:
    """All logging data produced by one simulation step.

    Two modes are supported: detailed event logging (for OPE) and an
    aggregate-only mode (for fast simulation). The distinction between
    ``true_demand`` and ``censored_fills`` is critical for research on
    demand estimation under censorship.

    Attributes:
        events: Detailed event log (None if LogLevel != FULL).
        executions: List of executed transactions (None if LogLevel != FULL).
        aggregates: Always-available aggregate statistics.
        true_demand: Oracle demand before censorship (for research, not in
            obs).
        censored_fills: Realized fills after position constraints
            (observable).
    """

    events: list[StepEvent] | None = None
    executions: list[Execution] | None = None
    aggregates: dict[str, Any] = field(default_factory=dict)
    true_demand: np.ndarray | None = None
    censored_fills: np.ndarray | None = None


@dataclass
class StepMetrics:
    """Metrics computed for a single simulation step.

    The metrics are domain-aware: retail uses revenue/cost/holding_cost,
    market making uses spread_capture and inventory risk. Every scalar
    defaults to 0.0.

    Attributes:
        pnl: Profit and loss (revenue - cost for retail, mark-to-market for
            finance).
        revenue: Gross revenue from sales/executions.
        cost: Cost of goods sold or position acquisition cost.
        units_traded: Total units/shares transacted.
        position_cost: Holding cost (retail) or inventory risk penalty
            (finance).
        lost_opportunity: Cost of stockouts or missed fills.
        spread_capture: Bid-ask spread captured (market making).
        volatility: Price volatility metric for UX consideration.
        conversion: Fill rate (executions / opportunities).
        per_instrument: Per-instrument breakdowns (fills, demand, etc.).
    """

    pnl: float = 0.0
    revenue: float = 0.0
    cost: float = 0.0
    units_traded: float = 0.0
    position_cost: float = 0.0
    lost_opportunity: float = 0.0
    spread_capture: float = 0.0
    volatility: float = 0.0
    conversion: float = 0.0
    per_instrument: dict[str, np.ndarray] = field(default_factory=dict)


@dataclass
class MarketState:
    """External market conditions and competitor state.

    Retail: ``competitor_quotes`` drives cross-elasticity effects.
    Finance: ``mid_prices`` and ``volatility`` drive execution dynamics.

    Attributes:
        competitor_quotes: Competitor posted prices (retail).
        mid_prices: Market mid-prices for assets (finance).
        volatility: Per-instrument volatility estimate.
        regime: Market regime identifier (normal, price_war, high_vol, etc.).
        t: Timestamp of this market state.
    """

    competitor_quotes: np.ndarray | None = None
    mid_prices: np.ndarray | None = None
    volatility: np.ndarray | None = None
    regime: str = 'normal'
    t: float = 0.0


@dataclass
class HiddenState:
    """Simulator-internal state that is never exposed to the agent.

    Holds oracle information for research analysis together with the
    history needed for non-stationary dynamics.

    Attributes:
        true_demand_intensity: Latent demand multiplier.
        contamination: Fraction of arrivals that are adversarial/scraper.
        regime: Current market/competitor regime.
        quote_history: History of agent quotes for volatility calculation.
        market_history: History of market states for analysis.
    """

    true_demand_intensity: float = 1.0
    contamination: float = 0.0
    regime: str = 'normal'
    quote_history: list[np.ndarray] = field(default_factory=list)
    market_history: list[MarketState] = field(default_factory=list)


@dataclass
class Observation:
    """The agent-visible state: a censored view only.

    Critical invariant: an Observation never contains true_demand, only
    censored fills. This enforces the censorship research setting.

    Attributes:
        quotes: Current posted quotes (the agent's last action).
        position: Current inventory/position state.
        fills: Censored execution counts per instrument.
        exposures: Opportunity exposure counts per instrument.
        market: Observable market state (competitor prices, volatility).
        t: Current timestep.
        extra: Additional observable features.

    Methods:
        to_flat: Flatten to numpy array for gym compatibility.
    """

    quotes: np.ndarray
    position: np.ndarray | None
    fills: np.ndarray
    exposures: np.ndarray
    market: MarketState | None
    t: int
    extra: dict[str, Any] = field(default_factory=dict)

    def to_flat(self) -> np.ndarray:
        """Flatten the observation into a 1D numpy array for gym environments.

        The pieces are concatenated in a fixed order: ``quotes``, ``fills``,
        ``exposures``, then ``position`` when it is not None, then the
        market's ``competitor_quotes`` when a market is present and carries
        them. ``t``, ``extra`` and the remaining market fields are not part
        of the flat vector.

        Returns:
            The concatenation of the flattened pieces.
        """
        # The three mandatory arrays always lead the vector.
        chunks = [self.quotes, self.fills, self.exposures]

        if self.position is not None:
            chunks.append(self.position)

        market = self.market
        if market and market.competitor_quotes is not None:
            chunks.append(market.competitor_quotes)

        flattened = [chunk.flatten() for chunk in chunks]
        return np.concatenate(flattened)


@dataclass
class StepResult:
    """Everything returned by one simulation step.

    Follows the gymnasium convention of obs, reward, terminated, truncated,
    info, and additionally carries metrics, logs, and hidden state for
    research.

    Attributes:
        obs: Observable state (censored).
        reward: Scalar reward from objective function.
        terminated: Episode ended naturally (max_steps reached).
        truncated: Episode ended early (bankruptcy, constraint violation).
        info: Additional info dict (contains true_demand for research).
        metrics: Computed metrics for this step.
        logs: Event logs and aggregates.
        hidden: Internal simulator state (oracle info).
    """

    # NOTE(review): Gymnasium's own definitions use `terminated` for an
    # MDP-terminal state and `truncated` for an out-of-MDP cutoff such as a
    # time limit, which looks like the reverse of the descriptions above.
    # Confirm which meaning the simulator actually assigns to each flag.

    obs: Observation
    reward: float
    terminated: bool
    truncated: bool
    info: dict[str, Any]
    metrics: StepMetrics
    logs: StepLogs
    hidden: HiddenState