shock: defining new lab environment and formulation

This commit is contained in:
2026-01-23 10:37:32 +01:00
parent a033e77697
commit 4e2e41d943
41 changed files with 4175 additions and 0 deletions

318
lab/outlet/types.py Normal file
View File

@@ -0,0 +1,318 @@
"""
Core data types for the Quote-Control simulator.
This module defines the fundamental data structures used throughout the platform:
- Identifiers (InstrumentId, OpportunityId, AgentId)
- Domain objects (Instrument, Quote, Opportunity, Execution)
- Logging structures (StepEvent, StepLogs, StepMetrics)
- State containers (MarketState, HiddenState, Observation, StepResult)
All dataclasses are designed to be serializable and numpy-compatible.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, NewType
import numpy as np
from .constants import Side, InstrumentType, OpportunityType, EventType
InstrumentId = NewType('InstrumentId', int) # unique instrument index
OpportunityId = NewType('OpportunityId', str) # unique opportunity/session ID
AgentId = NewType('AgentId', str) # unique agent/actor ID
@dataclass
class Instrument:
"""Represents a priceable entity in the simulation.
An instrument can be a retail SKU, financial asset, loan product, or subscription.
The cost_basis represents the fundamental value (marginal cost for retail,
mid-price for assets, funding rate for loans).
Attributes:
id: Unique identifier for this instrument
type: Category of instrument (SKU, ASSET, LOAN, SUBSCRIPTION)
cost_basis: Fundamental cost or value (marginal cost, mid-price, funding rate)
reference_price: Base or fair price used for action scaling
attrs: Additional attributes (quality score, category, volatility, etc.)
"""
id: InstrumentId
type: InstrumentType
cost_basis: float
reference_price: float
attrs: dict[str, Any] = field(default_factory=dict)
@dataclass
class InstrumentSet:
"""Collection of instruments with optional position tracking.
Provides vectorized access to instrument properties for efficient computation.
Position can be positive (long/inventory) or negative (short) for financial assets.
Attributes:
instruments: List of Instrument objects
position: Current position per instrument (None = unlimited capacity)
Properties:
n: Number of instruments
costs: Vector of cost bases
refs: Vector of reference prices
"""
instruments: list[Instrument]
position: np.ndarray | None = None
@property
def n(self) -> int: return len(self.instruments)
@property
def costs(self) -> np.ndarray: return np.array([i.cost_basis for i in self.instruments], np.float32)
@property
def refs(self) -> np.ndarray: return np.array([i.reference_price for i in self.instruments], np.float32)
@dataclass
class Quote:
"""Price quote set by the policy - the action in the MDP.
Supports multiple quoting mechanisms:
- Posted price: only `prices` field used
- Two-sided: `prices` as mid, `spreads` for bid-ask width
- Auction: `prices` as reserve prices
The propensity field is critical for off-policy evaluation (OPE).
Attributes:
prices: Posted prices (retail) or mid-quotes (market making)
spreads: Bid-ask spread width for two-sided quoting (None for posted price)
propensity: P(this quote | behavior policy) for importance sampling
metadata: Additional info (prev_prices for delta constraints, etc.)
Properties:
bids: Computed bid prices (mid - spread/2)
asks: Computed ask prices (mid + spread/2)
"""
prices: np.ndarray
spreads: np.ndarray | None = None
propensity: float = 1.0
metadata: dict[str, Any] = field(default_factory=dict)
@property
def bids(self) -> np.ndarray | None:
return self.prices - self.spreads/2 if self.spreads is not None else None
@property
def asks(self) -> np.ndarray | None:
return self.prices + self.spreads/2 if self.spreads is not None else None
@dataclass
class Opportunity:
"""An arrival event that may result in a transaction.
Opportunities are the demand side of the simulation:
- Retail: browsing session with purchase intent
- Market making: incoming market order
- Lending: loan application
The context dict carries segment/type information used by execution models.
Attributes:
id: Unique identifier for this opportunity
type: Category (SESSION, MARKET_ORDER, REQUEST)
side: BUY or SELL intent
instrument_id: Which instrument the opportunity targets
size: Requested transaction size (units, shares, principal)
t: Arrival timestamp
context: Segment info (is_scraper, credit_score, urgency, etc.)
"""
id: OpportunityId
type: OpportunityType
side: Side
instrument_id: InstrumentId
size: float = 1.0
t: float = 0.0
context: dict[str, Any] = field(default_factory=dict)
@dataclass
class Execution:
"""A realized transaction after acceptance and position censorship.
The difference between size_requested and size_filled represents
censored demand due to inventory/position constraints.
Attributes:
opportunity_id: Links back to the originating Opportunity
instrument_id: Which instrument was traded
side: BUY or SELL
size_requested: Original requested size (true demand)
size_filled: Actual filled size after censorship
price: Execution price
propensity: Combined propensity for OPE (quote * acceptance)
t: Execution timestamp
"""
opportunity_id: OpportunityId
instrument_id: InstrumentId
side: Side
size_requested: float
size_filled: float
price: float
propensity: float = 1.0
t: float = 0.0
@dataclass
class StepEvent:
"""Generic logged event"""
t: float
type: EventType
instrument_id: InstrumentId | None = None
opportunity_id: OpportunityId | None = None
price: float | None = None
size: float | None = None
propensity: float = 1.0
metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class StepLogs:
"""Container for all logging data from a simulation step.
Supports both detailed event logging (for OPE) and aggregate-only mode
(for fast simulation). The true_demand vs censored_fills distinction
is critical for research on demand estimation under censorship.
Attributes:
events: Detailed event log (None if LogLevel != FULL)
executions: List of executed transactions (None if LogLevel != FULL)
aggregates: Always-available aggregate statistics
true_demand: Oracle demand before censorship (for research, not in obs)
censored_fills: Realized fills after position constraints (observable)
"""
events: list[StepEvent] | None = None
executions: list[Execution] | None = None
aggregates: dict[str, Any] = field(default_factory=dict)
true_demand: np.ndarray | None = None
censored_fills: np.ndarray | None = None
@dataclass
class StepMetrics:
"""Computed metrics for a single simulation step.
Metrics are domain-aware: retail uses revenue/cost/holding_cost,
market making uses spread_capture and inventory risk.
Attributes:
pnl: Profit and loss (revenue - cost for retail, mark-to-market for finance)
revenue: Gross revenue from sales/executions
cost: Cost of goods sold or position acquisition cost
units_traded: Total units/shares transacted
position_cost: Holding cost (retail) or inventory risk penalty (finance)
lost_opportunity: Cost of stockouts or missed fills
spread_capture: Bid-ask spread captured (market making)
volatility: Price volatility metric for UX consideration
conversion: Fill rate (executions / opportunities)
per_instrument: Per-instrument breakdowns (fills, demand, etc.)
"""
pnl: float = 0.0
revenue: float = 0.0
cost: float = 0.0
units_traded: float = 0.0
position_cost: float = 0.0
lost_opportunity: float = 0.0
spread_capture: float = 0.0
volatility: float = 0.0
conversion: float = 0.0
per_instrument: dict[str, np.ndarray] = field(default_factory=dict)
@dataclass
class MarketState:
"""External market conditions and competitor state.
For retail: competitor_quotes drives cross-elasticity effects.
For finance: mid_prices and volatility drive execution dynamics.
Attributes:
competitor_quotes: Competitor posted prices (retail)
mid_prices: Market mid-prices for assets (finance)
volatility: Per-instrument volatility estimate
regime: Market regime identifier (normal, price_war, high_vol, etc.)
t: Timestamp of this market state
"""
competitor_quotes: np.ndarray | None = None
mid_prices: np.ndarray | None = None
volatility: np.ndarray | None = None
regime: str = 'normal'
t: float = 0.0
@dataclass
class HiddenState:
"""Internal simulator state not exposed to the agent.
Contains oracle information for research analysis and
history needed for non-stationary dynamics.
Attributes:
true_demand_intensity: Latent demand multiplier
contamination: Fraction of arrivals that are adversarial/scraper
regime: Current market/competitor regime
quote_history: History of agent quotes for volatility calculation
market_history: History of market states for analysis
"""
true_demand_intensity: float = 1.0
contamination: float = 0.0
regime: str = 'normal'
quote_history: list[np.ndarray] = field(default_factory=list)
market_history: list[MarketState] = field(default_factory=list)
@dataclass
class Observation:
"""Observable state provided to the agent - censored view only.
Critical invariant: Observation never contains true_demand, only
censored fills. This enforces the censorship research setting.
Attributes:
quotes: Current posted quotes (the agent's last action)
position: Current inventory/position state
fills: Censored execution counts per instrument
exposures: Opportunity exposure counts per instrument
market: Observable market state (competitor prices, volatility)
t: Current timestep
extra: Additional observable features
Methods:
to_flat: Flatten to numpy array for gym compatibility
"""
quotes: np.ndarray
position: np.ndarray | None
fills: np.ndarray
exposures: np.ndarray
market: MarketState | None
t: int
extra: dict[str, Any] = field(default_factory=dict)
def to_flat(self) -> np.ndarray:
"""Flatten observation to 1D numpy array for gym environments."""
parts = [self.quotes, self.fills, self.exposures]
if self.position is not None: parts.append(self.position)
if self.market and self.market.competitor_quotes is not None:
parts.append(self.market.competitor_quotes)
return np.concatenate([p.flatten() for p in parts])
@dataclass
class StepResult:
"""Complete result from a simulation step.
Follows gymnasium convention for obs, reward, terminated, truncated, info.
Additionally provides metrics, logs, and hidden state for research.
Attributes:
obs: Observable state (censored)
reward: Scalar reward from objective function
terminated: Episode ended naturally (max_steps reached)
truncated: Episode ended early (bankruptcy, constraint violation)
info: Additional info dict (contains true_demand for research)
metrics: Computed metrics for this step
logs: Event logs and aggregates
hidden: Internal simulator state (oracle info)
"""
obs: Observation
reward: float
terminated: bool
truncated: bool
info: dict[str, Any]
metrics: StepMetrics
logs: StepLogs
hidden: HiddenState