shock: defining new lab environment and formulation

2026-06-01 09:03:35 +00:00 · 2026-01-23 10:37:32 +01:00
parent a033e77697
commit 4e2e41d943
41 changed files with 4175 additions and 0 deletions
--- a/lab/case/thesis/objectives.py
+++ b/lab/case/thesis/objectives.py
@@ -0,0 +1,228 @@
+"""
+Thesis-specific objectives implementing robust pricing under contamination.
+
+Implements the Maximin objective from Eq 23:
+π* = argmax_π min_{Q ∈ U_ε} E_d~Q[R(p,d) - λ·COI(p)]
+
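+Key components:
+- COIObjective: Cost of Information penalty (Definition 1)
+- RobustStackelbergObjective: Maximin objective; the worst case over the
+  Wasserstein ambiguity set is approximated by a contamination-scaled penalty
+- UXPenalty: User experience degradation from volatility
+- AdaptiveObjective: Profit minus a COI penalty whose weight adapts to contamination
+- make_thesis_objective: Factory for the standard thesis objective composition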
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+from ...outlet.objectives.base import BaseObjective, CompositeObjective
+from ...outlet.types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
+from ...outlet.math_util import safe_log, EPS
+
+class COIObjective(BaseObjective):
+    """Cost of Information (COI) leakage penalty, after Definition 1.
+
+    Definition 1 gives COI(π) = E[P] - p_min: the expected price premium over
+    marginal cost, i.e. the platform's pricing power, which agent
+    reconnaissance erodes by revealing the price distribution to buyers.
+
+    What is scored here is the leakage surrogate
+        COI_leakage = α · InfoValue
+    where α is read from ``hidden.contamination``. The reward is that
+    leakage, weighted by ``lambda_coi`` and negated.
+    """
+
+    def __init__(self, lambda_coi: float = 1.0, use_revelation: bool = False):
+        """
+        Args:
+            lambda_coi: Weight on COI penalty
+            use_revelation: If True, InfoValue is the mean relative distance of
+                the quoted prices from the reference prices (a stand-in for
+                the surprise -log π(p)); if False, InfoValue is the constant 1.0
+        """
+        self.use_revelation = use_revelation
+        self.lambda_coi = lambda_coi
+
+    def _info_value(self, quote: Quote, instruments: InstrumentSet) -> float:
+        """Return InfoValue for whichever surrogate ``use_revelation`` selects."""
+        if not self.use_revelation:
+            # query-tax surrogate: each agent query incurs constant leakage
+            return 1.0
+        # revelation surrogate: mean of |price - ref| / (ref + EPS)
+        relative_gap = np.abs(quote.prices - instruments.refs) / (instruments.refs + EPS)
+        return np.mean(relative_gap)
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        """Return -lambda_coi · α · InfoValue (``metrics`` and ``obs`` are not read)."""
+        contamination = hidden.contamination
+        leakage = contamination * self._info_value(quote, instruments)
+        return -self.lambda_coi * leakage
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        """Return the penalty alongside the contamination and the mean relative margin."""
+        contamination = hidden.contamination
+        # margin over cost, relative to cost; EPS is added to the denominator
+        # (presumably to avoid dividing by zero)
+        relative_margin = (quote.prices - instruments.costs) / (instruments.costs + EPS)
+        penalty = self.reward(quote, instruments, metrics, hidden, obs)
+        return {
+            'coi_penalty': penalty,
+            'contamination': contamination,
+            'avg_margin': float(np.mean(relative_margin)),
+        }
+
+@dataclass
+class RobustObjectiveConfig:
+    """Configuration for robust Stackelberg objective.
+
+    A plain container of float weights; every field has a default and no
+    validation is performed here.
+
+    Attributes:
+        lambda_coi: Weight on COI penalty (λ in Eq 23)
+        lambda_ux: Weight on UX penalty
+        lambda_volatility: Weight on price volatility penalty
+        gamma_inventory: Inventory risk aversion
+        wasserstein_epsilon: Ambiguity set radius (ε in Eq 21)
+    """
+    # multiplies avg_margin · contamination in RobustStackelbergObjective
+    lambda_coi: float = 0.5
+    # NOTE(review): RobustStackelbergObjective in this file never reads lambda_ux — confirm intent
+    lambda_ux: float = 0.1
+    # multiplies metrics.volatility
+    lambda_volatility: float = 0.2
+    # multiplies metrics.position_cost
+    gamma_inventory: float = 0.1
+    # used twice: added to contamination (capped at 1.0) and as the weight of the robustness penalty
+    wasserstein_epsilon: float = 0.1
+
+class RobustStackelbergObjective(BaseObjective):
+    """Implements the Maximin Objective from thesis Eq 23.
+
+    π* = argmax_π min_{Q ∈ U_ε(P̂_N)} E_d~Q[R(p,d) - λ·COI(p)]
+
+    The per-step reward is profit (revenue - cost) minus five penalties:
+    1. COI:              lambda_coi · avg_margin · α
+    2. Volatility:       lambda_volatility · metrics.volatility
+    3. Position:         gamma_inventory · metrics.position_cost
+    4. Lost opportunity: lost_opportunity_weight · metrics.lost_opportunity
+    5. Robustness:       ε · avg_margin · min(α + ε, 1), ε = wasserstein_epsilon
+
+    α is ``hidden.contamination`` and avg_margin is the mean of
+    ``quote.prices - instruments.costs``. The min over the ambiguity set U_ε
+    is not solved; term 5 approximates it by charging the margin exposure at
+    the contamination level shifted up by ε.
+
+    NOTE(review): ``cfg.lambda_ux`` is not read by this objective, and a
+    negative avg_margin turns terms 1 and 5 into bonuses — confirm both are
+    intended.
+    """
+
+    # class-level fallback so instances built without __init__ still work
+    lost_opportunity_weight: float = 0.1
+
+    def __init__(self, cfg: RobustObjectiveConfig | None = None,
+                 lost_opportunity_weight: float = 0.1):
+        """
+        Args:
+            cfg: Penalty weights; a default RobustObjectiveConfig is used when None
+            lost_opportunity_weight: Weight on ``metrics.lost_opportunity``
+                (the default 0.1 is the value that used to be hard-coded)
+        """
+        self.cfg = RobustObjectiveConfig() if cfg is None else cfg
+        self.lost_opportunity_weight = lost_opportunity_weight
+
+    def _terms(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState) -> tuple[float, float, dict[str, float]]:
+        """Return (α, avg_margin, penalties); penalties are costs to subtract from profit.
+
+        Single source of truth for the formulas, shared by reward() and
+        breakdown() so the two cannot drift apart.
+        """
+        cfg = self.cfg
+        alpha = hidden.contamination
+        avg_margin = float(np.mean(quote.prices - instruments.costs))
+        # worst-case contamination within the ε-ball, capped at 1.0
+        worst_case_alpha = min(alpha + cfg.wasserstein_epsilon, 1.0)
+        penalties = {
+            'coi_penalty': cfg.lambda_coi * avg_margin * alpha,
+            'volatility_penalty': cfg.lambda_volatility * metrics.volatility,
+            'position_penalty': cfg.gamma_inventory * metrics.position_cost,
+            'lost_penalty': self.lost_opportunity_weight * metrics.lost_opportunity,
+            'robustness_penalty': cfg.wasserstein_epsilon * avg_margin * worst_case_alpha,
+        }
+        return alpha, avg_margin, penalties
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        """Return profit minus the five penalties (``obs`` is not read)."""
+        _, _, penalties = self._terms(quote, instruments, metrics, hidden)
+        profit = metrics.revenue - metrics.cost
+        # subtraction order is kept fixed so floating-point results are reproducible
+        return (profit
+                - penalties['coi_penalty']
+                - penalties['volatility_penalty']
+                - penalties['position_penalty']
+                - penalties['lost_penalty']
+                - penalties['robustness_penalty'])
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        """Return the reward components; each ``*_penalty`` entry is reported negated.
+
+        Also reports the contamination and ``avg_margin_pct``, the mean margin
+        divided by (mean cost + EPS).
+        """
+        alpha, avg_margin, penalties = self._terms(quote, instruments, metrics, hidden)
+        report = {
+            'revenue': metrics.revenue,
+            'cost': metrics.cost,
+            'profit': metrics.revenue - metrics.cost,
+        }
+        # penalties enter the reward with a minus sign, so report them as negatives
+        report.update({name: -value for name, value in penalties.items()})
+        report['contamination'] = alpha
+        report['avg_margin_pct'] = avg_margin / (float(np.mean(instruments.costs)) + EPS)
+        return report
+
+class UXPenalty(BaseObjective):
+    """Quadratic penalty on price volatility above a tolerated level.
+
+    High price volatility degrades UX for legitimate human users. This term
+    keeps the defense against agent reconnaissance from harming real
+    customers: volatility up to ``max_acceptable_volatility`` costs nothing,
+    and anything beyond it is charged ``scale`` times the squared excess.
+    """
+
+    def __init__(self, scale: float = 1.0, max_acceptable_volatility: float = 0.1):
+        """
+        Args:
+            scale: Multiplier on the squared excess volatility
+            max_acceptable_volatility: Volatility level below which no penalty applies
+        """
+        self.max_vol = max_acceptable_volatility
+        self.scale = scale
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        """Return -scale · excess², where excess is the volatility above the threshold."""
+        overshoot = metrics.volatility - self.max_vol
+        # only volatility beyond the threshold is charged
+        excess_vol = overshoot if overshoot > 0 else 0
+        return -self.scale * (excess_vol ** 2)
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        """Return the penalty together with the raw volatility it was computed from."""
+        penalty = self.reward(quote, instruments, metrics, hidden, obs)
+        return {
+            'ux_penalty': penalty,
+            'volatility': metrics.volatility,
+        }
+
+class AdaptiveObjective(BaseObjective):
+    """Objective that adapts weights based on estimated contamination.
+
+    reward = profit - λ(α) · avg_margin · α
+
+    where α is ``hidden.contamination``, avg_margin is the mean of
+    ``quote.prices - instruments.costs``, and
+        λ(α) = base + (max - base) · sigmoid(rate · (α - midpoint))
+
+    Intent: when contamination is low, focus on revenue maximization; when
+    contamination is high, increase the COI defense weight. (This assumes
+    ``math_util.sigmoid`` is an increasing function with values in [0, 1] —
+    TODO confirm.)
+    """
+
+    # class-level fallback so instances built without __init__ still work
+    midpoint: float = 0.3
+
+    def __init__(self, base_lambda_coi: float = 0.3, max_lambda_coi: float = 2.0,
+                 adaptation_rate: float = 2.0, midpoint: float = 0.3):
+        """
+        Args:
+            base_lambda_coi: COI weight the sigmoid scales up from
+            max_lambda_coi: COI weight the sigmoid scales up to
+            adaptation_rate: Multiplier on (α - midpoint) inside the sigmoid
+            midpoint: Contamination level subtracted from α inside the sigmoid.
+                The default 0.3 is the value that used to be hard-coded; the
+                old inline comment said 0.5, which the code never used.
+        """
+        self.base_lambda = base_lambda_coi
+        self.max_lambda = max_lambda_coi
+        self.rate = adaptation_rate
+        self.midpoint = midpoint
+
+    def _adaptive_lambda(self, alpha: float) -> float:
+        """Return λ(α) = base + (max - base) · sigmoid(rate · (α - midpoint))."""
+        # imported at call time, as in the original code
+        from ...outlet.math_util import sigmoid
+        scale = sigmoid(self.rate * (alpha - self.midpoint))
+        return self.base_lambda + (self.max_lambda - self.base_lambda) * scale
+
+    def _coi_terms(self, quote: Quote, instruments: InstrumentSet,
+                   alpha: float) -> tuple[float, float]:
+        """Return (λ(α), λ(α) · avg_margin · α), shared by reward() and breakdown()."""
+        lambda_coi = self._adaptive_lambda(alpha)
+        margins = quote.prices - instruments.costs
+        return lambda_coi, lambda_coi * float(np.mean(margins)) * alpha
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        """Return profit minus the adaptively weighted COI penalty (``obs`` is not read)."""
+        _, coi_penalty = self._coi_terms(quote, instruments, hidden.contamination)
+        profit = metrics.revenue - metrics.cost
+        return profit - coi_penalty
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        """Return profit, the current λ(α), the contamination and the COI term.
+
+        ``coi_penalty`` is reported negated, so that profit + coi_penalty
+        equals reward().
+        """
+        alpha = hidden.contamination
+        lambda_coi, coi_penalty = self._coi_terms(quote, instruments, alpha)
+        return {
+            'profit': metrics.revenue - metrics.cost,
+            'adaptive_lambda': lambda_coi,
+            'contamination': alpha,
+            'coi_penalty': -coi_penalty,
+        }
+
+def make_thesis_objective(lambda_coi: float = 0.5, lambda_ux: float = 0.1,
+                          lambda_vol: float = 0.2) -> CompositeObjective:
+    """Create the standard thesis objective composition.
+
+    Builds a RobustObjectiveConfig from the three weights (``lambda_vol`` is
+    stored as ``lambda_volatility``; the remaining config fields keep their
+    defaults) and wraps one RobustStackelbergObjective in a CompositeObjective,
+    paired with 1.0 (presumably its weight — confirm against CompositeObjective).
+    """
+    config = RobustObjectiveConfig(
+        lambda_coi=lambda_coi,
+        lambda_ux=lambda_ux,
+        lambda_volatility=lambda_vol,
+    )
+    weighted_terms = [(RobustStackelbergObjective(config), 1.0)]
+    return CompositeObjective(weighted_terms)