Files
PHANTOM/lab/case/thesis/objectives.py

229 lines
9.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Thesis-specific objectives implementing robust pricing under contamination.
Implements the Maximin objective from Eq 23:
π* = argmax_π min_{Q ∈ U_ε} E_d~Q[R(p,d) - λ·COI(p)]
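Key components:
- COIObjective: Cost of Information penalty (Definition 1)
- RobustObjectiveConfig: Weights and ambiguity radius for the robust objective
- RobustStackelbergObjective: Full maximin objective with Wasserstein robustness
- UXPenalty: User experience degradation from volatility
- AdaptiveObjective: COI weight that adapts to estimated contamination
- make_thesis_objective: Factory for the standard thesis objective composition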
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
from ...outlet.objectives.base import BaseObjective, CompositeObjective
from ...outlet.types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
from ...outlet.math_util import safe_log, EPS
class COIObjective(BaseObjective):
    """Cost of Information (COI) leakage penalty (thesis Definition 1).

    The thesis defines COI(π) = E[P] - p_min, the expected price premium
    that agent reconnaissance erodes. This class scores a leakage
    surrogate instead:

        reward = -lambda_coi * hidden.contamination * info_value

    where ``info_value`` is either the constant 1.0 (query-tax surrogate)
    or the mean relative absolute deviation of the quoted prices from
    ``instruments.refs`` (revelation surrogate).
    """

    def __init__(self, lambda_coi: float = 1.0, use_revelation: bool = False):
        """Store the penalty weight and the surrogate selector.

        Args:
            lambda_coi: Multiplier applied to the leakage term.
            use_revelation: If True, info value is the mean relative
                deviation of prices from reference prices; if False,
                info value is the constant 1.0.
        """
        self.use_revelation = use_revelation
        self.lambda_coi = lambda_coi

    def _info_value(self, quote: Quote, instruments: InstrumentSet):
        """Return the per-step information value for the active surrogate."""
        if not self.use_revelation:
            # Query-tax surrogate: every step leaks the same fixed amount.
            return 1.0
        # Revelation surrogate: prices far from the reference count as
        # more revealing. EPS keeps the division defined for zero refs.
        relative_gap = np.abs(quote.prices - instruments.refs) / (instruments.refs + EPS)
        return np.mean(relative_gap)

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return the (non-positive for non-negative inputs) COI penalty.

        ``metrics`` and ``obs`` are accepted for interface compatibility
        and are not read here.
        """
        contamination = hidden.contamination
        leaked = contamination * self._info_value(quote, instruments)
        return -self.lambda_coi * leaked

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return the penalty plus diagnostic values for logging.

        Keys: ``coi_penalty`` (same value as :meth:`reward`),
        ``contamination`` (``hidden.contamination``) and ``avg_margin``
        (mean of ``(price - cost) / (cost + EPS)``).
        """
        relative_margins = (quote.prices - instruments.costs) / (instruments.costs + EPS)
        report = {'coi_penalty': self.reward(quote, instruments, metrics, hidden, obs)}
        report['contamination'] = hidden.contamination
        report['avg_margin'] = float(np.mean(relative_margins))
        return report
@dataclass
class RobustObjectiveConfig:
    """Tunable weights for the robust Stackelberg objective.

    Every field is a plain float with a default, so ``RobustObjectiveConfig()``
    yields a ready-to-use configuration.
    """

    # λ in Eq 23: weight on the Cost-of-Information penalty.
    lambda_coi: float = 0.5
    # Weight on the UX penalty.
    lambda_ux: float = 0.1
    # Weight on the price-volatility penalty.
    lambda_volatility: float = 0.2
    # Inventory risk aversion.
    gamma_inventory: float = 0.1
    # ε in Eq 21: radius of the ambiguity set.
    wasserstein_epsilon: float = 0.1
class RobustStackelbergObjective(BaseObjective):
    """Implements the Maximin Objective from thesis Eq 23.

        π* = argmax_π min_{Q ∈ U_ε(P̂_N)} E_d~Q[R(p,d) - λ·COI(p)]

    The per-step reward is profit minus five penalty terms:

    1. COI penalty: ``lambda_coi * avg_margin * contamination``
    2. Volatility penalty: ``lambda_volatility * metrics.volatility``
    3. Position penalty: ``gamma_inventory * metrics.position_cost``
    4. Lost-opportunity penalty:
       ``LOST_OPPORTUNITY_WEIGHT * metrics.lost_opportunity``
    5. Robustness penalty:
       ``wasserstein_epsilon * avg_margin * min(contamination + ε, 1)``

    The min over the ambiguity set U_ε is approximated by term 5, which
    penalizes high-contamination scenarios more heavily.

    All penalty terms are computed in one place (:meth:`_penalty_terms`)
    so that :meth:`reward` and :meth:`breakdown` cannot drift apart.

    NOTE: ``cfg.lambda_ux`` is not read by this class.
    """

    # Weight applied to ``metrics.lost_opportunity``. Kept as a class
    # attribute so subclasses can override it without touching the formula.
    LOST_OPPORTUNITY_WEIGHT = 0.1

    def __init__(self, cfg: RobustObjectiveConfig | None = None):
        """Store ``cfg``, or a default ``RobustObjectiveConfig`` when None."""
        self.cfg = cfg if cfg is not None else RobustObjectiveConfig()

    def _penalty_terms(self, quote: Quote, instruments: InstrumentSet,
                       metrics: StepMetrics, hidden: HiddenState) -> tuple[float, dict[str, float]]:
        """Compute the average margin and every penalty as a positive cost.

        Returns:
            ``(avg_margin, penalties)`` where ``penalties`` maps the
            breakdown key of each term to its unsigned magnitude, in the
            order the terms are subtracted from profit.
        """
        cfg = self.cfg
        alpha = hidden.contamination
        # Absolute (not relative) margin: mean of price minus cost.
        avg_margin = float(np.mean(quote.prices - instruments.costs))
        # Worst-case contamination inside the ε-ball, capped at 1.
        worst_case_alpha = min(alpha + cfg.wasserstein_epsilon, 1.0)
        penalties = {
            'coi_penalty': cfg.lambda_coi * avg_margin * alpha,
            'volatility_penalty': cfg.lambda_volatility * metrics.volatility,
            'position_penalty': cfg.gamma_inventory * metrics.position_cost,
            'lost_penalty': self.LOST_OPPORTUNITY_WEIGHT * metrics.lost_opportunity,
            'robustness_penalty': cfg.wasserstein_epsilon * avg_margin * worst_case_alpha,
        }
        return avg_margin, penalties

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return profit (revenue - cost) minus all penalty terms.

        ``obs`` is accepted for interface compatibility and is not read.
        """
        _, penalties = self._penalty_terms(quote, instruments, metrics, hidden)
        profit = metrics.revenue - metrics.cost
        # Subtract left-to-right in a fixed order to keep floating-point
        # results reproducible.
        total = (profit
                 - penalties['coi_penalty']
                 - penalties['volatility_penalty']
                 - penalties['position_penalty']
                 - penalties['lost_penalty']
                 - penalties['robustness_penalty'])
        return total

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return each component of :meth:`reward` for logging.

        Penalty entries are reported with a negative sign (as contributions
        to the reward). ``avg_margin_pct`` is the average absolute margin
        divided by the mean cost (plus EPS to avoid division by zero).
        """
        avg_margin, penalties = self._penalty_terms(quote, instruments, metrics, hidden)
        report = {
            'revenue': metrics.revenue,
            'cost': metrics.cost,
            'profit': metrics.revenue - metrics.cost,
        }
        report.update({name: -value for name, value in penalties.items()})
        report['contamination'] = hidden.contamination
        report['avg_margin_pct'] = avg_margin / (float(np.mean(instruments.costs)) + EPS)
        return report
class UXPenalty(BaseObjective):
    """Penalty for price volatility above an acceptable threshold.

    High price volatility degrades the experience of legitimate human
    users, so this term keeps the defense against agent reconnaissance
    from harming real customers. Volatility at or below the threshold is
    not penalized; the excess above it is penalized quadratically.
    """

    def __init__(self, scale: float = 1.0, max_acceptable_volatility: float = 0.1):
        """Store the penalty multiplier and the volatility threshold.

        Args:
            scale: Multiplier on the squared excess volatility.
            max_acceptable_volatility: Volatility level below which no
                penalty applies.
        """
        self.max_vol = max_acceptable_volatility
        self.scale = scale

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return ``-scale * max(0, volatility - max_vol) ** 2``.

        Only ``metrics.volatility`` is read; the other arguments exist for
        interface compatibility.
        """
        overshoot = metrics.volatility - self.max_vol
        # Clip at zero so volatility under the threshold costs nothing.
        clipped = overshoot if overshoot > 0 else 0
        return -self.scale * (clipped ** 2)

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return the penalty together with the raw volatility reading."""
        penalty = self.reward(quote, instruments, metrics, hidden, obs)
        return {'ux_penalty': penalty, 'volatility': metrics.volatility}
class AdaptiveObjective(BaseObjective):
    """Objective whose COI weight adapts to the estimated contamination.

    When contamination is low, focus on revenue maximization.
    When contamination is high, increase COI defense weight.

    The weight follows

        λ(α) = base + (max - base) * sigmoid(rate * (α - midpoint))

    and the reward is ``profit - λ(α) * avg_margin * α``.
    """

    def __init__(self, base_lambda_coi: float = 0.3, max_lambda_coi: float = 2.0,
                 adaptation_rate: float = 2.0, midpoint: float = 0.3):
        """Store the adaptation parameters.

        Args:
            base_lambda_coi: Lower end of the interpolated COI weight.
            max_lambda_coi: Upper end of the interpolated COI weight.
            adaptation_rate: Multiplier on ``(α - midpoint)`` inside the
                sigmoid.
            midpoint: Contamination level the sigmoid argument is centred
                on. Defaults to 0.3, the value that was previously
                hard-coded.
        """
        self.base_lambda = base_lambda_coi
        self.max_lambda = max_lambda_coi
        self.rate = adaptation_rate
        self.midpoint = midpoint

    def _adaptive_lambda(self, alpha: float) -> float:
        """Return the COI weight λ(α) for contamination ``alpha``."""
        # sigmoid scaling: λ(α) = base + (max-base) * sigmoid(rate*(α-midpoint))
        # NOTE(review): sigmoid is presumably the logistic function mapping
        # to (0, 1) — confirm in outlet.math_util.
        from ...outlet.math_util import sigmoid
        scale = sigmoid(self.rate * (alpha - self.midpoint))
        return self.base_lambda + (self.max_lambda - self.base_lambda) * scale

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return profit minus the adaptively weighted COI penalty.

        ``obs`` is accepted for interface compatibility and is not read.
        """
        alpha = hidden.contamination
        lambda_coi = self._adaptive_lambda(alpha)
        profit = metrics.revenue - metrics.cost
        # Absolute margin (price minus cost), averaged over instruments.
        margins = quote.prices - instruments.costs
        coi_penalty = lambda_coi * float(np.mean(margins)) * alpha
        return profit - coi_penalty

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return profit, the current adaptive weight and the contamination."""
        alpha = hidden.contamination
        return {
            'profit': metrics.revenue - metrics.cost,
            'adaptive_lambda': self._adaptive_lambda(alpha),
            'contamination': alpha,
        }
def make_thesis_objective(lambda_coi: float = 0.5, lambda_ux: float = 0.1,
                          lambda_vol: float = 0.2) -> CompositeObjective:
    """Build the standard thesis objective.

    Args:
        lambda_coi: Forwarded to ``RobustObjectiveConfig.lambda_coi``.
        lambda_ux: Forwarded to ``RobustObjectiveConfig.lambda_ux``.
        lambda_vol: Forwarded to ``RobustObjectiveConfig.lambda_volatility``.

    Returns:
        A ``CompositeObjective`` holding a single
        ``RobustStackelbergObjective`` paired with the value 1.0
        (presumably its weight — confirm against ``CompositeObjective``).
    """
    config = RobustObjectiveConfig(
        lambda_coi=lambda_coi,
        lambda_ux=lambda_ux,
        lambda_volatility=lambda_vol,
    )
    weighted_terms = [(RobustStackelbergObjective(config), 1.0)]
    return CompositeObjective(weighted_terms)