Files
PHANTOM/engine/lib/wrappers.py

105 lines
3.5 KiB
Python

"""Economic metrics wrapper - calculates thesis-aligned KPIs and injects into info dict."""
import gymnasium as gym
import numpy as np
class EconomicMetricsWrapper(gym.Wrapper):
    """Gym wrapper that adds per-step economic KPIs to the ``info`` dict.

    After every ``step`` the wrapper reads the quoted prices and the demand
    from the unwrapped environment (its ``_prices`` and ``_demand``
    attributes), derives the metrics below and stores them under
    ``info["economics"]``.

    Metric definitions (following the thesis):
    - COI level: E[P] - p_min (Definition 1)
    - Margin: (avg_price - p_min) / avg_price, with the denominator
      floored at 1e-6
    - Regret: 1 - (revenue / baseline_revenue); reported as 0.0 when no
      positive baseline is configured
    """

    # Optional keys that are copied unchanged from ``info`` into
    # ``info["economics"]`` whenever the wrapped environment reports them.
    _PASSTHROUGH_KEYS = (
        "coi_mix",
        "coi_base",
        "coi_leakage",
        "coi_penalty",
        "ux_penalty",
        "volatility",
        "upward_volatility",
        "supra_penalty",
        "supra_share",
        "competitive_anchor",
        "profit",
        "cost_floor",
        "reward_revenue",
        "reward_total",
        "agent_prob",
        "alpha_adv",
        "alpha_nominal",
        "erosion_share",
        "effective_price_mean",
    )

    def __init__(
        self, env: gym.Env, p_min: float = 10.0, baseline_revenue: float | None = None
    ):
        """Wrap ``env`` and start with empty per-episode histories.

        Args:
            env: Environment to wrap.
            p_min: Price floor subtracted from the mean price when computing
                the margin and the COI level.
            baseline_revenue: Reference revenue for the regret metric; when
                it is ``None`` or not positive, regret stays at 0.0.
        """
        super().__init__(env)
        self.p_min = p_min
        self.baseline_revenue = baseline_revenue
        # One entry per step since the last reset.
        self._price_history: list[np.ndarray] = []
        self._revenue_history: list[float] = []

    def reset(self, **kwargs):
        """Reset the wrapped env and discard the recorded episode histories.

        All keyword arguments are forwarded to the wrapped environment.

        Returns:
            The ``(obs, info)`` pair produced by the wrapped environment.
        """
        observation, reset_info = self.env.reset(**kwargs)
        # Fresh lists, so references to the old histories are not mutated.
        self._price_history, self._revenue_history = [], []
        return observation, reset_info

    def step(self, action):
        """Step the wrapped env and enrich ``info`` with economic metrics.

        Besides ``info["economics"]``, the keys ``"prices"``,
        ``"effective_prices"`` and ``"demand"`` are (over)written with array
        copies of the values used for the calculation.

        Returns:
            The wrapped environment's ``(obs, reward, terminated, truncated,
            info)`` tuple, with ``info`` updated in place.
        """
        obs, reward, terminated, truncated, info = self.env.step(action)

        # Raw state is read straight from the innermost environment.
        quoted = np.asarray(self.env.unwrapped._prices, dtype=float)
        reported = np.asarray(info.get("effective_prices", quoted), dtype=float)
        # Effective prices whose shape differs from the quoted ones are ignored.
        effective = reported if reported.shape == quoted.shape else quoted

        # Demand is read with ``.get(index, 0.0)``: indices without an entry
        # count as zero demand.
        demand_by_index = self.env.unwrapped._demand
        demand = np.array(
            [demand_by_index.get(idx, 0.0) for idx in range(len(quoted))]
        )

        # Revenue reported by the environment wins over the locally computed one.
        computed_revenue = np.sum(effective * demand)
        revenue = float(info.get("revenue", computed_revenue))
        quoted_revenue = float(np.sum(quoted * demand))

        avg_price = float(np.mean(effective))
        coi_level = avg_price - self.p_min  # E[P] - p_min per thesis Def 1
        margin = coi_level / max(avg_price, 1e-6)

        self._price_history.append(effective.copy())
        self._revenue_history.append(revenue)

        # Regret relative to the baseline (golden path) revenue.
        baseline = self.baseline_revenue
        regret = 1.0 - (revenue / baseline) if baseline and baseline > 0 else 0.0

        economics = {
            "revenue": revenue,
            "quoted_revenue": quoted_revenue,
            "margin": margin,
            "coi_level": coi_level,
            "regret": regret,
        }
        economics.update(
            (key, info[key]) for key in self._PASSTHROUGH_KEYS if key in info
        )
        info["economics"] = economics

        info["prices"] = quoted.copy()
        info["effective_prices"] = effective.copy()
        info["demand"] = demand.copy()
        return obs, reward, terminated, truncated, info

    @property
    def episode_revenue(self) -> float:
        """Total of the per-step revenues recorded since the last reset."""
        return sum(self._revenue_history)

    @property
    def episode_mean_price(self) -> float:
        """Average of the per-step mean effective prices; 0.0 with no steps."""
        if self._price_history:
            step_means = [np.mean(prices) for prices in self._price_history]
            return float(np.mean(step_means))
        return 0.0