mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
adding naive jax and libraries and make adjustments
This commit is contained in:
119
engine/lib/callbacks.py
Normal file
119
engine/lib/callbacks.py
Normal file
@@ -0,0 +1,119 @@
|
||||
"""Training callbacks for W&B/TensorBoard logging - reads from info dict."""
|
||||
|
||||
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
import wandb
|
||||
|
||||
HAS_WANDB = True
|
||||
except ImportError:
|
||||
HAS_WANDB = False
|
||||
|
||||
|
||||
class MetricsCallback(BaseCallback):
    """Forward per-step economic metrics from the env ``info`` dicts to W&B.

    Each entry of ``self.locals["infos"]`` that contains an ``"economics"``
    dict is logged at the current ``num_timesteps``. Its revenue is also
    buffered so that a mean/total can be logged when the rollout ends.
    Nothing is logged when wandb is not importable or no run is active.
    """

    def __init__(
        self, log_histograms: bool = True, log_freq: int = 100, verbose: int = 0
    ):
        """Store the logging options.

        Args:
            log_histograms: when true, log price/demand histograms.
            log_freq: histograms are logged when ``num_timesteps`` is a
                multiple of this value.
            verbose: forwarded to ``BaseCallback``.
        """
        super().__init__(verbose)
        self.log_histograms = log_histograms
        self.log_freq = log_freq
        # revenues seen since the last rollout end
        self._episode_revenues: list[float] = []

    def _on_step(self) -> bool:
        """Log scalar metrics (and, periodically, histograms); never stops training."""
        wandb_active = HAS_WANDB and wandb.run is not None
        if not wandb_active:
            return True

        # (key inside info["economics"], W&B metric name), logged only if present
        optional_metrics = (
            ("coi_mix", "coi/mix"),
            ("coi_base", "coi/base"),
            ("coi_leakage", "coi/leakage"),
            ("coi_penalty", "coi/penalty"),
        )

        step = self.num_timesteps
        for step_info in self.locals.get("infos", []):
            if "economics" in step_info:
                metrics = step_info["economics"]
                scalars = {
                    "economics/revenue": metrics["revenue"],
                    "economics/margin": metrics["margin"],
                    "coi/level": metrics["coi_level"],
                    "economics/regret": metrics["regret"],
                }
                for source_key, metric_name in optional_metrics:
                    if source_key in metrics:
                        scalars[metric_name] = metrics[source_key]
                wandb.log(scalars, step=step)

                self._episode_revenues.append(metrics["revenue"])

        # histograms only at log_freq intervals
        if self.log_histograms and self.num_timesteps % self.log_freq == 0:
            histogram_sources = (
                ("prices", "distributions/prices"),
                ("demand", "distributions/demand"),
            )
            for step_info in self.locals.get("infos", []):
                for source_key, metric_name in histogram_sources:
                    if source_key in step_info:
                        wandb.log(
                            {metric_name: wandb.Histogram(step_info[source_key])},
                            step=self.num_timesteps,
                        )

        return True

    def _on_rollout_end(self) -> None:
        """Log mean/total of the buffered revenues, then clear the buffer."""
        if not (HAS_WANDB and wandb.run is not None and self._episode_revenues):
            return

        summary = {
            "episode/mean_revenue": np.mean(self._episode_revenues),
            "episode/total_revenue": np.sum(self._episode_revenues),
        }
        wandb.log(summary, step=self.num_timesteps)
        self._episode_revenues = []
|
||||
|
||||
|
||||
class EvalMetricsCallback(EvalCallback):
    """Deterministic evaluation callback that mirrors eval results to W&B.

    Extends SB3's ``EvalCallback``. After every evaluation round it logs the
    parent's ``last_mean_reward`` together with the mean of the revenues that
    were read from ``info["economics"]["revenue"]`` during that round.
    """

    def __init__(
        self, eval_env, eval_freq: int = 1000, n_eval_episodes: int = 5, **kwargs
    ):
        """Forward all arguments to ``EvalCallback`` and create the revenue buffer.

        Args:
            eval_env: environment used for evaluation.
            eval_freq: evaluate every ``eval_freq`` callback calls.
            n_eval_episodes: number of episodes per evaluation round.
            **kwargs: any other ``EvalCallback`` keyword argument.
        """
        super().__init__(
            eval_env, eval_freq=eval_freq, n_eval_episodes=n_eval_episodes, **kwargs
        )
        # revenues collected since the last evaluation round was logged
        self._eval_revenues: list[float] = []

    def _on_step(self) -> bool:
        """Run the parent evaluation step, then log its summary to W&B.

        Returns:
            Whatever the parent's ``_on_step`` returned.
        """
        result = super()._on_step()

        # eval_freq <= 0 disables evaluation; testing it first also keeps the
        # modulo below from dividing by zero.
        evaluated = self.eval_freq > 0 and self.n_calls % self.eval_freq == 0
        if not evaluated:
            return result

        # Drain the buffer even when W&B is inactive so it cannot grow without
        # bound over a long training run.
        revenues, self._eval_revenues = self._eval_revenues, []

        if not HAS_WANDB or wandb.run is None:
            return result

        if hasattr(self, "last_mean_reward"):
            wandb.log(
                {
                    "eval/mean_reward": self.last_mean_reward,
                    "eval/mean_revenue": np.mean(revenues) if revenues else 0,
                },
                step=self.num_timesteps,
            )

        return result

    def _log_success_callback(self, locals_: dict, globals_: dict) -> None:
        """Collect revenue from the evaluation loop's ``info`` dict.

        NOTE(review): SB3's ``evaluate_policy`` appears to invoke this hook
        after every evaluation step rather than once per episode, so the
        buffer would hold per-step revenues - confirm against the installed
        SB3 version.

        Args:
            locals_: local variables of the evaluation loop.
            globals_: global variables of the evaluation loop.
        """
        # Delegate first so the parent keeps its own bookkeeping; it indexes
        # both keys directly, hence the guard.
        if "info" in locals_ and "done" in locals_:
            super()._log_success_callback(locals_, globals_)

        info = locals_.get("info", {})
        if "economics" in info:
            self._eval_revenues.append(info["economics"]["revenue"])
|
||||
76
engine/lib/coi.py
Normal file
76
engine/lib/coi.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import numpy as np
|
||||
from typing import Dict
|
||||
|
||||
|
||||
def compute_agent_probability(
    trajectory: list, human_transitions: Dict, agent_transitions: Dict
) -> float:
    """Estimate the probability that a session was produced by an agent.

    The empirical transition distribution of ``trajectory`` is compared with
    the human and the agent reference models via a KL divergence. The two
    divergences are turned into a probability with a two-class softmax
    (lower divergence = higher probability).

    Args:
        trajectory: list of state/event strings from one session.
        human_transitions: reference transitions of the human MDP
            (event -> event -> prob).
        agent_transitions: reference transitions of the agent MDP
            (event -> event -> prob).

    Returns:
        Agent probability in [0, 1]. Trajectories with fewer than two events
        carry no transition and return 0.0 (assume human).
    """
    if len(trajectory) < 2:
        return 0.0  # insufficient data, assume human

    # count observed transitions: source -> target -> occurrences
    trans_counts: Dict = {}
    for s, s_next in zip(trajectory[:-1], trajectory[1:]):
        row = trans_counts.setdefault(s, {})
        row[s_next] = row.get(s_next, 0) + 1

    # normalize each source row to probabilities
    empirical = {}
    for s, nxt in trans_counts.items():
        total = sum(nxt.values())
        empirical[s] = {s_n: cnt / total for s_n, cnt in nxt.items()}

    def kl_div(p_dist: Dict, q_dist: Dict) -> float:
        """Sum KL(p || q) over the source states present in both models."""
        eps = 1e-10  # smoothing so transitions missing from q stay finite
        kl = 0.0
        for s, p_trans in p_dist.items():
            if s not in q_dist:
                continue  # skip states not in reference
            q_trans = q_dist[s]
            for k, p_raw in p_trans.items():
                p_val = p_raw + eps
                q_val = q_trans.get(k, 0.0) + eps
                kl += p_val * np.log(p_val / q_val)
        return kl

    kl_human = kl_div(empirical, human_transitions)
    kl_agent = kl_div(empirical, agent_transitions)

    # softmax over (-kl_human, -kl_agent) equals a logistic of the difference:
    #   exp(-kl_agent) / (exp(-kl_human) + exp(-kl_agent))
    #     = 1 / (1 + exp(kl_agent - kl_human))
    # Evaluating it through the difference avoids both exponentials
    # underflowing to 0 when the divergences are large, which would otherwise
    # make the result 0.0 regardless of which model fits better.
    diff = float(kl_agent - kl_human)
    if diff >= 0:
        z = np.exp(-diff)  # in (0, 1], cannot overflow
        return float(z / (1.0 + z))
    return float(1.0 / (1.0 + np.exp(diff)))  # exp(diff) in (0, 1)
|
||||
|
||||
|
||||
def extract_purchases(trajectories: list) -> Dict[int, int]:
    """Count purchases per product id across sessions.

    A session counts as a purchase when its final event contains both
    ``"checkout"`` and ``"_product"``; the product id is the integer that
    follows the last ``"_product"`` in that event.

    Args:
        trajectories: list of sessions, each a list of event strings.

    Returns:
        Mapping of product id to number of purchasing sessions.
    """
    counts: Dict[int, int] = {}
    for session in trajectories:
        if not session:
            continue
        final_event = session[-1]
        if "checkout" not in final_event or "_product" not in final_event:
            continue
        # text after the last "_product" marker is the numeric id
        product_id = int(final_event.rpartition("_product")[2])
        counts[product_id] = counts.get(product_id, 0) + 1
    return counts
|
||||
|
||||
|
||||
def compute_uplift_coi(
    prices: np.ndarray, purchases: Dict[int, int], baseline_prices: np.ndarray
) -> float:
    """Sum the positive price uplift over all purchased units.

    For every purchased product the uplift ``prices[k] - baseline_prices[k]``
    is floored at zero and multiplied by the purchase count.

    Args:
        prices: charged prices, indexed by product id.
        purchases: product id -> number of purchases.
        baseline_prices: reference prices, indexed by product id.

    Returns:
        Total uplift as a float (0.0 when there are no purchases).
    """
    # TODO: consider view-weighted fractional purchase for denser signal
    total = 0.0
    for product_id, count in purchases.items():
        uplift = max(0.0, prices[product_id] - baseline_prices[product_id])
        total = total + uplift * count
    return float(total)
|
||||
70
engine/lib/discrete.py
Normal file
70
engine/lib/discrete.py
Normal file
@@ -0,0 +1,70 @@
|
||||
from collections import defaultdict
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
import numpy as np
|
||||
|
||||
|
||||
class DiscretePriceActionWrapper(gym.ActionWrapper):
    """Replace the env's action space with a discrete set of price scales.

    Each discrete action selects a multiplier from an evenly spaced grid; the
    multiplier is applied to the unwrapped env's current ``_prices`` and the
    result is clipped to its ``price_bounds``.
    """

    def __init__(
        self,
        env: gym.Env,
        n_levels: int = 9,
        min_scale: float = 0.8,
        max_scale: float = 1.2,
    ):
        """Build the multiplier grid and the matching ``Discrete`` space.

        Args:
            env: environment to wrap.
            n_levels: number of discrete actions / multipliers.
            min_scale: smallest multiplier.
            max_scale: largest multiplier.
        """
        super().__init__(env)
        grid = np.linspace(min_scale, max_scale, n_levels, dtype=np.float32)
        self.scales = grid
        self.action_space = spaces.Discrete(n_levels)

    def action(self, action: int):
        """Translate a discrete action index into a float32 price vector."""
        factor = float(self.scales[int(action)])
        base_env = self.env.unwrapped
        current = np.asarray(base_env._prices, dtype=np.float32)
        lower, upper = base_env.price_bounds
        scaled = np.clip(current * factor, lower, upper)
        return scaled.astype(np.float32)
|
||||
|
||||
|
||||
class EventQTable:
    """Tabular Q-learning agent over a coarse discretisation of the observation.

    The observation is read as ``n_products`` demand values followed by
    ``n_products`` prices. It is reduced to three bin indices (mean demand,
    demand std, mean price), which form the state key of the Q-table.
    """

    def __init__(
        self,
        n_actions: int,
        n_products: int,
        price_bounds: tuple,
        lr: float = 0.1,
        gamma: float = 0.99,
        n_bins: int = 6,
        demand_max: float = 100.0,
    ):
        """Create an empty Q-table and the bin edges used by ``encode``.

        Args:
            n_actions: number of discrete actions.
            n_products: number of products in the observation.
            price_bounds: ``(low, high)`` range spanned by the price bins.
            lr: learning rate of the Q update.
            gamma: discount factor.
            n_bins: number of bins per state feature.
            demand_max: upper end of the range spanned by the demand bins
                (previously fixed at 100.0, which remains the default).
        """
        self.n_actions = int(n_actions)
        self.n_products = int(n_products)
        self.lr = float(lr)
        self.gamma = float(gamma)
        # unseen states start with all-zero action values
        self.q = defaultdict(lambda: np.zeros(self.n_actions, dtype=np.float32))
        lo, hi = price_bounds
        # interior edges only: values below/above the range fall into the
        # first/last bin
        self.demand_bins = np.linspace(0.0, float(demand_max), n_bins + 1)[1:-1]
        self.price_bins = np.linspace(lo, hi, n_bins + 1)[1:-1]

    def encode(self, obs: np.ndarray) -> tuple:
        """Map an observation to its ``(demand_mean, demand_std, price_mean)`` bins."""
        obs = np.asarray(obs, dtype=np.float32)
        d = obs[: self.n_products]
        p = obs[self.n_products : 2 * self.n_products]
        # empty slices fall back to 0.0 instead of producing NaN
        d_mean = float(np.mean(d)) if d.size else 0.0
        d_std = float(np.std(d)) if d.size else 0.0
        p_mean = float(np.mean(p)) if p.size else 0.0
        return (
            int(np.digitize(d_mean, self.demand_bins)),
            int(np.digitize(d_std, self.demand_bins)),
            int(np.digitize(p_mean, self.price_bins)),
        )

    def act(self, obs: np.ndarray, eps: float = 0.0) -> tuple[int, tuple]:
        """Pick an epsilon-greedy action.

        Args:
            obs: current observation.
            eps: probability of choosing a uniformly random action.

        Returns:
            ``(action, state_key)`` where ``state_key`` is ``encode(obs)``.
        """
        s = self.encode(obs)
        if np.random.random() < eps:
            return int(np.random.randint(self.n_actions)), s
        return int(np.argmax(self.q[s])), s

    def update(self, s: tuple, a: int, r: float, s2: tuple, done: bool):
        """Apply one Q-learning update for the transition ``(s, a, r, s2)``.

        When ``done`` is true the target is the reward alone (no bootstrap).
        """
        target = r + (0.0 if done else self.gamma * float(np.max(self.q[s2])))
        self.q[s][a] += self.lr * (target - self.q[s][a])

    def predict(self, obs: np.ndarray, deterministic: bool = True):
        """Return ``(action, None)``; non-deterministic mode explores with eps=0.05."""
        a, _ = self.act(obs, 0.0 if deterministic else 0.05)
        return a, None
|
||||
182
engine/lib/providers.py
Normal file
182
engine/lib/providers.py
Normal file
@@ -0,0 +1,182 @@
|
||||
"""Provider benchmarking - compare pricing strategies across contamination levels."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Callable, Any
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
try:
|
||||
import wandb
|
||||
|
||||
HAS_WANDB = True
|
||||
except ImportError:
|
||||
HAS_WANDB = False
|
||||
|
||||
|
||||
class RandomBaseline:
    """Lower-bound baseline that ignores the observation and acts uniformly at random."""

    def __init__(self, n_actions: int):
        """Remember the size of the discrete action set."""
        self.n = n_actions

    def __call__(self, obs):
        """Return a random action index in ``[0, n_actions)``."""
        drawn = np.random.randint(self.n)
        return int(drawn)

    def predict(self, obs, **kw):
        """Return ``(action, None)``; extra keyword arguments are ignored."""
        chosen = self(obs)
        return chosen, None
|
||||
|
||||
|
||||
class SurgeBaseline:
    """Heuristic surge pricing driven by mean demand.

    Raises the price when mean demand reaches the high threshold, lowers it
    when mean demand is at or below the low threshold, and holds otherwise.
    Matches the naive pricing rule from thesis Section 3.3.2.
    """

    def __init__(
        self, n_actions: int, high_threshold: float = 60.0, low_threshold: float = 30.0
    ):
        """Store the action count and demand thresholds.

        Args:
            n_actions: number of discrete price levels.
            high_threshold: mean demand at or above which the price is raised.
            low_threshold: mean demand at or below which the price is lowered.
        """
        self.n = n_actions
        self.mid = n_actions // 2  # identity action (scale ~1.0)
        self.high_t = high_threshold
        self.low_t = low_threshold

    def __call__(self, obs):
        """Choose an action index; the first half of ``obs`` is read as demand."""
        values = np.asarray(obs, dtype=np.float32)
        half = len(values) // 2
        if half > 0:
            mean_demand = float(np.mean(values[:half]))
        else:
            mean_demand = 0.0

        if mean_demand >= self.high_t:
            # surge: two levels above identity, capped at the top level
            surge_action = self.mid + 2
            top_action = self.n - 1
            return min(surge_action, top_action)
        if mean_demand <= self.low_t:
            # discount: two levels below identity, floored at level 0
            return max(self.mid - 2, 0)
        return self.mid  # hold

    def predict(self, obs, **kw):
        """Return ``(action, None)``; extra keyword arguments are ignored."""
        chosen = self(obs)
        return chosen, None
|
||||
|
||||
|
||||
@dataclass
class ProviderResult:
    """Aggregated benchmark metrics for one provider at one alpha level."""

    # key of the provider in the benchmark's providers mapping
    name: str
    # alpha value the environment was built with
    alpha: float
    # sum of the per-episode revenues
    total_revenue: float
    # mean of the per-episode revenues
    mean_revenue: float
    # mean of the per-step "coi_level" values
    coi_level: float
    # coi_level relative to the same provider's alpha=0 value, in percent
    coi_preserved_pct: float
    # mean of the per-step "margin" values
    margin_integrity: float
    # ProviderBenchmark.run currently always stores 0.0 here
    regret: float
    # number of episodes that were run
    episodes: int
|
||||
|
||||
|
||||
@dataclass
class BenchmarkConfig:
    """Settings for a provider benchmark run."""

    # episodes run per (provider, alpha) pair
    n_episodes: int = 100
    # alpha levels to evaluate; a fresh list is created per instance
    alpha_range: list[float] = field(
        default_factory=lambda: [0.0, 0.1, 0.3, 0.5]
    )
    # NOTE(review): not read by ProviderBenchmark in this file - confirm usage
    baseline_name: str = "fixed"
|
||||
|
||||
|
||||
class ProviderBenchmark:
    """Compare pricing providers to prove margin preservation across contamination levels.

    Usage:
        def env_factory(alpha):
            return EconomicMetricsWrapper(PHANTOM(alpha=alpha))

        providers = {
            "fixed": lambda obs: np.ones(10) * 50,
            "learned": model.predict,
        }

        benchmark = ProviderBenchmark(env_factory, providers)
        results = benchmark.run()
        print(benchmark.summary_table())
    """

    def __init__(
        self,
        env_factory: Callable[[float], Any],
        providers: dict[str, Callable],
        config: BenchmarkConfig | None = None,
    ):
        """Store the benchmark inputs.

        Args:
            env_factory: ``fn(alpha)`` returning a wrapped environment whose
                ``info`` carries an ``"economics"`` dict.
            providers: ``{name: fn(obs) -> action}``; a provider may also
                return a tuple whose first element is the action.
            config: run settings; defaults to ``BenchmarkConfig()``.
        """
        self.env_factory = env_factory  # fn(alpha) -> wrapped env
        self.providers = providers  # {name: fn(obs) -> action}
        self.config = config or BenchmarkConfig()
        self.results: list[ProviderResult] = []

    def run(self) -> list[ProviderResult]:
        """Run the benchmark across all providers and alpha levels.

        One environment is created per alpha level and closed once all
        providers have been evaluated on it, even if a provider raises.

        Returns:
            ``self.results``, with one new entry per (alpha, provider) pair
            appended to whatever earlier calls stored.
        """
        baseline_coi: dict[str, float] = {}  # {provider: coi at alpha=0}

        for alpha in self.config.alpha_range:
            env = self.env_factory(alpha)
            try:
                for name, policy_fn in self.providers.items():
                    result = self._evaluate_provider(
                        env, name, policy_fn, alpha, baseline_coi
                    )
                    self.results.append(result)
                    self._log_result(result)
            finally:
                # release the environment's resources; objects without a
                # close() method are left untouched
                close = getattr(env, "close", None)
                if callable(close):
                    close()

        return self.results

    def _evaluate_provider(
        self,
        env: Any,
        name: str,
        policy_fn: Callable,
        alpha: float,
        baseline_coi: dict[str, float],
    ) -> ProviderResult:
        """Run ``n_episodes`` episodes of one provider and aggregate the metrics."""
        revenues: list[float] = []
        coi_levels: list[float] = []
        margins: list[float] = []

        for _ in range(self.config.n_episodes):
            revenues.append(self._run_episode(env, policy_fn, coi_levels, margins))

        mean_coi = float(np.mean(coi_levels)) if coi_levels else 0.0
        if alpha == 0.0:
            baseline_coi[name] = mean_coi

        # without an alpha=0 reference the provider is compared with itself
        base = baseline_coi.get(name, mean_coi)
        coi_preserved = mean_coi / base if base > 0 else 1.0

        return ProviderResult(
            name=name,
            alpha=alpha,
            total_revenue=float(np.sum(revenues)),
            # avoid NaN (and a numpy warning) when no episode was run
            mean_revenue=float(np.mean(revenues)) if revenues else 0.0,
            coi_level=mean_coi,
            coi_preserved_pct=coi_preserved * 100,
            margin_integrity=float(np.mean(margins)) if margins else 0.0,
            regret=0.0,  # compute vs optimal if known
            episodes=self.config.n_episodes,
        )

    @staticmethod
    def _run_episode(
        env: Any, policy_fn: Callable, coi_levels: list, margins: list
    ) -> float:
        """Play one episode; append per-step COI/margin and return its revenue."""
        obs, _ = env.reset()
        episode_revenue = 0.0
        done = False

        while not done:
            action = policy_fn(obs)
            # handle sb3 model.predict returning tuple
            if isinstance(action, tuple):
                action = action[0]
            obs, _reward, term, trunc, info = env.step(action)
            done = term or trunc

            econ = info.get("economics", {})
            episode_revenue += econ.get("revenue", 0)
            coi_levels.append(econ.get("coi_level", 0))
            margins.append(econ.get("margin", 0))

        return episode_revenue

    @staticmethod
    def _log_result(result: ProviderResult) -> None:
        """Send one result to W&B when wandb is importable and a run is active."""
        if not HAS_WANDB or wandb.run is None:
            return
        name = result.name
        wandb.log(
            {
                f"benchmark/{name}/revenue": result.mean_revenue,
                f"benchmark/{name}/coi_preserved": result.coi_preserved_pct,
                f"benchmark/{name}/margin": result.margin_integrity,
                "benchmark/alpha": result.alpha,
            }
        )

    def to_dataframe(self) -> pd.DataFrame:
        """Convert results to a pandas DataFrame (one row per result)."""
        return pd.DataFrame([r.__dict__ for r in self.results])

    def summary_table(self) -> pd.DataFrame:
        """Pivot table: providers x alpha with revenue/COI metrics.

        Returns an empty DataFrame when no results have been collected yet.
        """
        df = self.to_dataframe()
        if df.empty:
            # nothing to pivot; the column lookup below would raise
            return df
        return df.pivot_table(
            index="name",
            columns="alpha",
            values=["mean_revenue", "coi_preserved_pct", "margin_integrity"],
            aggfunc="mean",
        )
|
||||
77
engine/lib/wrappers.py
Normal file
77
engine/lib/wrappers.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Economic metrics wrapper - calculates thesis-aligned KPIs and injects into info dict."""
|
||||
|
||||
import gymnasium as gym
|
||||
import numpy as np
|
||||
|
||||
|
||||
class EconomicMetricsWrapper(gym.Wrapper):
    """Calculates thesis-aligned economic metrics per step, injects into info.

    Metrics follow thesis definitions:
    - COI level: E[P] - p_min (Definition 1)
    - Margin: (avg_price - p_min) / avg_price
    - Regret: 1 - (revenue / baseline_revenue)

    Prices and demand are read from the unwrapped env's ``_prices`` and
    ``_demand`` attributes after each step.
    """

    def __init__(
        self, env: gym.Env, p_min: float = 10.0, baseline_revenue: float | None = None
    ):
        """Wrap ``env`` and initialise the per-episode histories.

        Args:
            env: environment to wrap.
            p_min: price floor used by the COI level and margin.
            baseline_revenue: reference revenue for the regret; regret stays
                0.0 when this is ``None`` or not positive.
        """
        super().__init__(env)
        self.p_min = p_min
        self.baseline_revenue = baseline_revenue
        self._price_history: list[np.ndarray] = []
        self._revenue_history: list[float] = []

    def reset(self, **kwargs):
        """Reset the wrapped env and clear the per-episode histories."""
        obs, info = self.env.reset(**kwargs)
        self._price_history = []
        self._revenue_history = []
        return obs, info

    def step(self, action):
        """Step the wrapped env and add the economic metrics to ``info``.

        Adds ``info["economics"]`` (revenue, margin, coi_level, regret, plus
        any ``coi_*`` keys the env already put into ``info``) as well as
        ``info["prices"]`` and ``info["demand"]``.
        """
        obs, reward, terminated, truncated, info = self.env.step(action)

        # extract from unwrapped env
        base_env = self.env.unwrapped
        prices = np.asarray(base_env._prices)
        demand_dict = base_env._demand
        # products missing from the demand mapping count as zero demand
        demand = np.array([demand_dict.get(i, 0.0) for i in range(len(prices))])

        # core calculations
        revenue = float(np.sum(prices * demand))
        avg_price = float(np.mean(prices))
        # denominator is floored so a zero average price cannot divide by zero
        margin = (avg_price - self.p_min) / max(avg_price, 1e-6)
        coi_level = avg_price - self.p_min  # E[P] - p_min per thesis Def 1

        self._price_history.append(prices.copy())
        self._revenue_history.append(revenue)

        # regret vs baseline (golden path)
        regret = 0.0
        if self.baseline_revenue is not None and self.baseline_revenue > 0:
            regret = 1.0 - (revenue / self.baseline_revenue)

        # inject structured metrics into info
        economics = {
            "revenue": revenue,
            "margin": margin,
            "coi_level": coi_level,
            "regret": regret,
        }
        for key in ("coi_mix", "coi_base", "coi_leakage", "coi_penalty"):
            if key in info:
                economics[key] = info[key]
        info["economics"] = economics
        info["prices"] = prices.copy()
        info["demand"] = demand.copy()

        return obs, reward, terminated, truncated, info

    @property
    def episode_revenue(self) -> float:
        """Total revenue accumulated since the last reset."""
        return sum(self._revenue_history)

    @property
    def episode_mean_price(self) -> float:
        """Mean over steps of the per-step mean price; 0.0 before the first step."""
        if not self._price_history:
            return 0.0
        return float(np.mean([np.mean(p) for p in self._price_history]))
|
||||
Reference in New Issue
Block a user