diff --git a/Makefile b/Makefile index 8a7d203..f0072a7 100644 --- a/Makefile +++ b/Makefile @@ -146,10 +146,6 @@ study.margin-erosion: study.margin-erosion.quick: python -m engine.studies.margin_erosion_alpha --quick -.PHONY: study.margin-erosion.plot -study.margin-erosion.plot: - python -m engine.studies.plot_margin_erosion engine/studies/results/margin_erosion_alpha_*.json - .PHONY: wordcount wordcount: @$(NX) run paper:wordcount diff --git a/engine/backends/common.py b/engine/backends/common.py index 9e50d48..ca508f7 100644 --- a/engine/backends/common.py +++ b/engine/backends/common.py @@ -15,6 +15,10 @@ def make_env(cfg: Mapping[str, Any]): n_products=int(cfg["n_products"]), alpha=float(cfg["alpha"]), N=int(cfg["N"]), + agent_params=( + float(cfg.get("agent_mu", 45.0)), + float(cfg.get("agent_std", 15.0)), + ), price_bounds=(float(cfg["price_low"]), float(cfg["price_high"])), lambda_coi=float(cfg["lambda_coi"]), robust_radius=float(cfg["robust_radius"]), diff --git a/engine/lib/demand.py b/engine/lib/demand.py index cb37c3d..ba3ddfd 100644 --- a/engine/lib/demand.py +++ b/engine/lib/demand.py @@ -17,18 +17,32 @@ def generate_demand_for_actor( params: tuple, noise_std: float = 1.0, distribution_method=np.random.normal, + normalize: bool = False, ) -> np.ndarray: """d(p;0) = max(0, valuation - price) + epsi for single actor type params: (mean, std) for valuation distribution D_H or D_A""" val = distribution_method(*params, size=len(prices)) noise = distribution_method(0, noise_std, len(prices)) demand = np.maximum(0, val - prices + noise) + if not normalize: + return demand total = np.sum(demand) return demand / total * 100 if total > 0 else demand -def estimate_demand(trajectories, action_weights=None): - return estimate_weighted_demand(trajectories, action_weights) +def estimate_demand( + trajectories, + action_weights=None, + *, + normalize: bool = False, + per_session: bool = True, +): + return estimate_weighted_demand( + trajectories, + action_weights, + normalize=normalize, + per_session=per_session, + ) def _parse_event_state(state: str): @@ -50,7 +64,13 @@ def _weight_for_action(action: str, action_weights: dict) -> float: return CATEGORY_WEIGHTS["nav"] -def estimate_weighted_demand(trajectories, action_weights=None): +def estimate_weighted_demand( + trajectories, + action_weights=None, + *, + normalize: bool = False, + per_session: bool = True, +): action_weights = ( DEFAULT_ACTION_WEIGHTS if action_weights is None else action_weights ) @@ -64,12 +84,20 @@ def estimate_weighted_demand(trajectories, action_weights=None): if w <= 0: continue scores[product_id] = scores.get(product_id, 0.0) + w - total = sum(scores.values()) - return ( - {pid: (score / total) * 100 for pid, score in scores.items()} - if total > 0 - else {} - ) + if not scores: + return {} + + if per_session and len(trajectories) > 0: + inv_n = 1.0 / float(len(trajectories)) + scores = {pid: score * inv_n for pid, score in scores.items()} + + if not normalize: + return scores + + total = float(sum(scores.values())) + if total <= 0: + return {} + return {pid: (score / total) * 100.0 for pid, score in scores.items()} # Example usage diff --git a/engine/lib/wrappers.py b/engine/lib/wrappers.py index f68a27c..4cfd706 100644 --- a/engine/lib/wrappers.py +++ b/engine/lib/wrappers.py @@ -32,17 +32,23 @@ class EconomicMetricsWrapper(gym.Wrapper): obs, reward, terminated, truncated, info = self.env.step(action) # extract from unwrapped env - prices = self.env.unwrapped._prices + quoted_prices = np.asarray(self.env.unwrapped._prices, dtype=float) + effective_prices = np.asarray( + info.get("effective_prices", quoted_prices), dtype=float + ) + if effective_prices.shape != quoted_prices.shape: + effective_prices = quoted_prices demand_dict = self.env.unwrapped._demand - demand = np.array([demand_dict.get(i, 0.0) for i in range(len(prices))]) + demand = np.array([demand_dict.get(i, 0.0) for i in range(len(quoted_prices))]) # core calculations - revenue = float(np.sum(prices * demand)) - avg_price = float(np.mean(prices)) + revenue = float(info.get("revenue", np.sum(effective_prices * demand))) + quoted_revenue = float(np.sum(quoted_prices * demand)) + avg_price = float(np.mean(effective_prices)) margin = (avg_price - self.p_min) / max(avg_price, 1e-6) coi_level = avg_price - self.p_min # E[P] - p_min per thesis Def 1 - self._price_history.append(prices.copy()) + self._price_history.append(effective_prices.copy()) self._revenue_history.append(revenue) # regret vs baseline (golden path) @@ -53,6 +59,7 @@ class EconomicMetricsWrapper(gym.Wrapper): # inject structured metrics into info info["economics"] = { "revenue": revenue, + "quoted_revenue": quoted_revenue, "margin": margin, "coi_level": coi_level, "regret": regret, @@ -71,10 +78,13 @@ class EconomicMetricsWrapper(gym.Wrapper): "agent_prob", "alpha_adv", "alpha_nominal", + "erosion_share", + "effective_price_mean", ): if key in info: info["economics"][key] = info[key] - info["prices"] = prices.copy() + info["prices"] = quoted_prices.copy() + info["effective_prices"] = effective_prices.copy() info["demand"] = demand.copy() return obs, reward, terminated, truncated, info diff --git a/engine/spec.py b/engine/spec.py index 818d59f..5ddd0ce 100644 --- a/engine/spec.py +++ b/engine/spec.py @@ -72,6 +72,8 @@ class EnvSpec: max_steps: int = 100 margin_floor: float = 0.05 margin_floor_patience: int = 5 + agent_mu: float = 45.0 + agent_std: float = 15.0 @dataclass(frozen=True) @@ -167,6 +169,8 @@ class TrainSpec: "max_steps": self.env.max_steps, "margin_floor": self.env.margin_floor, "margin_floor_patience": self.env.margin_floor_patience, + "agent_mu": self.env.agent_mu, + "agent_std": self.env.agent_std, "alpha": self.study.alpha, "lambda_coi": self.study.lambda_coi, "robust_radius": self.study.robust_radius, @@ -246,6 +250,8 @@ class TrainSpec: max_steps=int(base["max_steps"]), margin_floor=float(base["margin_floor"]), margin_floor_patience=int(base["margin_floor_patience"]), + agent_mu=float(base.get("agent_mu", 45.0)), + agent_std=float(base.get("agent_std", 15.0)), ), study=StudySpec( alpha=float(base["alpha"]), diff --git a/engine/studies/margin_erosion_alpha.py b/engine/studies/margin_erosion_alpha.py index ef2dc79..3ff97a4 100644 --- a/engine/studies/margin_erosion_alpha.py +++ b/engine/studies/margin_erosion_alpha.py @@ -31,6 +31,9 @@ def _run_baseline(alpha: float, algo: str, seed: int, steps: int) -> dict: "eval_freq": 5000, "eval_episodes": 10, "log_freq": 500, + "robust_eval_enabled": False, + "agent_mu": 12.0, + "agent_std": 2.0, } ) result = run_train_once( diff --git a/engine/studies/plot_margin_erosion.py b/engine/studies/plot_margin_erosion.py deleted file mode 100644 index 021a8b5..0000000 --- a/engine/studies/plot_margin_erosion.py +++ /dev/null @@ -1,126 +0,0 @@ -"""plot margin erosion: margin/COI/revenue vs α with thesis-quality formatting""" - -import json, sys -from pathlib import Path -import numpy as np -import matplotlib.pyplot as plt -import matplotlib as mpl - -mpl.rcParams.update( - { - "font.size": 10, - "axes.labelsize": 11, - "axes.titlesize": 12, - "xtick.labelsize": 9, - "ytick.labelsize": 9, - "legend.fontsize": 9, - "figure.figsize": (7, 4), - "figure.dpi": 150, - "lines.linewidth": 1.5, - "lines.markersize": 6, - "errorbar.capsize": 3, - "grid.alpha": 0.3, - } -) - - -def plot_margin_erosion(data: dict, out: Path): - s = data["summary"] - αs = sorted([float(k.split("_")[1]) for k in s.keys()]) - - def get(metric): - return ( - [s[f"alpha_{α:.1f}"][f"{metric}_mean"] for α in αs], - [s[f"alpha_{α:.1f}"][f"{metric}_std"] for α in αs], - ) - - margins, margin_e = get("margin") - cois, coi_e = get("coi_level") - revs, rev_e = get("revenue") - - fig, axes = plt.subplots(1, 3, figsize=(12, 3.5)) - - axes[0].errorbar( - αs, - margins, - yerr=margin_e, - marker="o", - capsize=4, - label="Standard RL", - color="#d62728", - ) - axes[0].axhline(0.05, color="gray", linestyle="--", linewidth=1, label="Floor") - axes[0].set( - xlabel="Agent proportion (α)", - ylabel="Margin", - title="Margin erosion", - ylim=(0, max(margins) * 1.2), - ) - axes[0].grid(alpha=0.3) - axes[0].legend(loc="upper right") - - axes[1].errorbar(αs, cois, yerr=coi_e, marker="s", capsize=4, color="#ff7f0e") - axes[1].set( - xlabel="Agent proportion (α)", - ylabel="COI", - title="COI collapse (E[P] - p_min)", - ylim=(0, None), - ) - axes[1].grid(alpha=0.3) - - axes[2].errorbar(αs, revs, yerr=rev_e, marker="^", capsize=4, color="#2ca02c") - axes[2].set( - xlabel="Agent proportion (α)", - ylabel="Revenue", - title="Revenue degradation", - ylim=(0, None), - ) - axes[2].grid(alpha=0.3) - - plt.tight_layout() - pdf = out / "margin_erosion_alpha.pdf" - png = out / "margin_erosion_alpha.png" - plt.savefig(pdf, bbox_inches="tight", dpi=300) - plt.savefig(png, bbox_inches="tight", dpi=150) - print(f"→ {pdf}\n→ {png}") - - -def print_latex(data: dict): - s = data["summary"] - αs = sorted([float(k.split("_")[1]) for k in s.keys()]) - - print("\n% LaTeX table for appendix") - print("\\begin{table}[h]\n\\centering") - print("\\caption{Margin erosion: standard RL under agent contamination}") - print("\\label{tab:margin_erosion}") - print("\\begin{tabular}{cccc}\n\\toprule") - print("α & Margin & COI & Revenue \\\\\n\\midrule") - - for α in αs: - d = s[f"alpha_{α:.1f}"] - print( - f"{α:.1f} & ${d['margin_mean']:.3f} \\pm {d['margin_std']:.3f}$ & " - f"${d['coi_level_mean']:.1f} \\pm {d['coi_level_std']:.1f}$ & " - f"${d['revenue_mean']:.0f} \\pm {d['revenue_std']:.0f}$ \\\\" - ) - - print("\\bottomrule\n\\end{tabular}\n\\end{table}") - - -if __name__ == "__main__": - if len(sys.argv) < 2: - sys.exit("usage: python -m engine.studies.plot_margin_erosion ") - - path = Path(sys.argv[1]) - if not path.exists(): - sys.exit(f"error: {path} not found") - - with open(path) as f: - data = json.load(f) - - plot_margin_erosion(data, path.parent) - print_latex(data) - print( - f"\n{len(data['results'])} runs, {len(data['summary'])} α levels, " - f"algos={data['config']['algos']}, seeds={data['config']['seeds']}" - )