Make demand responsive and representative for COI erosion

2026-07-15 17:43:36 +00:00 · 2026-03-11 12:46:22 +01:00
parent 0f708aab15
commit fa2dde8307
7 changed files with 66 additions and 145 deletions
--- a/engine/backends/common.py
+++ b/engine/backends/common.py
@@ -15,6 +15,10 @@ def make_env(cfg: Mapping[str, Any]):
        n_products=int(cfg["n_products"]),
        alpha=float(cfg["alpha"]),
        N=int(cfg["N"]),
+        agent_params=(
+            float(cfg.get("agent_mu", 45.0)),
+            float(cfg.get("agent_std", 15.0)),
+        ),
        price_bounds=(float(cfg["price_low"]), float(cfg["price_high"])),
        lambda_coi=float(cfg["lambda_coi"]),
        robust_radius=float(cfg["robust_radius"]),
--- a/engine/lib/demand.py
+++ b/engine/lib/demand.py
@@ -17,18 +17,32 @@ def generate_demand_for_actor(
    params: tuple,
    noise_std: float = 1.0,
    distribution_method=np.random.normal,
+    normalize: bool = False,
 ) -> np.ndarray:
    """d(p;0) = max(0, valuation - price) + epsi for single actor type
    params: (mean, std) for valuation distribution D_H or D_A"""
    val = distribution_method(*params, size=len(prices))
    noise = distribution_method(0, noise_std, len(prices))
    demand = np.maximum(0, val - prices + noise)
+    if not normalize:
+        return demand
    total = np.sum(demand)
    return demand / total * 100 if total > 0 else demand


-def estimate_demand(trajectories, action_weights=None):
-    return estimate_weighted_demand(trajectories, action_weights)
+def estimate_demand(
+    trajectories,
+    action_weights=None,
+    *,
+    normalize: bool = False,
+    per_session: bool = True,
+):
+    return estimate_weighted_demand(
+        trajectories,
+        action_weights,
+        normalize=normalize,
+        per_session=per_session,
+    )


 def _parse_event_state(state: str):
@@ -50,7 +64,13 @@ def _weight_for_action(action: str, action_weights: dict) -> float:
    return CATEGORY_WEIGHTS["nav"]


-def estimate_weighted_demand(trajectories, action_weights=None):
+def estimate_weighted_demand(
+    trajectories,
+    action_weights=None,
+    *,
+    normalize: bool = False,
+    per_session: bool = True,
+):
    action_weights = (
        DEFAULT_ACTION_WEIGHTS if action_weights is None else action_weights
    )
@@ -64,12 +84,20 @@ def estimate_weighted_demand(trajectories, action_weights=None):
            if w <= 0:
                continue
            scores[product_id] = scores.get(product_id, 0.0) + w
-    total = sum(scores.values())
-    return (
-        {pid: (score / total) * 100 for pid, score in scores.items()}
-        if total > 0
-        else {}
-    )
+    if not scores:
+        return {}
+
+    if per_session and len(trajectories) > 0:
+        inv_n = 1.0 / float(len(trajectories))
+        scores = {pid: score * inv_n for pid, score in scores.items()}
+
+    if not normalize:
+        return scores
+
+    total = float(sum(scores.values()))
+    if total <= 0:
+        return {}
+    return {pid: (score / total) * 100.0 for pid, score in scores.items()}


 # Example usage
--- a/engine/lib/wrappers.py
+++ b/engine/lib/wrappers.py
@@ -32,17 +32,23 @@ class EconomicMetricsWrapper(gym.Wrapper):
        obs, reward, terminated, truncated, info = self.env.step(action)

        # extract from unwrapped env
-        prices = self.env.unwrapped._prices
+        quoted_prices = np.asarray(self.env.unwrapped._prices, dtype=float)
+        effective_prices = np.asarray(
+            info.get("effective_prices", quoted_prices), dtype=float
+        )
+        if effective_prices.shape != quoted_prices.shape:
+            effective_prices = quoted_prices
        demand_dict = self.env.unwrapped._demand
-        demand = np.array([demand_dict.get(i, 0.0) for i in range(len(prices))])
+        demand = np.array([demand_dict.get(i, 0.0) for i in range(len(quoted_prices))])

        # core calculations
-        revenue = float(np.sum(prices * demand))
-        avg_price = float(np.mean(prices))
+        revenue = float(info.get("revenue", np.sum(effective_prices * demand)))
+        quoted_revenue = float(np.sum(quoted_prices * demand))
+        avg_price = float(np.mean(effective_prices))
        margin = (avg_price - self.p_min) / max(avg_price, 1e-6)
        coi_level = avg_price - self.p_min  # E[P] - p_min per thesis Def 1

-        self._price_history.append(prices.copy())
+        self._price_history.append(effective_prices.copy())
        self._revenue_history.append(revenue)

        # regret vs baseline (golden path)
@@ -53,6 +59,7 @@ class EconomicMetricsWrapper(gym.Wrapper):
        # inject structured metrics into info
        info["economics"] = {
            "revenue": revenue,
+            "quoted_revenue": quoted_revenue,
            "margin": margin,
            "coi_level": coi_level,
            "regret": regret,
@@ -71,10 +78,13 @@ class EconomicMetricsWrapper(gym.Wrapper):
            "agent_prob",
            "alpha_adv",
            "alpha_nominal",
+            "erosion_share",
+            "effective_price_mean",
        ):
            if key in info:
                info["economics"][key] = info[key]
-        info["prices"] = prices.copy()
+        info["prices"] = quoted_prices.copy()
+        info["effective_prices"] = effective_prices.copy()
        info["demand"] = demand.copy()

        return obs, reward, terminated, truncated, info
--- a/engine/spec.py
+++ b/engine/spec.py
@@ -72,6 +72,8 @@ class EnvSpec:
    max_steps: int = 100
    margin_floor: float = 0.05
    margin_floor_patience: int = 5
+    agent_mu: float = 45.0
+    agent_std: float = 15.0


@dataclass(frozen=True)
@@ -167,6 +169,8 @@ class TrainSpec:
            "max_steps": self.env.max_steps,
            "margin_floor": self.env.margin_floor,
            "margin_floor_patience": self.env.margin_floor_patience,
+            "agent_mu": self.env.agent_mu,
+            "agent_std": self.env.agent_std,
            "alpha": self.study.alpha,
            "lambda_coi": self.study.lambda_coi,
            "robust_radius": self.study.robust_radius,
@@ -246,6 +250,8 @@ class TrainSpec:
                max_steps=int(base["max_steps"]),
                margin_floor=float(base["margin_floor"]),
                margin_floor_patience=int(base["margin_floor_patience"]),
+                agent_mu=float(base.get("agent_mu", 45.0)),
+                agent_std=float(base.get("agent_std", 15.0)),
            ),
            study=StudySpec(
                alpha=float(base["alpha"]),
--- a/engine/studies/margin_erosion_alpha.py
+++ b/engine/studies/margin_erosion_alpha.py
@@ -31,6 +31,9 @@ def _run_baseline(alpha: float, algo: str, seed: int, steps: int) -> dict:
            "eval_freq": 5000,
            "eval_episodes": 10,
            "log_freq": 500,
+            "robust_eval_enabled": False,
+            "agent_mu": 12.0,
+            "agent_std": 2.0,
        }
    )
    result = run_train_once(
--- a/engine/studies/plot_margin_erosion.py
+++ b/engine/studies/plot_margin_erosion.py
@@ -1,126 +0,0 @@
-"""plot margin erosion: margin/COI/revenue vs α with thesis-quality formatting"""
-
-import json, sys
-from pathlib import Path
-import numpy as np
-import matplotlib.pyplot as plt
-import matplotlib as mpl
-
-mpl.rcParams.update(
-    {
-        "font.size": 10,
-        "axes.labelsize": 11,
-        "axes.titlesize": 12,
-        "xtick.labelsize": 9,
-        "ytick.labelsize": 9,
-        "legend.fontsize": 9,
-        "figure.figsize": (7, 4),
-        "figure.dpi": 150,
-        "lines.linewidth": 1.5,
-        "lines.markersize": 6,
-        "errorbar.capsize": 3,
-        "grid.alpha": 0.3,
-    }
-)
-
-
-def plot_margin_erosion(data: dict, out: Path):
-    s = data["summary"]
-    αs = sorted([float(k.split("_")[1]) for k in s.keys()])
-
-    def get(metric):
-        return (
-            [s[f"alpha_{α:.1f}"][f"{metric}_mean"] for α in αs],
-            [s[f"alpha_{α:.1f}"][f"{metric}_std"] for α in αs],
-        )
-
-    margins, margin_e = get("margin")
-    cois, coi_e = get("coi_level")
-    revs, rev_e = get("revenue")
-
-    fig, axes = plt.subplots(1, 3, figsize=(12, 3.5))
-
-    axes[0].errorbar(
-        αs,
-        margins,
-        yerr=margin_e,
-        marker="o",
-        capsize=4,
-        label="Standard RL",
-        color="#d62728",
-    )
-    axes[0].axhline(0.05, color="gray", linestyle="--", linewidth=1, label="Floor")
-    axes[0].set(
-        xlabel="Agent proportion (α)",
-        ylabel="Margin",
-        title="Margin erosion",
-        ylim=(0, max(margins) * 1.2),
-    )
-    axes[0].grid(alpha=0.3)
-    axes[0].legend(loc="upper right")
-
-    axes[1].errorbar(αs, cois, yerr=coi_e, marker="s", capsize=4, color="#ff7f0e")
-    axes[1].set(
-        xlabel="Agent proportion (α)",
-        ylabel="COI",
-        title="COI collapse (E[P] - p_min)",
-        ylim=(0, None),
-    )
-    axes[1].grid(alpha=0.3)
-
-    axes[2].errorbar(αs, revs, yerr=rev_e, marker="^", capsize=4, color="#2ca02c")
-    axes[2].set(
-        xlabel="Agent proportion (α)",
-        ylabel="Revenue",
-        title="Revenue degradation",
-        ylim=(0, None),
-    )
-    axes[2].grid(alpha=0.3)
-
-    plt.tight_layout()
-    pdf = out / "margin_erosion_alpha.pdf"
-    png = out / "margin_erosion_alpha.png"
-    plt.savefig(pdf, bbox_inches="tight", dpi=300)
-    plt.savefig(png, bbox_inches="tight", dpi=150)
-    print(f"→ {pdf}\n→ {png}")
-
-
-def print_latex(data: dict):
-    s = data["summary"]
-    αs = sorted([float(k.split("_")[1]) for k in s.keys()])
-
-    print("\n% LaTeX table for appendix")
-    print("\\begin{table}[h]\n\\centering")
-    print("\\caption{Margin erosion: standard RL under agent contamination}")
-    print("\\label{tab:margin_erosion}")
-    print("\\begin{tabular}{cccc}\n\\toprule")
-    print("α & Margin & COI & Revenue \\\\\n\\midrule")
-
-    for α in αs:
-        d = s[f"alpha_{α:.1f}"]
-        print(
-            f"{α:.1f} & ${d['margin_mean']:.3f} \\pm {d['margin_std']:.3f}$ & "
-            f"${d['coi_level_mean']:.1f} \\pm {d['coi_level_std']:.1f}$ & "
-            f"${d['revenue_mean']:.0f} \\pm {d['revenue_std']:.0f}$ \\\\"
-        )
-
-    print("\\bottomrule\n\\end{tabular}\n\\end{table}")
-
-
-if __name__ == "__main__":
-    if len(sys.argv) < 2:
-        sys.exit("usage: python -m engine.studies.plot_margin_erosion <results.json>")
-
-    path = Path(sys.argv[1])
-    if not path.exists():
-        sys.exit(f"error: {path} not found")
-
-    with open(path) as f:
-        data = json.load(f)
-
-    plot_margin_erosion(data, path.parent)
-    print_latex(data)
-    print(
-        f"\n{len(data['results'])} runs, {len(data['summary'])} α levels, "
-        f"algos={data['config']['algos']}, seeds={data['config']['seeds']}"
-    )