responsive and representative demand for COI erosion

2026-07-16 01:53:37 +00:00 · 2026-03-11 12:46:22 +01:00
parent 0f708aab15
commit fa2dde8307
7 changed files with 66 additions and 145 deletions
--- a/4
+++ b/4
@@ -146,10 +146,6 @@ study.margin-erosion:
 study.margin-erosion.quick:
 	python -m engine.studies.margin_erosion_alpha --quick
 .PHONY: study.margin-erosion.plot
 study.margin-erosion.plot:
 	python -m engine.studies.plot_margin_erosion engine/studies/results/margin_erosion_alpha_*.json
 .PHONY: wordcount
 wordcount:
 	@$(NX) run paper:wordcount
--- a/engine/backends/common.py
+++ b/engine/backends/common.py
@@ -15,6 +15,10 @@ def make_env(cfg: Mapping[str, Any]):
        n_products=int(cfg["n_products"]),
        alpha=float(cfg["alpha"]),
        N=int(cfg["N"]),
        agent_params=(
            float(cfg.get("agent_mu", 45.0)),
            float(cfg.get("agent_std", 15.0)),
        ),
        price_bounds=(float(cfg["price_low"]), float(cfg["price_high"])),
        lambda_coi=float(cfg["lambda_coi"]),
        robust_radius=float(cfg["robust_radius"]),
--- a/engine/lib/demand.py
+++ b/engine/lib/demand.py
@@ -17,18 +17,32 @@ def generate_demand_for_actor(
    params: tuple,
    noise_std: float = 1.0,
    distribution_method=np.random.normal,
    normalize: bool = False,
 ) -> np.ndarray:
    """d(p;0) = max(0, valuation - price) + epsi for single actor type
    params: (mean, std) for valuation distribution D_H or D_A"""
    val = distribution_method(*params, size=len(prices))
    noise = distribution_method(0, noise_std, len(prices))
    demand = np.maximum(0, val - prices + noise)
    if not normalize:
        return demand
    total = np.sum(demand)
    return demand / total * 100 if total > 0 else demand
-def estimate_demand(trajectories, action_weights=None):
+def estimate_demand(
-    return estimate_weighted_demand(trajectories, action_weights)
+    trajectories,
    action_weights=None,
    *,
    normalize: bool = False,
    per_session: bool = True,
 ):
    return estimate_weighted_demand(
        trajectories,
        action_weights,
        normalize=normalize,
        per_session=per_session,
    )
 def _parse_event_state(state: str):
@@ -50,7 +64,13 @@ def _weight_for_action(action: str, action_weights: dict) -> float:
    return CATEGORY_WEIGHTS["nav"]
-def estimate_weighted_demand(trajectories, action_weights=None):
+def estimate_weighted_demand(
    trajectories,
    action_weights=None,
    *,
    normalize: bool = False,
    per_session: bool = True,
 ):
    action_weights = (
        DEFAULT_ACTION_WEIGHTS if action_weights is None else action_weights
    )
@@ -64,12 +84,20 @@ def estimate_weighted_demand(trajectories, action_weights=None):
            if w <= 0:
                continue
            scores[product_id] = scores.get(product_id, 0.0) + w
-    total = sum(scores.values())
+    if not scores:
-    return (
+        return {}
-        {pid: (score / total) * 100 for pid, score in scores.items()}
+
-        if total > 0
+    if per_session and len(trajectories) > 0:
-        else {}
+        inv_n = 1.0 / float(len(trajectories))
-    )
+        scores = {pid: score * inv_n for pid, score in scores.items()}
    if not normalize:
        return scores
    total = float(sum(scores.values()))
    if total <= 0:
        return {}
    return {pid: (score / total) * 100.0 for pid, score in scores.items()}
 # Example usage
--- a/engine/lib/wrappers.py
+++ b/engine/lib/wrappers.py
@@ -32,17 +32,23 @@ class EconomicMetricsWrapper(gym.Wrapper):
        obs, reward, terminated, truncated, info = self.env.step(action)
        # extract from unwrapped env
-        prices = self.env.unwrapped._prices
+        quoted_prices = np.asarray(self.env.unwrapped._prices, dtype=float)
        effective_prices = np.asarray(
            info.get("effective_prices", quoted_prices), dtype=float
        )
        if effective_prices.shape != quoted_prices.shape:
            effective_prices = quoted_prices
        demand_dict = self.env.unwrapped._demand
-        demand = np.array([demand_dict.get(i, 0.0) for i in range(len(prices))])
+        demand = np.array([demand_dict.get(i, 0.0) for i in range(len(quoted_prices))])
        # core calculations
-        revenue = float(np.sum(prices * demand))
+        revenue = float(info.get("revenue", np.sum(effective_prices * demand)))
-        avg_price = float(np.mean(prices))
+        quoted_revenue = float(np.sum(quoted_prices * demand))
        avg_price = float(np.mean(effective_prices))
        margin = (avg_price - self.p_min) / max(avg_price, 1e-6)
        coi_level = avg_price - self.p_min  # E[P] - p_min per thesis Def 1
-        self._price_history.append(prices.copy())
+        self._price_history.append(effective_prices.copy())
        self._revenue_history.append(revenue)
        # regret vs baseline (golden path)
@@ -53,6 +59,7 @@ class EconomicMetricsWrapper(gym.Wrapper):
        # inject structured metrics into info
        info["economics"] = {
            "revenue": revenue,
            "quoted_revenue": quoted_revenue,
            "margin": margin,
            "coi_level": coi_level,
            "regret": regret,
@@ -71,10 +78,13 @@ class EconomicMetricsWrapper(gym.Wrapper):
            "agent_prob",
            "alpha_adv",
            "alpha_nominal",
            "erosion_share",
            "effective_price_mean",
        ):
            if key in info:
                info["economics"][key] = info[key]
-        info["prices"] = prices.copy()
+        info["prices"] = quoted_prices.copy()
        info["effective_prices"] = effective_prices.copy()
        info["demand"] = demand.copy()
        return obs, reward, terminated, truncated, info
--- a/engine/spec.py
+++ b/engine/spec.py
@@ -72,6 +72,8 @@ class EnvSpec:
    max_steps: int = 100
    margin_floor: float = 0.05
    margin_floor_patience: int = 5
    agent_mu: float = 45.0
    agent_std: float = 15.0
@dataclass(frozen=True)
@@ -167,6 +169,8 @@ class TrainSpec:
            "max_steps": self.env.max_steps,
            "margin_floor": self.env.margin_floor,
            "margin_floor_patience": self.env.margin_floor_patience,
            "agent_mu": self.env.agent_mu,
            "agent_std": self.env.agent_std,
            "alpha": self.study.alpha,
            "lambda_coi": self.study.lambda_coi,
            "robust_radius": self.study.robust_radius,
@@ -246,6 +250,8 @@ class TrainSpec:
                max_steps=int(base["max_steps"]),
                margin_floor=float(base["margin_floor"]),
                margin_floor_patience=int(base["margin_floor_patience"]),
                agent_mu=float(base.get("agent_mu", 45.0)),
                agent_std=float(base.get("agent_std", 15.0)),
            ),
            study=StudySpec(
                alpha=float(base["alpha"]),
--- a/engine/studies/margin_erosion_alpha.py
+++ b/engine/studies/margin_erosion_alpha.py
@@ -31,6 +31,9 @@ def _run_baseline(alpha: float, algo: str, seed: int, steps: int) -> dict:
            "eval_freq": 5000,
            "eval_episodes": 10,
            "log_freq": 500,
            "robust_eval_enabled": False,
            "agent_mu": 12.0,
            "agent_std": 2.0,
        }
    )
    result = run_train_once(
--- a/engine/studies/plot_margin_erosion.py
+++ b/engine/studies/plot_margin_erosion.py
@@ -1,126 +0,0 @@
 """plot margin erosion: margin/COI/revenue vs α with thesis-quality formatting"""
 import json, sys
 from pathlib import Path
 import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib as mpl
 # Module-wide matplotlib defaults, applied once at import time.
 # Several of these are overridden by explicit arguments further down in this
 # file: plt.subplots() passes its own figsize, the savefig() calls pass their
 # own dpi, and the errorbar() calls pass their own capsize.
 mpl.rcParams.update(
    {
        # text sizes (points)
        "font.size": 10,
        "axes.labelsize": 11,
        "axes.titlesize": 12,
        "xtick.labelsize": 9,
        "ytick.labelsize": 9,
        "legend.fontsize": 9,
        # default figure geometry / resolution
        "figure.figsize": (7, 4),
        "figure.dpi": 150,
        # line, marker and error-bar styling
        "lines.linewidth": 1.5,
        "lines.markersize": 6,
        "errorbar.capsize": 3,
        # same grid transparency the axes.grid(alpha=0.3) calls below request
        "grid.alpha": 0.3,
    }
 )
 def plot_margin_erosion(data: dict, out: Path):
    """plot margin / COI level / revenue (mean ± std) against α, save as PDF and PNG
    data: parsed results JSON; only data["summary"] is read. Its keys have the
        form "alpha_<value>" and each entry holds "<metric>_mean" and
        "<metric>_std" for the metrics margin, coi_level and revenue.
    out: directory that receives margin_erosion_alpha.pdf and .png
    raises ValueError when the summary holds no α level"""
    summary = data["summary"]
    # Keep every original key next to its parsed α. Rebuilding the key with
    # f"alpha_{α:.1f}" breaks for levels that do not round-trip at one decimal
    # (e.g. "alpha_0.25" would be looked up as "alpha_0.2").
    levels = sorted((float(key.split("_")[1]), key) for key in summary)
    if not levels:
        raise ValueError("summary contains no alpha levels to plot")
    αs = [α for α, _ in levels]
    def get(metric):
        # (means, stds) of one metric, ordered by ascending α
        entries = [summary[key] for _, key in levels]
        return (
            [entry[f"{metric}_mean"] for entry in entries],
            [entry[f"{metric}_std"] for entry in entries],
        )
    margins, margin_e = get("margin")
    cois, coi_e = get("coi_level")
    revs, rev_e = get("revenue")
    fig, axes = plt.subplots(1, 3, figsize=(12, 3.5))
    try:
        # panel 1: margin with the floor drawn as a dashed reference line
        axes[0].errorbar(
            αs,
            margins,
            yerr=margin_e,
            marker="o",
            capsize=4,
            label="Standard RL",
            color="#d62728",
        )
        axes[0].axhline(0.05, color="gray", linestyle="--", linewidth=1, label="Floor")
        axes[0].set(
            xlabel="Agent proportion (α)",
            ylabel="Margin",
            title="Margin erosion",
            ylim=(0, max(margins) * 1.2),
        )
        axes[0].grid(alpha=0.3)
        axes[0].legend(loc="upper right")
        # panel 2: COI level
        axes[1].errorbar(αs, cois, yerr=coi_e, marker="s", capsize=4, color="#ff7f0e")
        axes[1].set(
            xlabel="Agent proportion (α)",
            ylabel="COI",
            title="COI collapse (E[P] - p_min)",
            ylim=(0, None),
        )
        axes[1].grid(alpha=0.3)
        # panel 3: revenue
        axes[2].errorbar(αs, revs, yerr=rev_e, marker="^", capsize=4, color="#2ca02c")
        axes[2].set(
            xlabel="Agent proportion (α)",
            ylabel="Revenue",
            title="Revenue degradation",
            ylim=(0, None),
        )
        axes[2].grid(alpha=0.3)
        fig.tight_layout()
        pdf = out / "margin_erosion_alpha.pdf"
        png = out / "margin_erosion_alpha.png"
        fig.savefig(pdf, bbox_inches="tight", dpi=300)
        fig.savefig(png, bbox_inches="tight", dpi=150)
    finally:
        # pyplot keeps figures alive until closed; release this one even on error
        plt.close(fig)
    print(f"→ {pdf}\n→ {png}")
 def print_latex(data: dict):
    """print a LaTeX table (margin, COI, revenue as mean ± std), one row per α level
    data: parsed results JSON; only data["summary"] is read. Its keys have the
        form "alpha_<value>" and each entry holds "<metric>_mean" and
        "<metric>_std" for the metrics margin, coi_level and revenue.
    rows are ordered by ascending α and labelled with the α text of the key"""
    summary = data["summary"]
    # Sort numerically but look rows up by their original key. Rebuilding the
    # key with f"alpha_{α:.1f}" raises KeyError for levels that do not
    # round-trip at one decimal (e.g. "alpha_0.25" -> "alpha_0.2").
    levels = sorted((float(key.split("_")[1]), key) for key in summary)
    print("\n% LaTeX table for appendix")
    print("\\begin{table}[h]\n\\centering")
    print("\\caption{Margin erosion: standard RL under agent contamination}")
    print("\\label{tab:margin_erosion}")
    print("\\begin{tabular}{cccc}\n\\toprule")
    print("α & Margin & COI & Revenue \\\\\n\\midrule")
    for _, key in levels:
        d = summary[key]
        # label with the key's own α text so the row shows the exact level
        label = key.split("_")[1]
        print(
            f"{label} & ${d['margin_mean']:.3f} \\pm {d['margin_std']:.3f}$ & "
            f"${d['coi_level_mean']:.1f} \\pm {d['coi_level_std']:.1f}$ & "
            f"${d['revenue_mean']:.0f} \\pm {d['revenue_std']:.0f}$ \\\\"
        )
    print("\\bottomrule\n\\end{tabular}\n\\end{table}")
 if __name__ == "__main__":
    # CLI entry: <results.json> -> figures next to the file, LaTeX table and a
    # one-line run summary on stdout
    cli_args = sys.argv[1:]
    if not cli_args:
        sys.exit("usage: python -m engine.studies.plot_margin_erosion <results.json>")
    results_path = Path(cli_args[0])
    if not results_path.exists():
        sys.exit(f"error: {results_path} not found")
    with results_path.open() as handle:
        payload = json.load(handle)
    # figures are written into the directory that holds the results file
    plot_margin_erosion(payload, results_path.parent)
    print_latex(payload)
    n_runs = len(payload["results"])
    n_levels = len(payload["summary"])
    run_config = payload["config"]
    print(
        f"\n{n_runs} runs, {n_levels} α levels, "
        f"algos={run_config['algos']}, seeds={run_config['seeds']}"
    )