# PHANTOM/engine/studies/margin_erosion_alpha.py

"""validate core thesis problem: margin erosion under agent contamination
trains standard RL (no robust components) across α levels to demonstrate systematic failure
"""

from __future__ import annotations
import json, sys, time
from pathlib import Path
import numpy as np

sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from engine.spec import TrainSpec
from engine.orchestrators import run_train_once


def _run_baseline(alpha: float, algo: str, seed: int, steps: int) -> dict:
    """Train one non-robust baseline run and return its final eval metrics.

    The spec sets ``no_robust=True``, ``robust_radius=0.0``, ``lambda_coi=0.0``
    and ``robust_eval_enabled=False``, and the run is launched with
    ``offline=True`` / ``no_wandb=True``.

    Args:
        alpha: Value forwarded as the spec's ``alpha`` field; also encoded in
            the scenario tag as a whole percentage.
        algo: Algorithm name forwarded as the spec's ``algo`` field.
        seed: Seed forwarded as the spec's ``seed`` field.
        steps: Forwarded as the spec's ``total_timesteps``.

    Returns:
        A flat dict with ``alpha``, ``algo``, ``seed`` and the ``eval_*``
        metrics read from the training result. A metric whose key is absent
        from the result is reported as NaN.
    """
    spec = TrainSpec.from_flat(
        {
            "algo": algo,
            "seed": seed,
            "alpha": alpha,
            "total_timesteps": steps,
            "lambda_coi": 0.0,
            "robust_radius": 0.0,
            "robust_points": 1,
            "robust_rollouts": 1,
            "no_robust": True,
            "arch": "small",
            "n_products": 10,
            "N": 100,
            "max_steps": 50,
            "eval_freq": 5000,
            "eval_episodes": 10,
            "log_freq": 500,
            "robust_eval_enabled": False,
            "agent_mu": 12.0,
            "agent_std": 2.0,
        }
    )
    # Round rather than truncate: 0.29 * 100 == 28.999999999999996, which
    # int() would turn into the wrong tag "alpha28".
    alpha_pct = int(round(alpha * 100))
    # NOTE(review): assumes run_train_once returns a dict-like object with
    # .get(); confirm against engine.orchestrators.
    result = run_train_once(
        spec,
        project="phantom-margin-erosion",
        offline=True,
        no_wandb=True,
        kind="study",
        scenario=f"alpha{alpha_pct:02d}",
        group=f"baseline_{algo}",
        extra_tags=("margin_erosion", "baseline"),
    )
    return {
        "alpha": alpha,
        "algo": algo,
        "seed": seed,
        "eval_reward": result.get("eval/reward_mean", np.nan),
        "eval_revenue": result.get("eval/revenue_mean", np.nan),
        "eval_coi_level": result.get("eval/coi_level_mean", np.nan),
        "eval_margin": result.get("eval/margin_mean", np.nan),
        "eval_agent_prob": result.get("eval/agent_prob_mean", np.nan),
    }


def _alpha_label(alpha: float) -> str:
    """Format alpha with the fewest decimals (at least one) that identify it.

    Values such as 0.1 or 1.0 keep the one-decimal form, so keys like
    ``alpha_0.1`` are unchanged. Extra digits are added only when one decimal
    would be lossy (0.25 -> "0.25" instead of "0.2").
    """
    for digits in range(1, 7):
        text = f"{alpha:.{digits}f}"
        # The tolerance absorbs float noise such as 0.30000000000000004.
        if abs(float(text) - alpha) < 1e-9:
            return text
    return f"{alpha:.6f}"


def run_margin_erosion_study(
    alphas: list[float] | None = None,
    algos: list[str] | None = None,
    seeds: int = 3,
    steps: int = 30_000,
) -> dict:
    """Run the baseline over every (alpha, algo, seed) and save a JSON report.

    Each combination is trained via ``_run_baseline`` with seeds
    ``42, 43, ...``. Per-alpha summaries pool all algos and seeds for that
    alpha. The report is written to ``results/`` next to this file.

    Args:
        alphas: Alpha values to sweep; ``None`` or empty uses
            ``[0.1, 0.3, 0.5, 0.7, 0.9]``.
        algos: Algorithm names; ``None`` or empty uses
            ``["ppo", "dqn", "qtable"]``.
        seeds: Number of seeds per (alpha, algo) pair.
        steps: Training timesteps per run.

    Returns:
        The dict that was written to disk, with keys ``timestamp``,
        ``config``, ``results`` (one entry per run) and ``summary``
        (mean/std per metric, keyed ``alpha_<value>``).
    """
    alphas = alphas or [0.1, 0.3, 0.5, 0.7, 0.9]
    algos = algos or ["ppo", "dqn", "qtable"]
    output_dir = Path(__file__).parent / "results"
    output_dir.mkdir(exist_ok=True)
    ts = time.strftime("%Y%m%d_%H%M%S")

    results = []
    # Group by the exact alpha while collecting, so nearby alphas
    # (e.g. 0.10 and 0.105) never share runs in the summary.
    runs_by_alpha: dict[float, list[dict]] = {}
    for α in alphas:
        label = _alpha_label(α)
        for algo in algos:
            for si in range(seeds):
                seed = 42 + si
                print(f"α={label} {algo} seed={seed}")
                m = _run_baseline(α, algo, seed, steps)
                results.append(m)
                runs_by_alpha.setdefault(α, []).append(m)
                print(
                    f"  margin={m['eval_margin']:.3f} rev={m['eval_revenue']:.0f} coi={m['eval_coi_level']:.1f}"
                )

    summary = {}
    for α, runs in runs_by_alpha.items():
        s = {}
        for metric in ["margin", "revenue", "coi_level", "agent_prob"]:
            vals = [r[f"eval_{metric}"] for r in runs]
            # np.mean/np.std propagate NaN, so a run with a missing metric
            # makes that alpha's aggregate NaN rather than silently dropping it.
            s[f"{metric}_mean"] = float(np.mean(vals))
            s[f"{metric}_std"] = float(np.std(vals))
        s["n_runs"] = len(runs)
        summary[f"alpha_{_alpha_label(α)}"] = s

    output = {
        "timestamp": ts,
        "config": {"alphas": alphas, "algos": algos, "seeds": seeds, "steps": steps},
        "results": results,
        "summary": summary,
    }

    path = output_dir / f"margin_erosion_alpha_{ts}.json"
    with open(path, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)

    print(f"\n→ {path}")
    for k, s in summary.items():
        print(
            f"  {k}: margin={s['margin_mean']:.3f}±{s['margin_std']:.3f} "
            f"coi={s['coi_level_mean']:.1f}±{s['coi_level_std']:.1f}"
        )
    return output


if __name__ == "__main__":
    import argparse

    # CLI entry point: --quick swaps the full sweep for a reduced grid.
    parser = argparse.ArgumentParser(description="margin erosion vs α")
    parser.add_argument("--quick", action="store_true", help="fast test")
    cli = parser.parse_args()

    if cli.quick:
        study_kwargs = {
            "alphas": [0.1, 0.7],
            "algos": ["qtable"],
            "seeds": 1,
            "steps": 5_000,
        }
    else:
        study_kwargs = {
            "alphas": [0.1, 0.3, 0.5, 0.7, 0.9],
            "algos": ["ppo", "dqn", "qtable"],
            "seeds": 3,
            "steps": 30_000,
        }

    run_margin_erosion_study(**study_kwargs)