mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
134 lines
4.2 KiB
Python
134 lines
4.2 KiB
Python
"""validate core thesis problem: margin erosion under agent contamination
|
||
trains standard RL (no robust components) across α levels to demonstrate systematic failure
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
import json, sys, time
|
||
from pathlib import Path
|
||
import numpy as np
|
||
|
||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||
from engine.spec import TrainSpec
|
||
from engine.orchestrators import run_train_once
|
||
|
||
|
||
def _run_baseline(alpha: float, algo: str, seed: int, steps: int) -> dict:
    """Train one non-robust baseline run and collect its evaluation metrics.

    Builds a ``TrainSpec`` from a flat config in which the robust components
    are switched off (``lambda_coi=0.0``, ``robust_radius=0.0``,
    ``no_robust=True``, ``robust_eval_enabled=False``), runs it once through
    ``run_train_once`` with ``offline=True`` and ``no_wandb=True``, and
    extracts the ``eval/*`` metrics from the result.

    Args:
        alpha: α level for this run; forwarded to the spec as ``"alpha"`` and
            used to derive the scenario tag.
        algo: Algorithm name forwarded to the spec and used in the run group.
        seed: Seed forwarded to the spec.
        steps: Forwarded to the spec as ``"total_timesteps"``.

    Returns:
        A dict with ``alpha``, ``algo``, ``seed`` and the ``eval_reward``,
        ``eval_revenue``, ``eval_coi_level``, ``eval_margin`` and
        ``eval_agent_prob`` entries. A metric missing from the training
        result is reported as ``np.nan``.
    """
    spec = TrainSpec.from_flat(
        {
            "algo": algo,
            "seed": seed,
            "alpha": alpha,
            "total_timesteps": steps,
            "lambda_coi": 0.0,
            "robust_radius": 0.0,
            "robust_points": 1,
            "robust_rollouts": 1,
            "no_robust": True,
            "arch": "small",
            "n_products": 10,
            "N": 100,
            "max_steps": 50,
            "eval_freq": 5000,
            "eval_episodes": 10,
            "log_freq": 500,
            "robust_eval_enabled": False,
            "agent_mu": 12.0,
            "agent_std": 2.0,
        }
    )

    # round() rather than int(): int() truncates, and binary floats make
    # products such as 0.29 * 100 evaluate to 28.999999999999996, which would
    # mislabel the run as "alpha28" instead of "alpha29".
    alpha_pct = round(alpha * 100)

    # NOTE(review): result is assumed to be a dict-like mapping of metric
    # names to values (only .get() is used on it) — confirm against
    # run_train_once.
    result = run_train_once(
        spec,
        project="phantom-margin-erosion",
        offline=True,
        no_wandb=True,
        kind="study",
        scenario=f"alpha{alpha_pct:02d}",
        group=f"baseline_{algo}",
        extra_tags=("margin_erosion", "baseline"),
    )

    return {
        "alpha": alpha,
        "algo": algo,
        "seed": seed,
        "eval_reward": result.get("eval/reward_mean", np.nan),
        "eval_revenue": result.get("eval/revenue_mean", np.nan),
        "eval_coi_level": result.get("eval/coi_level_mean", np.nan),
        "eval_margin": result.get("eval/margin_mean", np.nan),
        "eval_agent_prob": result.get("eval/agent_prob_mean", np.nan),
    }
|
||
|
||
|
||
def run_margin_erosion_study(
    alphas: list[float] | None = None,
    algos: list[str] | None = None,
    seeds: int = 3,
    steps: int = 30_000,
) -> dict:
    """Run the baseline sweep over α levels, algorithms and seeds.

    Every (α, algo, seed) combination is trained via ``_run_baseline``; the
    per-run metrics are then aggregated per α (across all algorithms and
    seeds) into mean/std statistics. The full output is written as JSON to
    ``results/margin_erosion_alpha_<timestamp>.json`` next to this file and a
    short per-α report is printed.

    Args:
        alphas: α levels to sweep. ``None`` or an empty list selects
            ``[0.1, 0.3, 0.5, 0.7, 0.9]``.
        algos: Algorithm names to run. ``None`` or an empty list selects
            ``["ppo", "dqn", "qtable"]``.
        seeds: Number of seeds per (α, algo) pair; seeds are 42, 43, ...
        steps: Training timesteps passed to each run.

    Returns:
        The dict that was written to disk, with keys ``timestamp``,
        ``config``, ``results`` (one entry per run) and ``summary`` (one
        entry per α, keyed ``alpha_<α to one decimal>``).
    """
    # Falsy inputs (None or an empty list) fall back to the default sweep.
    if not alphas:
        alphas = [0.1, 0.3, 0.5, 0.7, 0.9]
    if not algos:
        algos = ["ppo", "dqn", "qtable"]

    output_dir = Path(__file__).parent / "results"
    output_dir.mkdir(exist_ok=True)
    ts = time.strftime("%Y%m%d_%H%M%S")

    # --- training sweep -----------------------------------------------------
    results = []
    for α in alphas:
        for algo in algos:
            for seed in range(42, 42 + seeds):
                print(f"α={α:.1f} {algo} seed={seed}")
                m = _run_baseline(α, algo, seed, steps)
                results.append(m)
                print(
                    f" margin={m['eval_margin']:.3f} rev={m['eval_revenue']:.0f} coi={m['eval_coi_level']:.1f}"
                )

    # --- per-α aggregation --------------------------------------------------
    summarised_metrics = ("margin", "revenue", "coi_level", "agent_prob")
    summary = {}
    for α in alphas:
        # Match runs by α with a small tolerance instead of exact float equality.
        runs = [r for r in results if abs(r["alpha"] - α) < 0.01]
        if runs:
            s = {}
            for metric in summarised_metrics:
                vals = [r[f"eval_{metric}"] for r in runs]
                s.update(
                    {
                        f"{metric}_mean": float(np.mean(vals)),
                        f"{metric}_std": float(np.std(vals)),
                    }
                )
            s["n_runs"] = len(runs)
            summary[f"alpha_{α:.1f}"] = s

    # --- persist ------------------------------------------------------------
    config = {"alphas": alphas, "algos": algos, "seeds": seeds, "steps": steps}
    output = {
        "timestamp": ts,
        "config": config,
        "results": results,
        "summary": summary,
    }

    path = output_dir / f"margin_erosion_alpha_{ts}.json"
    with path.open("w") as f:
        json.dump(output, f, indent=2)

    # --- report -------------------------------------------------------------
    print(f"\n→ {path}")
    for α in alphas:
        k = f"alpha_{α:.1f}"
        s = summary.get(k)
        if s is None:
            continue
        print(
            f" {k}: margin={s['margin_mean']:.3f}±{s['margin_std']:.3f} "
            f"coi={s['coi_level_mean']:.1f}±{s['coi_level_std']:.1f}"
        )
    return output
|
||
|
||
|
||
if __name__ == "__main__":
|
||
import argparse
|
||
|
||
p = argparse.ArgumentParser(description="margin erosion vs α")
|
||
p.add_argument("--quick", action="store_true", help="fast test")
|
||
args = p.parse_args()
|
||
|
||
run_margin_erosion_study(
|
||
alphas=[0.1, 0.7] if args.quick else [0.1, 0.3, 0.5, 0.7, 0.9],
|
||
algos=["qtable"] if args.quick else ["ppo", "dqn", "qtable"],
|
||
seeds=1 if args.quick else 3,
|
||
steps=5_000 if args.quick else 30_000,
|
||
)
|