Files
PHANTOM/paper/src/chapters/figures/results/process_final_sweeps.py
2026-04-09 10:09:18 +02:00

705 lines
23 KiB
Python

from __future__ import annotations
import argparse
import json
from pathlib import Path
import subprocess
from typing import Any
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def _project_root() -> Path:
return Path(__file__).resolve().parents[5]
def _default_bundle_dir() -> Path:
    """Return the most recently modified ``bundle_*`` directory.

    Candidates are searched under
    ``<project root>/engine/studies/results/wandb_sweep_bundles``; only
    directories are considered and recency is judged by ``st_mtime``.

    Raises:
        FileNotFoundError: if no ``bundle_*`` directory exists there.
    """
    base = _project_root() / "engine" / "studies" / "results" / "wandb_sweep_bundles"
    candidates = (entry for entry in base.glob("bundle_*") if entry.is_dir())
    newest = max(
        candidates,
        key=lambda entry: entry.stat().st_mtime,
        default=None,
    )
    if newest is None:
        raise FileNotFoundError(f"No sweep bundle directories found in {base}")
    return newest
def _default_output_dir() -> Path:
return Path(__file__).resolve().parent / "generated" / "final"
def _default_plot_dir(output_dir: Path) -> Path:
return output_dir / "plots"
def _git_commit() -> str:
try:
result = subprocess.run(
["git", "rev-parse", "HEAD"],
check=True,
text=True,
capture_output=True,
cwd=_project_root(),
)
except Exception:
return "unknown"
return result.stdout.strip()
def _truthy(value: Any) -> bool:
if isinstance(value, bool):
return value
if value is None:
return False
return str(value).strip().lower() in {"1", "true", "yes", "on"}
def _mode_of(row: pd.Series) -> str:
mode_hint = str(row.get("study_mode", "")).strip().lower()
if mode_hint in {"baseline", "no_robust"}:
return "baseline"
if mode_hint in {"defended", "robust"}:
return "defended"
if _truthy(row.get("baseline_mode")) or _truthy(row.get("no_robust")):
return "baseline"
return "defended"
def _coerce_numeric(frame: pd.DataFrame, columns: list[str]) -> None:
for column in columns:
if column in frame.columns:
frame[column] = pd.to_numeric(frame[column], errors="coerce")
def _configure_style() -> None:
    """Apply the shared figure style by updating matplotlib's global rcParams."""
    typography = {
        "font.family": "serif",
        "font.size": 10,
        "axes.titlesize": 10,
        "axes.labelsize": 9,
        "legend.fontsize": 8,
        "xtick.labelsize": 8,
        "ytick.labelsize": 8,
    }
    resolution = {
        "figure.dpi": 220,
        "savefig.dpi": 320,
    }
    axes_look = {
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.grid": True,
        "grid.alpha": 0.22,
    }
    plt.rcParams.update({**typography, **resolution, **axes_look})
def _load_runs(bundle_dir: Path) -> pd.DataFrame:
path = bundle_dir / "runs_finished.csv"
if not path.exists():
raise FileNotFoundError(f"Missing required file: {path}")
frame = pd.read_csv(path)
frame["mode"] = frame.apply(_mode_of, axis=1)
_coerce_numeric(
frame,
[
"alpha",
"n_products",
"eval_revenue_mean",
"eval_reward_mean",
"eval_supra_share_mean",
"eval_volatility_mean",
"eval_coi_level_mean",
"eval_coi_leakage_mean",
"objective_score",
],
)
return frame
def _focus_sweep(runs: pd.DataFrame) -> str:
coverage = (
runs.groupby("sweep_id", as_index=False)
.agg(
n_alpha=("alpha", lambda s: int(pd.Series(s).dropna().nunique())),
max_alpha=("alpha", "max"),
run_count=("run_id", "size"),
)
.sort_values(
["n_alpha", "max_alpha", "run_count"], ascending=[False, False, False]
)
)
if coverage.empty:
raise ValueError("No sweep rows available in runs_finished.csv")
return str(coverage.iloc[0]["sweep_id"])
def _alpha_mode_summary(runs: pd.DataFrame) -> pd.DataFrame:
return (
runs.groupby(["alpha", "mode"], as_index=False)
.agg(
runs=("run_id", "size"),
revenue_mean=("eval_revenue_mean", "mean"),
reward_mean=("eval_reward_mean", "mean"),
supra_mean=("eval_supra_share_mean", "mean"),
volatility_mean=("eval_volatility_mean", "mean"),
coi_leakage_mean=("eval_coi_leakage_mean", "mean"),
coi_level_mean=("eval_coi_level_mean", "mean"),
)
.sort_values(["alpha", "mode"])
.reset_index(drop=True)
)
def _alpha_deltas(alpha_mode: pd.DataFrame) -> pd.DataFrame:
rows: list[dict[str, float]] = []
for alpha, group in alpha_mode.groupby("alpha", sort=True):
defended = group[group["mode"] == "defended"]
baseline = group[group["mode"] == "baseline"]
if defended.empty or baseline.empty:
continue
d_rev = float(defended["revenue_mean"].iloc[0])
b_rev = float(baseline["revenue_mean"].iloc[0])
d_reward = float(defended["reward_mean"].iloc[0])
b_reward = float(baseline["reward_mean"].iloc[0])
d_vol = float(defended["volatility_mean"].iloc[0])
b_vol = float(baseline["volatility_mean"].iloc[0])
d_supra = float(defended["supra_mean"].iloc[0])
b_supra = float(baseline["supra_mean"].iloc[0])
d_coi_leak = float(defended["coi_leakage_mean"].iloc[0])
b_coi_leak = float(baseline["coi_leakage_mean"].iloc[0])
rows.append(
{
"alpha": float(alpha),
"revenue_delta": d_rev - b_rev,
"revenue_delta_pct": 0.0
if b_rev == 0.0
else 100.0 * (d_rev - b_rev) / b_rev,
"reward_delta": d_reward - b_reward,
"reward_delta_pct": 0.0
if b_reward == 0.0
else 100.0 * (d_reward - b_reward) / b_reward,
"volatility_delta": d_vol - b_vol,
"supra_delta": d_supra - b_supra,
"coi_leakage_delta": d_coi_leak - b_coi_leak,
}
)
return pd.DataFrame(rows).sort_values("alpha").reset_index(drop=True)
def _zone_summary(alpha_deltas: pd.DataFrame) -> pd.DataFrame:
if alpha_deltas.empty:
return pd.DataFrame()
data = alpha_deltas.copy()
data["zone"] = np.where(
data["alpha"] >= 0.7, "high_alpha_0_7_plus", "low_alpha_below_0_7"
)
return (
data.groupby("zone", as_index=False)
.agg(
alpha_cells=("alpha", "size"),
revenue_delta_pct_mean=("revenue_delta_pct", "mean"),
reward_delta_pct_mean=("reward_delta_pct", "mean"),
coi_leakage_delta_mean=("coi_leakage_delta", "mean"),
volatility_delta_mean=("volatility_delta", "mean"),
)
.sort_values("zone")
)
def _alpha_product_coi_preservation(runs: pd.DataFrame) -> pd.DataFrame:
grouped = (
runs.groupby(["alpha", "n_products", "mode"], as_index=False)
.agg(
runs=("run_id", "size"),
coi_level_mean=("eval_coi_level_mean", "mean"),
)
.sort_values(["alpha", "n_products", "mode"])
.reset_index(drop=True)
)
rows: list[dict[str, float | int]] = []
for (alpha, n_products), group in grouped.groupby(
["alpha", "n_products"], sort=True
):
defended = group[group["mode"] == "defended"]
baseline = group[group["mode"] == "baseline"]
if defended.empty or baseline.empty:
continue
d_coi = float(defended["coi_level_mean"].iloc[0])
b_coi = float(baseline["coi_level_mean"].iloc[0])
rows.append(
{
"alpha": float(alpha),
"n_products": float(n_products),
"baseline_runs": int(baseline["runs"].iloc[0]),
"defended_runs": int(defended["runs"].iloc[0]),
"baseline_coi_level_mean": b_coi,
"defended_coi_level_mean": d_coi,
"coi_preserved": d_coi - b_coi,
"coi_preserved_pct": 0.0
if b_coi == 0.0
else 100.0 * (d_coi - b_coi) / b_coi,
}
)
return (
pd.DataFrame(rows).sort_values(["alpha", "n_products"]).reset_index(drop=True)
)
def _save_plot(fig: plt.Figure, path: Path) -> Path:
    """Write *fig* to *path* with a tight bounding box, close it, and return *path*.

    The parent directory of *path* is created if it does not exist yet.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    fig.savefig(path, bbox_inches="tight")
    # Close the figure once it has been written to disk.
    plt.close(fig)
    return path
def _smoothed_curve(
x: np.ndarray,
y: np.ndarray,
*,
window: int = 5,
points: int = 320,
) -> tuple[np.ndarray, np.ndarray]:
x_values = np.asarray(x, dtype=float)
y_values = np.asarray(y, dtype=float)
mask = np.isfinite(x_values) & np.isfinite(y_values)
x_values = x_values[mask]
y_values = y_values[mask]
if x_values.size == 0:
return x_values, y_values
order = np.argsort(x_values)
x_values = x_values[order]
y_values = y_values[order]
unique_x = np.unique(x_values)
if unique_x.size != x_values.size:
dedup = (
pd.DataFrame({"x": x_values, "y": y_values})
.groupby("x", as_index=False)
.agg(y=("y", "mean"))
.sort_values("x")
)
x_values = dedup["x"].to_numpy(dtype=float)
y_values = dedup["y"].to_numpy(dtype=float)
if x_values.size < 3:
return x_values, y_values
win = int(max(3, window))
if win % 2 == 0:
win += 1
if win > x_values.size:
win = x_values.size if x_values.size % 2 == 1 else x_values.size - 1
if win < 3:
return x_values, y_values
half = win // 2
offsets = np.arange(-half, half + 1, dtype=float)
sigma = max(win / 3.0, 1.0)
kernel = np.exp(-0.5 * (offsets / sigma) ** 2)
kernel = kernel / np.sum(kernel)
y_padded = np.pad(y_values, (half, half), mode="edge")
y_smooth = np.convolve(y_padded, kernel, mode="valid")
n_points = max(int(points), x_values.size)
x_dense = np.linspace(float(np.min(x_values)), float(np.max(x_values)), n_points)
y_dense = np.interp(x_dense, x_values, y_smooth)
return x_dense, y_dense
def _plot_focus_revenue_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Path:
    """Plot mean revenue against alpha, one line per mode, and save it to *out_path*.

    Modes with no rows in *alpha_mode* are skipped. Returns *out_path*.
    """
    series_specs = (
        ("baseline", "#4C72B0", "Baseline"),
        ("defended", "#C44E52", "Defended"),
    )
    fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
    for mode, color, label in series_specs:
        rows = alpha_mode[alpha_mode["mode"] == mode].sort_values("alpha")
        if not rows.empty:
            ax.plot(
                rows["alpha"],
                rows["revenue_mean"],
                color=color,
                label=label,
                marker="o",
                markersize=4,
                linewidth=1.9,
            )
    ax.set_xlabel(r"Contamination $\alpha$")
    ax.set_ylabel("Mean episode revenue")
    ax.set_title("Final Cohort Revenue Curves")
    ax.legend(loc="lower left")
    return _save_plot(fig, out_path)
def _plot_focus_coi_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Path:
    """Plot smoothed mean COI level against alpha for both modes and save the figure.

    For each mode the raw per-alpha means are drawn as markers on top of a
    smoothed curve. Where both modes have a value for the same alpha, the
    area between the two smoothed curves is shaded and labelled "Gap".
    Returns *out_path*.
    """
    series_specs = (
        ("baseline", "#4C72B0", "Baseline"),
        ("defended", "#C44E52", "Defended"),
    )
    fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
    for mode, color, label in series_specs:
        rows = alpha_mode[alpha_mode["mode"] == mode].sort_values("alpha")
        if rows.empty:
            continue
        raw_alpha = rows["alpha"].to_numpy(dtype=float)
        raw_level = rows["coi_level_mean"].to_numpy(dtype=float)
        curve_alpha, curve_level = _smoothed_curve(raw_alpha, raw_level)
        ax.plot(curve_alpha, curve_level, linewidth=1.9, color=color, label=label)
        ax.scatter(
            raw_alpha,
            raw_level,
            s=18,
            color=color,
            edgecolor="#FFFFFF",
            linewidth=0.45,
            zorder=3,
        )

    # One column per mode, indexed by alpha, to line the two curves up.
    paired = alpha_mode.pivot_table(
        index="alpha",
        columns="mode",
        values="coi_level_mean",
        aggfunc="mean",
    ).sort_index()
    both_modes = {"baseline", "defended"}.issubset(set(paired.columns))
    if both_modes:
        paired = paired.dropna(subset=["baseline", "defended"], how="any")
    if both_modes and not paired.empty:
        shared_alpha = paired.index.to_numpy(dtype=float)
        fill_alpha, baseline_curve = _smoothed_curve(
            shared_alpha, paired["baseline"].to_numpy(dtype=float)
        )
        _, defended_curve = _smoothed_curve(
            shared_alpha, paired["defended"].to_numpy(dtype=float)
        )
        ax.fill_between(
            fill_alpha,
            baseline_curve,
            defended_curve,
            color="#55A868",
            alpha=0.12,
            label="Gap",
        )

    ax.set_xlabel(r"Contamination $\alpha$")
    ax.set_ylabel("Mean COI level")
    ax.set_title("Final Cohort COI Curves")
    ax.legend(loc="lower left")
    return _save_plot(fig, out_path)
def _plot_focus_coi_preservation_grid(
coi_preservation: pd.DataFrame, out_path: Path
) -> Path:
if coi_preservation.empty:
raise ValueError("COI preservation grid requires at least one paired cell")
alpha_levels = sorted(coi_preservation["alpha"].dropna().unique().tolist())
endpoint_targets = (0.0, 1.0)
endpoint_levels = [
alpha
for target in endpoint_targets
for alpha in alpha_levels
if np.isclose(alpha, target, atol=1e-9)
]
if len(endpoint_levels) < 2 and alpha_levels:
endpoint_levels = [alpha_levels[0], alpha_levels[-1]]
endpoint_levels = sorted(set(endpoint_levels))
data = coi_preservation[coi_preservation["alpha"].isin(endpoint_levels)].copy()
if data.empty:
raise ValueError(
"COI preservation grid has no rows for selected alpha endpoints"
)
alpha_levels = sorted(data["alpha"].dropna().unique().tolist())
product_levels = sorted(data["n_products"].dropna().unique().tolist())
bars = data.pivot_table(
index="n_products",
columns="alpha",
values="coi_preserved",
aggfunc="mean",
).reindex(index=product_levels, columns=alpha_levels)
x = np.arange(len(product_levels), dtype=float)
n_alpha = max(len(alpha_levels), 1)
bar_width = min(0.78 / n_alpha, 0.35)
offsets = (np.arange(n_alpha, dtype=float) - (n_alpha - 1) / 2.0) * bar_width
palette = ["#4C72B0", "#C44E52", "#55A868", "#8172B3"]
fig, ax = plt.subplots(figsize=(7.8, 5.0), constrained_layout=True)
for idx, alpha in enumerate(alpha_levels):
values = bars[alpha].to_numpy(dtype=float)
mask = np.isfinite(values)
if not np.any(mask):
continue
xpos = x[mask] + offsets[idx]
v = values[mask]
ax.bar(
xpos,
v,
width=bar_width * 0.96,
color=palette[idx % len(palette)],
label=rf"$\alpha={alpha:.1f}$",
)
for x_i, y_i in zip(xpos, v):
ax.text(
float(x_i),
float(y_i) + (0.035 if y_i >= 0 else -0.035),
f"{y_i:+.2f}",
ha="center",
va="bottom" if y_i >= 0 else "top",
fontsize=7,
)
valid = bars.to_numpy(dtype=float)
valid = valid[np.isfinite(valid)]
max_abs = float(np.max(np.abs(valid))) if valid.size else 1.0
max_abs = max(max_abs * 1.22, 0.4)
ax.set_ylim(-max_abs, max_abs)
ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
ax.set_xticks(x)
ax.set_xticklabels([f"{int(v)}" for v in product_levels])
ax.set_xlabel("Product count")
ax.set_ylabel("COI preserved (defended minus baseline)")
ax.set_title("COI Preservation by Product Count at $\\alpha=0.0$ vs $\\alpha=1.0$")
ax.legend(loc="upper right")
ax.grid(axis="y", alpha=0.22)
return _save_plot(fig, out_path)
def _plot_focus_revenue_delta(alpha_deltas: pd.DataFrame, out_path: Path) -> Path:
    """Plot the percentage revenue delta against alpha and save it to *out_path*.

    The curve is shaded down to the zero line. Among alphas of at least 0.7,
    the point with the largest absolute delta is highlighted and annotated.
    Returns *out_path*.
    """
    accent = "#C44E52"
    fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
    alphas = alpha_deltas["alpha"].to_numpy(dtype=float)
    pct = alpha_deltas["revenue_delta_pct"].to_numpy(dtype=float)
    ax.plot(alphas, pct, marker="o", linewidth=2.0, markersize=4, color=accent)
    ax.fill_between(alphas, pct, 0.0, color=accent, alpha=0.12)
    ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")

    high = alpha_deltas[alpha_deltas["alpha"] >= 0.7]
    if not high.empty:
        # Rank the high-alpha rows by magnitude of the delta, largest first.
        by_magnitude = high["revenue_delta_pct"].abs().sort_values(ascending=False)
        best = high.reindex(by_magnitude.index).iloc[0]
        ax.scatter(
            [best["alpha"]],
            [best["revenue_delta_pct"]],
            color="#1f77b4",
            s=45,
            zorder=3,
        )
        ax.annotate(
            f"high-alpha peak {best['revenue_delta_pct']:.2f}%",
            (float(best["alpha"]), float(best["revenue_delta_pct"])),
            textcoords="offset points",
            xytext=(6, 6),
            fontsize=8,
        )

    ax.set_xlabel(r"Contamination $\alpha$")
    ax.set_ylabel("Defended minus baseline revenue (%)")
    ax.set_title("Revenue Delta by Contamination (Final Cohort)")
    return _save_plot(fig, out_path)
def _plot_focus_risk_deltas(alpha_deltas: pd.DataFrame, out_path: Path) -> Path:
    """Plot COI leakage and volatility deltas against alpha and save the figure.

    Both series are defended-minus-baseline differences drawn on one axis
    with a dashed zero reference line. Returns *out_path*.
    """
    # (column, marker, marker size, colour, legend label), in draw order.
    series_specs = (
        ("coi_leakage_delta", "o", 4, "#55A868", "COI leakage delta"),
        ("volatility_delta", "s", 3.8, "#8172B3", "Volatility delta"),
    )
    fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
    alphas = alpha_deltas["alpha"].to_numpy(dtype=float)
    for column, marker, marker_size, color, label in series_specs:
        ax.plot(
            alphas,
            alpha_deltas[column].to_numpy(dtype=float),
            marker=marker,
            linewidth=1.8,
            markersize=marker_size,
            color=color,
            label=label,
        )
    ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
    ax.set_xlabel(r"Contamination $\alpha$")
    ax.set_ylabel("Defended minus baseline")
    ax.set_title("Leakage and Stability Deltas (Final Cohort)")
    ax.legend(loc="lower left")
    return _save_plot(fig, out_path)
def _write_include(path: Path, figure_rel_path: str, width: str) -> Path:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(f"\\includegraphics[width={width}]{{{figure_rel_path}}}\n")
return path
def run(
    bundle_dir: Path,
    output_dir: Path,
    plot_dir: Path,
    focus_sweep_id: str | None = None,
) -> list[Path]:
    """Generate all tables, the headline JSON, plots and LaTeX includes.

    Args:
        bundle_dir: directory containing ``runs_finished.csv``.
        output_dir: destination for the CSV tables and the headline JSON.
        plot_dir: destination for the PDF figures.
        focus_sweep_id: sweep to analyse; when omitted (or empty), the sweep
            chosen by ``_focus_sweep`` is used.

    Returns:
        Every path written, in order: four CSV tables, the headline JSON,
        five PDF plots, five ``.tex`` include snippets.

    Raises:
        ValueError: if the focus sweep id matches no row of the bundle.
    """
    all_runs = _load_runs(bundle_dir)
    focus_id = str(focus_sweep_id) if focus_sweep_id else _focus_sweep(all_runs)

    # Compare as strings for both the existence check and the row filter, so
    # that a sweep_id column parsed as numbers still matches the id.
    focus_mask = all_runs["sweep_id"].astype(str) == focus_id
    if not focus_mask.any():
        raise ValueError(f"Requested focus sweep_id not found: {focus_id}")
    focus_runs = all_runs[focus_mask].copy()

    alpha_mode = _alpha_mode_summary(focus_runs)
    deltas = _alpha_deltas(alpha_mode)
    zones = _zone_summary(deltas)
    coi_preservation = _alpha_product_coi_preservation(focus_runs)

    output_dir.mkdir(parents=True, exist_ok=True)
    plot_dir.mkdir(parents=True, exist_ok=True)
    written: list[Path] = []

    tables = (
        ("final_focus_alpha_mode_summary.csv", alpha_mode),
        ("final_focus_alpha_deltas.csv", deltas),
        ("final_focus_zone_summary.csv", zones),
        ("final_focus_coi_preservation_grid.csv", coi_preservation),
    )
    for filename, table in tables:
        table_path = output_dir / filename
        table.to_csv(table_path, index=False)
        written.append(table_path)

    has_deltas = not deltas.empty
    headline = {
        "bundle": str(bundle_dir),
        # NOTE(review): this label is written even when focus_sweep_id was
        # supplied explicitly -- confirm that is intended.
        "focus_cohort": "max_alpha_coverage",
        "focus_sweep_id": focus_id,
        "focus_run_count": int(len(focus_runs)),
        "git_commit": _git_commit(),
        "alpha_cells": int(deltas["alpha"].nunique()) if has_deltas else 0,
        "alpha_min": float(deltas["alpha"].min()) if has_deltas else None,
        "alpha_max": float(deltas["alpha"].max()) if has_deltas else None,
        "mean_revenue_delta_pct": float(deltas["revenue_delta_pct"].mean())
        if has_deltas
        else None,
        "mean_reward_delta_pct": float(deltas["reward_delta_pct"].mean())
        if has_deltas
        else None,
        "zone_summary": zones.to_dict(orient="records"),
    }
    headline_path = output_dir / "final_focus_headline_summary.json"
    headline_path.write_text(json.dumps(headline, indent=2) + "\n")
    written.append(headline_path)

    # (file stem, plotting function, input frame, LaTeX include width)
    figures = (
        (
            "final_focus_revenue_by_alpha",
            _plot_focus_revenue_by_alpha,
            alpha_mode,
            "0.98\\linewidth",
        ),
        (
            "final_focus_coi_by_alpha",
            _plot_focus_coi_by_alpha,
            alpha_mode,
            "0.98\\linewidth",
        ),
        (
            "final_focus_coi_preservation_grid",
            _plot_focus_coi_preservation_grid,
            coi_preservation,
            "0.98\\linewidth",
        ),
        (
            "final_focus_revenue_delta",
            _plot_focus_revenue_delta,
            deltas,
            "0.95\\linewidth",
        ),
        (
            "final_focus_risk_deltas",
            _plot_focus_risk_deltas,
            deltas,
            "0.95\\linewidth",
        ),
    )
    for stem, plotter, frame, _width in figures:
        written.append(plotter(frame, plot_dir / f"{stem}.pdf"))

    # NOTE: the include snippets always point at the default plot location
    # below, independent of the plot_dir argument.
    include_dir = Path(__file__).resolve().parent / "includes"
    for stem, _plotter, _frame, width in figures:
        written.append(
            _write_include(
                include_dir / f"{stem}.tex",
                f"chapters/figures/results/generated/final/plots/{stem}.pdf",
                width,
            )
        )
    return written
def main() -> None:
    """Parse the command line, generate all outputs, and print each written path.

    Options:
        --bundle-dir: sweep bundle to read; defaults to the newest bundle
            found by ``_default_bundle_dir``.
        --output-dir: where tables and the headline JSON are written.
        --plot-dir: where figures are written; defaults to ``plots`` under
            the output directory.
        --focus-sweep-id: restrict the analysis to this sweep.
    """
    parser = argparse.ArgumentParser(
        description="Generate final paper figures/tables from the final sweep cohort"
    )
    # The bundle default is resolved after parsing: looking it up here would
    # scan the filesystem and raise FileNotFoundError even when --bundle-dir
    # is given explicitly or only --help is requested.
    parser.add_argument("--bundle-dir", type=Path, default=None)
    parser.add_argument("--output-dir", type=Path, default=_default_output_dir())
    parser.add_argument("--plot-dir", type=Path, default=None)
    parser.add_argument("--focus-sweep-id", type=str, default=None)
    args = parser.parse_args()

    bundle_dir = (
        args.bundle_dir if args.bundle_dir is not None else _default_bundle_dir()
    )
    plot_dir = (
        args.plot_dir
        if args.plot_dir is not None
        else _default_plot_dir(args.output_dir)
    )

    _configure_style()
    outputs = run(
        bundle_dir=bundle_dir,
        output_dir=args.output_dir,
        plot_dir=plot_dir,
        focus_sweep_id=args.focus_sweep_id,
    )
    for path in outputs:
        print(path)
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()