banner plot and methodology updates

This commit is contained in:
2026-03-27 16:58:41 +01:00
parent 105b014976
commit 18b41ff802
8 changed files with 295 additions and 8 deletions

View File

@@ -210,6 +210,48 @@ def _zone_summary(alpha_deltas: pd.DataFrame) -> pd.DataFrame:
)
def _alpha_product_coi_preservation(runs: pd.DataFrame) -> pd.DataFrame:
    """Compare defended and baseline mean COI level per (alpha, n_products) cell.

    ``runs`` is read through the columns ``alpha``, ``n_products``, ``mode``,
    ``run_id`` and ``eval_coi_level_mean``. Runs are averaged per
    (alpha, n_products, mode); a cell contributes one output row only when it
    has both a ``"defended"`` and a ``"baseline"`` aggregate.

    Returns:
        One row per paired cell, sorted by ``alpha`` then ``n_products``, with
        the run counts, both means, the absolute difference
        (``coi_preserved`` = defended - baseline) and the relative difference
        in percent (``coi_preserved_pct``, reported as 0.0 when the baseline
        mean is exactly 0.0 to avoid dividing by zero). When no cell is
        paired, an empty frame carrying the same columns is returned.
    """
    # Explicit column list: keeps the output schema stable and lets the final
    # sort succeed even when no paired cell was found (an empty list of rows
    # would otherwise yield a column-less frame and a KeyError on sort).
    columns = [
        "alpha",
        "n_products",
        "baseline_runs",
        "defended_runs",
        "baseline_coi_level_mean",
        "defended_coi_level_mean",
        "coi_preserved",
        "coi_preserved_pct",
    ]
    grouped = (
        runs.groupby(["alpha", "n_products", "mode"], as_index=False)
        .agg(
            runs=("run_id", "size"),
            coi_level_mean=("eval_coi_level_mean", "mean"),
        )
        .sort_values(["alpha", "n_products", "mode"])
        .reset_index(drop=True)
    )
    rows: list[dict[str, float | int]] = []
    for (alpha, n_products), group in grouped.groupby(
        ["alpha", "n_products"], sort=True
    ):
        defended = group[group["mode"] == "defended"]
        baseline = group[group["mode"] == "baseline"]
        if defended.empty or baseline.empty:
            # Unpaired cell: nothing to compare against.
            continue
        d_coi = float(defended["coi_level_mean"].iloc[0])
        b_coi = float(baseline["coi_level_mean"].iloc[0])
        delta = d_coi - b_coi
        rows.append(
            {
                "alpha": float(alpha),
                "n_products": float(n_products),
                "baseline_runs": int(baseline["runs"].iloc[0]),
                "defended_runs": int(defended["runs"].iloc[0]),
                "baseline_coi_level_mean": b_coi,
                "defended_coi_level_mean": d_coi,
                "coi_preserved": delta,
                "coi_preserved_pct": 0.0 if b_coi == 0.0 else 100.0 * delta / b_coi,
            }
        )
    return (
        pd.DataFrame(rows, columns=columns)
        .sort_values(["alpha", "n_products"])
        .reset_index(drop=True)
    )
def _save_plot(fig: plt.Figure, path: Path) -> Path:
path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(path, bbox_inches="tight")
@@ -217,6 +259,61 @@ def _save_plot(fig: plt.Figure, path: Path) -> Path:
return path
def _smoothed_curve(
    x: np.ndarray,
    y: np.ndarray,
    *,
    window: int = 5,
    points: int = 320,
) -> tuple[np.ndarray, np.ndarray]:
    """Return a Gaussian-smoothed, densely resampled copy of an (x, y) series.

    Non-finite pairs are dropped, the rest are sorted by ``x``, and samples
    sharing the same ``x`` are collapsed to their mean ``y``. Series with
    fewer than three distinct points are returned as-is (cleaned but not
    smoothed).

    Args:
        x: Abscissa values.
        y: Ordinate values, paired element-wise with ``x``.
        window: Requested kernel width in samples; raised to at least 3,
            forced odd, and capped at the number of available points.
        points: Minimum number of samples in the resampled output.

    Returns:
        ``(x_dense, y_dense)``: an evenly spaced grid spanning the data range
        and the smoothed curve linearly interpolated onto it.
    """
    xs = np.asarray(x, dtype=float)
    ys = np.asarray(y, dtype=float)

    finite = np.isfinite(xs) & np.isfinite(ys)
    xs, ys = xs[finite], ys[finite]
    if xs.size == 0:
        return xs, ys

    ranking = np.argsort(xs)
    xs, ys = xs[ranking], ys[ranking]

    # Repeated abscissae would break interpolation; average them away.
    if np.unique(xs).size != xs.size:
        collapsed = (
            pd.DataFrame({"x": xs, "y": ys})
            .groupby("x", as_index=False)
            .agg(y=("y", "mean"))
            .sort_values("x")
        )
        xs = collapsed["x"].to_numpy(dtype=float)
        ys = collapsed["y"].to_numpy(dtype=float)

    if xs.size < 3:
        return xs, ys

    # Kernel width: at least 3, odd, and no wider than the series.
    width = int(max(3, window))
    width |= 1  # bump even widths up to the next odd number
    if width > xs.size:
        width = (xs.size - 1) | 1  # largest odd number <= xs.size
    if width < 3:
        # Defensive guard; with at least three points the width stays >= 3.
        return xs, ys

    reach = width // 2
    taps = np.arange(-reach, reach + 1, dtype=float)
    spread = max(width / 3.0, 1.0)
    # The kernel is defined over sample indices, not over x units.
    weights = np.exp(-0.5 * (taps / spread) ** 2)
    weights /= weights.sum()

    # Edge-replicate so the "valid" convolution keeps the original length.
    padded = np.pad(ys, (reach, reach), mode="edge")
    smoothed = np.convolve(padded, weights, mode="valid")

    sample_count = max(int(points), xs.size)
    # xs is sorted ascending here, so its ends are the min and max.
    grid = np.linspace(float(xs[0]), float(xs[-1]), sample_count)
    return grid, np.interp(grid, xs, smoothed)
def _plot_focus_revenue_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Path:
fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
for mode, color, label in (
@@ -243,6 +340,148 @@ def _plot_focus_revenue_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Pa
return _save_plot(fig, out_path)
def _plot_focus_coi_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Path:
    """Plot mean COI level against alpha for the baseline and defended modes.

    For each mode present in ``alpha_mode`` a smoothed curve is drawn with the
    raw per-alpha means overlaid as markers. Where both modes have a value at
    the same alpha, the band between the two smoothed curves is shaded and
    labelled "Gap". A dashed vertical reference line is drawn at alpha = 0.7.

    Args:
        alpha_mode: Frame read through the columns ``alpha``, ``mode`` and
            ``coi_level_mean``.
        out_path: Destination handed to ``_save_plot``.

    Returns:
        Whatever ``_save_plot`` returns for the saved figure.
    """
    fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)

    series_styles = (
        ("baseline", "#4C72B0", "Baseline"),
        ("defended", "#C44E52", "Defended"),
    )
    for mode_name, hue, legend_label in series_styles:
        mode_rows = alpha_mode[alpha_mode["mode"] == mode_name].sort_values("alpha")
        if mode_rows.empty:
            continue
        alphas = mode_rows["alpha"].to_numpy(dtype=float)
        levels = mode_rows["coi_level_mean"].to_numpy(dtype=float)
        curve_x, curve_y = _smoothed_curve(alphas, levels)
        ax.plot(curve_x, curve_y, linewidth=1.9, color=hue, label=legend_label)
        # Raw means sit on top of the curve (zorder) with a thin white rim.
        ax.scatter(
            alphas,
            levels,
            s=18,
            color=hue,
            edgecolor="#FFFFFF",
            linewidth=0.45,
            zorder=3,
        )

    # One row per alpha, one column per mode, so the curves share an x grid.
    by_mode = alpha_mode.pivot_table(
        index="alpha",
        columns="mode",
        values="coi_level_mean",
        aggfunc="mean",
    ).sort_index()
    needed = ["baseline", "defended"]
    if all(name in set(by_mode.columns) for name in needed):
        by_mode = by_mode.dropna(subset=needed, how="any")
        if not by_mode.empty:
            shared_alpha = by_mode.index.to_numpy(dtype=float)
            fill_x, lower = _smoothed_curve(
                shared_alpha, by_mode["baseline"].to_numpy(dtype=float)
            )
            _, upper = _smoothed_curve(
                shared_alpha, by_mode["defended"].to_numpy(dtype=float)
            )
            ax.fill_between(
                fill_x,
                lower,
                upper,
                color="#55A868",
                alpha=0.12,
                label="Gap",
            )

    ax.axvline(0.7, color="#666666", linewidth=1.0, linestyle="--")
    ax.set_xlabel(r"Contamination $\alpha$")
    ax.set_ylabel("Mean COI level")
    ax.set_title("Final Cohort COI Curves")
    ax.legend(loc="lower left")
    return _save_plot(fig, out_path)
def _plot_focus_coi_preservation_grid(
    coi_preservation: pd.DataFrame, out_path: Path
) -> Path:
    """Draw grouped bars of COI preserved per product count at the endpoint alphas.

    The alpha levels closest to 0.0 and 1.0 are plotted; when fewer than two
    such levels exist, the smallest and largest available alpha are used
    instead. Each product count gets one bar per selected alpha showing the
    mean ``coi_preserved`` value, annotated with its signed value. The y-axis
    is symmetric around zero.

    Args:
        coi_preservation: Frame read through the columns ``alpha``,
            ``n_products`` and ``coi_preserved``.
        out_path: Destination handed to ``_save_plot``.

    Returns:
        Whatever ``_save_plot`` returns for the saved figure.

    Raises:
        ValueError: If ``coi_preservation`` is empty, or no rows remain for
            the selected endpoint alphas.
    """
    if coi_preservation.empty:
        raise ValueError("COI preservation grid requires at least one paired cell")

    alpha_levels = sorted(coi_preservation["alpha"].dropna().unique().tolist())
    endpoint_targets = (0.0, 1.0)
    endpoint_levels = [
        alpha
        for target in endpoint_targets
        for alpha in alpha_levels
        if np.isclose(alpha, target, atol=1e-9)
    ]
    if len(endpoint_levels) < 2 and alpha_levels:
        # Fall back to the extremes that are actually present in the data.
        endpoint_levels = [alpha_levels[0], alpha_levels[-1]]
    endpoint_levels = sorted(set(endpoint_levels))

    data = coi_preservation[coi_preservation["alpha"].isin(endpoint_levels)].copy()
    if data.empty:
        raise ValueError(
            "COI preservation grid has no rows for selected alpha endpoints"
        )

    alpha_levels = sorted(data["alpha"].dropna().unique().tolist())
    product_levels = sorted(data["n_products"].dropna().unique().tolist())
    bars = data.pivot_table(
        index="n_products",
        columns="alpha",
        values="coi_preserved",
        aggfunc="mean",
    ).reindex(index=product_levels, columns=alpha_levels)

    # Centre each group of bars on its integer tick position.
    x = np.arange(len(product_levels), dtype=float)
    n_alpha = max(len(alpha_levels), 1)
    bar_width = min(0.78 / n_alpha, 0.35)
    offsets = (np.arange(n_alpha, dtype=float) - (n_alpha - 1) / 2.0) * bar_width
    palette = ["#4C72B0", "#C44E52", "#55A868", "#8172B3"]

    fig, ax = plt.subplots(figsize=(7.8, 5.0), constrained_layout=True)
    for idx, alpha in enumerate(alpha_levels):
        values = bars[alpha].to_numpy(dtype=float)
        mask = np.isfinite(values)
        if not np.any(mask):
            continue
        xpos = x[mask] + offsets[idx]
        v = values[mask]
        ax.bar(
            xpos,
            v,
            width=bar_width * 0.96,
            color=palette[idx % len(palette)],
            label=rf"$\alpha={alpha:.1f}$",
        )
        for x_i, y_i in zip(xpos, v):
            # Put the label just outside the bar, on the side it points to.
            ax.text(
                float(x_i),
                float(y_i) + (0.035 if y_i >= 0 else -0.035),
                f"{y_i:+.2f}",
                ha="center",
                va="bottom" if y_i >= 0 else "top",
                fontsize=7,
            )

    valid = bars.to_numpy(dtype=float)
    valid = valid[np.isfinite(valid)]
    max_abs = float(np.max(np.abs(valid))) if valid.size else 1.0
    # Leave headroom for the value labels and never collapse below +/-0.4.
    max_abs = max(max_abs * 1.22, 0.4)
    ax.set_ylim(-max_abs, max_abs)
    ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
    ax.set_xticks(x)
    ax.set_xticklabels([f"{int(v)}" for v in product_levels])
    ax.set_xlabel("Product count")
    ax.set_ylabel("COI preserved (defended minus baseline)")
    # Name the alphas that were actually plotted: the fallback above may pick
    # levels other than 0.0 and 1.0, and a fixed title would mislabel them.
    alpha_text = " vs ".join(rf"$\alpha={alpha:.1f}$" for alpha in alpha_levels)
    ax.set_title(f"COI Preservation by Product Count at {alpha_text}")
    ax.legend(loc="upper right")
    ax.grid(axis="y", alpha=0.22)
    return _save_plot(fig, out_path)
def _plot_focus_revenue_delta(alpha_deltas: pd.DataFrame, out_path: Path) -> Path:
fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
x = alpha_deltas["alpha"].to_numpy(dtype=float)
@@ -326,6 +565,7 @@ def run(
alpha_mode = _alpha_mode_summary(focus_runs)
deltas = _alpha_deltas(alpha_mode)
zones = _zone_summary(deltas)
coi_preservation = _alpha_product_coi_preservation(focus_runs)
output_dir.mkdir(parents=True, exist_ok=True)
plot_dir.mkdir(parents=True, exist_ok=True)
@@ -343,6 +583,10 @@ def run(
zones.to_csv(zone_path, index=False)
written.append(zone_path)
coi_grid_path = output_dir / "final_focus_coi_preservation_grid.csv"
coi_preservation.to_csv(coi_grid_path, index=False)
written.append(coi_grid_path)
headline = {
"bundle": str(bundle_dir),
"focus_cohort": "max_alpha_coverage",
@@ -370,6 +614,18 @@ def run(
plot_dir / "final_focus_revenue_by_alpha.pdf",
)
)
written.append(
_plot_focus_coi_by_alpha(
alpha_mode,
plot_dir / "final_focus_coi_by_alpha.pdf",
)
)
written.append(
_plot_focus_coi_preservation_grid(
coi_preservation,
plot_dir / "final_focus_coi_preservation_grid.pdf",
)
)
written.append(
_plot_focus_revenue_delta(
deltas,
@@ -391,6 +647,20 @@ def run(
"0.98\\linewidth",
)
)
written.append(
_write_include(
include_dir / "final_focus_coi_by_alpha.tex",
"chapters/figures/results/generated/final/plots/final_focus_coi_by_alpha.pdf",
"0.98\\linewidth",
)
)
written.append(
_write_include(
include_dir / "final_focus_coi_preservation_grid.tex",
"chapters/figures/results/generated/final/plots/final_focus_coi_preservation_grid.pdf",
"0.98\\linewidth",
)
)
written.append(
_write_include(
include_dir / "final_focus_revenue_delta.tex",