mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
banner plot and methodology updates
This commit is contained in:
@@ -210,6 +210,48 @@ def _zone_summary(alpha_deltas: pd.DataFrame) -> pd.DataFrame:
|
||||
)
|
||||
|
||||
|
||||
def _alpha_product_coi_preservation(runs: pd.DataFrame) -> pd.DataFrame:
    """Compare defended and baseline mean COI per (alpha, n_products) cell.

    Runs are averaged per ``(alpha, n_products, mode)`` using the
    ``eval_coi_level_mean`` column, then every cell that has both a
    ``"defended"`` and a ``"baseline"`` aggregate is turned into one output
    row. Cells missing either mode are skipped.

    Args:
        runs: One row per run with the columns ``alpha``, ``n_products``,
            ``mode``, ``run_id`` and ``eval_coi_level_mean``.

    Returns:
        A frame sorted by ``alpha`` then ``n_products`` with the columns
        ``alpha``, ``n_products``, ``baseline_runs``, ``defended_runs``,
        ``baseline_coi_level_mean``, ``defended_coi_level_mean``,
        ``coi_preserved`` (defended minus baseline) and
        ``coi_preserved_pct`` (that difference as a percentage of the
        baseline, reported as ``0.0`` when the baseline mean is exactly
        zero). When no cell has both modes the frame is empty but still
        carries these columns.
    """
    columns = [
        "alpha",
        "n_products",
        "baseline_runs",
        "defended_runs",
        "baseline_coi_level_mean",
        "defended_coi_level_mean",
        "coi_preserved",
        "coi_preserved_pct",
    ]

    grouped = (
        runs.groupby(["alpha", "n_products", "mode"], as_index=False)
        .agg(
            runs=("run_id", "size"),
            coi_level_mean=("eval_coi_level_mean", "mean"),
        )
        .sort_values(["alpha", "n_products", "mode"])
        .reset_index(drop=True)
    )

    rows: list[dict[str, float | int]] = []
    for (alpha, n_products), group in grouped.groupby(
        ["alpha", "n_products"], sort=True
    ):
        defended = group[group["mode"] == "defended"]
        baseline = group[group["mode"] == "baseline"]
        if defended.empty or baseline.empty:
            # A cell is only comparable when both modes were run.
            continue

        d_coi = float(defended["coi_level_mean"].iloc[0])
        b_coi = float(baseline["coi_level_mean"].iloc[0])
        rows.append(
            {
                "alpha": float(alpha),
                "n_products": float(n_products),
                "baseline_runs": int(baseline["runs"].iloc[0]),
                "defended_runs": int(defended["runs"].iloc[0]),
                "baseline_coi_level_mean": b_coi,
                "defended_coi_level_mean": d_coi,
                "coi_preserved": d_coi - b_coi,
                # Avoid dividing by a zero baseline; such cells report 0 %.
                "coi_preserved_pct": 0.0
                if b_coi == 0.0
                else 100.0 * (d_coi - b_coi) / b_coi,
            }
        )

    if not rows:
        # Without this guard, sorting a column-less frame raises KeyError.
        return pd.DataFrame(columns=columns)

    return (
        pd.DataFrame(rows, columns=columns)
        .sort_values(["alpha", "n_products"])
        .reset_index(drop=True)
    )
|
||||
|
||||
|
||||
def _save_plot(fig: plt.Figure, path: Path) -> Path:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
fig.savefig(path, bbox_inches="tight")
|
||||
@@ -217,6 +259,61 @@ def _save_plot(fig: plt.Figure, path: Path) -> Path:
|
||||
return path
|
||||
|
||||
|
||||
def _smoothed_curve(
    x: np.ndarray,
    y: np.ndarray,
    *,
    window: int = 5,
    points: int = 320,
) -> tuple[np.ndarray, np.ndarray]:
    """Return a Gaussian-smoothed, densely resampled version of ``(x, y)``.

    Non-finite pairs are dropped, the rest are sorted by ``x``, and samples
    sharing the same ``x`` are replaced by their mean ``y``. With fewer than
    three distinct samples the cleaned points are returned as-is.

    Args:
        x: Abscissa values.
        y: Ordinate values, paired element-wise with ``x``.
        window: Requested kernel width in samples; raised to at least 3,
            bumped to the next odd number, and capped at the largest odd
            count that fits the data.
        points: Minimum number of samples in the resampled curve.

    Returns:
        ``(x_dense, y_dense)``: an evenly spaced grid over the data range and
        the smoothed values linearly interpolated onto it.
    """
    xs = np.asarray(x, dtype=float)
    ys = np.asarray(y, dtype=float)
    finite = np.isfinite(xs) & np.isfinite(ys)
    xs, ys = xs[finite], ys[finite]
    if xs.size == 0:
        return xs, ys

    ascending = np.argsort(xs)
    xs, ys = xs[ascending], ys[ascending]

    # Collapse repeated x positions to a single averaged sample.
    if np.unique(xs).size != xs.size:
        per_x_mean = pd.DataFrame({"x": xs, "y": ys}).groupby("x")["y"].mean()
        xs = per_x_mean.index.to_numpy(dtype=float)
        ys = per_x_mean.to_numpy(dtype=float)

    sample_count = xs.size
    if sample_count < 3:
        return xs, ys

    # Kernel width: odd, at least 3, and never wider than the data.
    width = int(max(3, window)) | 1
    largest_odd_fit = sample_count if sample_count % 2 else sample_count - 1
    width = min(width, largest_odd_fit)
    if width < 3:
        return xs, ys

    reach = width // 2
    taps = np.arange(-reach, reach + 1, dtype=float)
    spread = max(width / 3.0, 1.0)
    weights = np.exp(-0.5 * (taps / spread) ** 2)
    weights = weights / np.sum(weights)

    # Edge padding keeps the output the same length as the input.
    extended = np.pad(ys, (reach, reach), mode="edge")
    smoothed = np.convolve(extended, weights, mode="valid")

    grid_size = max(int(points), sample_count)
    x_dense = np.linspace(float(xs[0]), float(xs[-1]), grid_size)
    return x_dense, np.interp(x_dense, xs, smoothed)
|
||||
|
||||
|
||||
def _plot_focus_revenue_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Path:
|
||||
fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
|
||||
for mode, color, label in (
|
||||
@@ -243,6 +340,148 @@ def _plot_focus_revenue_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Pa
|
||||
return _save_plot(fig, out_path)
|
||||
|
||||
|
||||
def _plot_focus_coi_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Path:
    """Plot mean COI level against contamination alpha for both modes.

    For each of the ``"baseline"`` and ``"defended"`` modes present in
    ``alpha_mode`` a smoothed line and the raw points are drawn. Where both
    modes have a value for the same alpha, the area between the two smoothed
    curves is shaded. A dashed vertical marker is drawn at alpha = 0.7.

    Args:
        alpha_mode: Frame with the columns ``alpha``, ``mode`` and
            ``coi_level_mean``.
        out_path: Destination passed to ``_save_plot``.

    Returns:
        Whatever ``_save_plot`` returns for the written figure.
    """
    fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)

    series_styles = (
        ("baseline", "#4C72B0", "Baseline"),
        ("defended", "#C44E52", "Defended"),
    )
    for mode_name, line_color, legend_label in series_styles:
        mode_rows = alpha_mode[alpha_mode["mode"] == mode_name].sort_values("alpha")
        if mode_rows.empty:
            continue
        alphas = mode_rows["alpha"].to_numpy(dtype=float)
        coi_levels = mode_rows["coi_level_mean"].to_numpy(dtype=float)
        curve_x, curve_y = _smoothed_curve(alphas, coi_levels)
        ax.plot(
            curve_x,
            curve_y,
            linewidth=1.9,
            color=line_color,
            label=legend_label,
        )
        # Raw observations sit on top of the smoothed line.
        ax.scatter(
            alphas,
            coi_levels,
            s=18,
            color=line_color,
            edgecolor="#FFFFFF",
            linewidth=0.45,
            zorder=3,
        )

    by_alpha = alpha_mode.pivot_table(
        index="alpha",
        columns="mode",
        values="coi_level_mean",
        aggfunc="mean",
    ).sort_index()
    if {"baseline", "defended"} <= set(by_alpha.columns):
        # Only alphas observed in both modes can contribute to the gap band.
        by_alpha = by_alpha.dropna(subset=["baseline", "defended"], how="any")
        if not by_alpha.empty:
            shared_alphas = by_alpha.index.to_numpy(dtype=float)
            band_x, band_baseline = _smoothed_curve(
                shared_alphas, by_alpha["baseline"].to_numpy(dtype=float)
            )
            _, band_defended = _smoothed_curve(
                shared_alphas, by_alpha["defended"].to_numpy(dtype=float)
            )
            ax.fill_between(
                band_x,
                band_baseline,
                band_defended,
                color="#55A868",
                alpha=0.12,
                label="Gap",
            )

    ax.axvline(0.7, color="#666666", linewidth=1.0, linestyle="--")
    ax.set_xlabel(r"Contamination $\alpha$")
    ax.set_ylabel("Mean COI level")
    ax.set_title("Final Cohort COI Curves")
    ax.legend(loc="lower left")
    return _save_plot(fig, out_path)
|
||||
|
||||
|
||||
def _plot_focus_coi_preservation_grid(
    coi_preservation: pd.DataFrame, out_path: Path
) -> Path:
    """Draw grouped bars of COI preserved per product count at endpoint alphas.

    The alphas closest to 0.0 and 1.0 are selected; if fewer than two of
    them are present, the smallest and largest available alphas are used
    instead. One bar group is drawn per ``n_products`` value, one bar per
    selected alpha, each annotated with its signed value.

    Args:
        coi_preservation: Frame with the columns ``alpha``, ``n_products``
            and ``coi_preserved``.
        out_path: Destination passed to ``_save_plot``.

    Returns:
        Whatever ``_save_plot`` returns for the written figure.

    Raises:
        ValueError: If ``coi_preservation`` is empty, or no rows remain for
            the selected alpha endpoints.
    """
    if coi_preservation.empty:
        raise ValueError("COI preservation grid requires at least one paired cell")

    alpha_levels = sorted(coi_preservation["alpha"].dropna().unique().tolist())
    endpoint_targets = (0.0, 1.0)
    endpoint_levels = [
        alpha
        for target in endpoint_targets
        for alpha in alpha_levels
        if np.isclose(alpha, target, atol=1e-9)
    ]
    if len(endpoint_levels) < 2 and alpha_levels:
        # Fall back to the extremes of whatever alpha range is available.
        endpoint_levels = [alpha_levels[0], alpha_levels[-1]]
    endpoint_levels = sorted(set(endpoint_levels))

    data = coi_preservation[coi_preservation["alpha"].isin(endpoint_levels)].copy()
    if data.empty:
        raise ValueError(
            "COI preservation grid has no rows for selected alpha endpoints"
        )

    alpha_levels = sorted(data["alpha"].dropna().unique().tolist())
    product_levels = sorted(data["n_products"].dropna().unique().tolist())

    bars = data.pivot_table(
        index="n_products",
        columns="alpha",
        values="coi_preserved",
        aggfunc="mean",
    ).reindex(index=product_levels, columns=alpha_levels)

    x = np.arange(len(product_levels), dtype=float)
    n_alpha = max(len(alpha_levels), 1)
    bar_width = min(0.78 / n_alpha, 0.35)
    # Centre the bars of each group on its tick position.
    offsets = (np.arange(n_alpha, dtype=float) - (n_alpha - 1) / 2.0) * bar_width
    palette = ["#4C72B0", "#C44E52", "#55A868", "#8172B3"]

    fig, ax = plt.subplots(figsize=(7.8, 5.0), constrained_layout=True)
    for idx, alpha in enumerate(alpha_levels):
        values = bars[alpha].to_numpy(dtype=float)
        mask = np.isfinite(values)
        if not np.any(mask):
            continue
        xpos = x[mask] + offsets[idx]
        v = values[mask]
        ax.bar(
            xpos,
            v,
            width=bar_width * 0.96,
            color=palette[idx % len(palette)],
            label=rf"$\alpha={alpha:.1f}$",
        )
        for x_i, y_i in zip(xpos, v):
            # Place the label just beyond the bar tip, on the bar's side of zero.
            ax.text(
                float(x_i),
                float(y_i) + (0.035 if y_i >= 0 else -0.035),
                f"{y_i:+.2f}",
                ha="center",
                va="bottom" if y_i >= 0 else "top",
                fontsize=7,
            )

    valid = bars.to_numpy(dtype=float)
    valid = valid[np.isfinite(valid)]
    max_abs = float(np.max(np.abs(valid))) if valid.size else 1.0
    # Symmetric limits with headroom for the value labels.
    max_abs = max(max_abs * 1.22, 0.4)
    ax.set_ylim(-max_abs, max_abs)

    ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
    ax.set_xticks(x)
    ax.set_xticklabels([f"{int(v)}" for v in product_levels])
    ax.set_xlabel("Product count")
    ax.set_ylabel("COI preserved (defended minus baseline)")
    # Name the alphas actually plotted so the title stays truthful when the
    # fallback endpoints are used; for 0.0 and 1.0 the text is unchanged.
    alpha_title = " vs ".join(rf"$\alpha={alpha:.1f}$" for alpha in alpha_levels)
    ax.set_title(f"COI Preservation by Product Count at {alpha_title}")
    ax.legend(loc="upper right")
    ax.grid(axis="y", alpha=0.22)
    return _save_plot(fig, out_path)
|
||||
|
||||
|
||||
def _plot_focus_revenue_delta(alpha_deltas: pd.DataFrame, out_path: Path) -> Path:
|
||||
fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
|
||||
x = alpha_deltas["alpha"].to_numpy(dtype=float)
|
||||
@@ -326,6 +565,7 @@ def run(
|
||||
alpha_mode = _alpha_mode_summary(focus_runs)
|
||||
deltas = _alpha_deltas(alpha_mode)
|
||||
zones = _zone_summary(deltas)
|
||||
coi_preservation = _alpha_product_coi_preservation(focus_runs)
|
||||
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
plot_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -343,6 +583,10 @@ def run(
|
||||
zones.to_csv(zone_path, index=False)
|
||||
written.append(zone_path)
|
||||
|
||||
coi_grid_path = output_dir / "final_focus_coi_preservation_grid.csv"
|
||||
coi_preservation.to_csv(coi_grid_path, index=False)
|
||||
written.append(coi_grid_path)
|
||||
|
||||
headline = {
|
||||
"bundle": str(bundle_dir),
|
||||
"focus_cohort": "max_alpha_coverage",
|
||||
@@ -370,6 +614,18 @@ def run(
|
||||
plot_dir / "final_focus_revenue_by_alpha.pdf",
|
||||
)
|
||||
)
|
||||
written.append(
|
||||
_plot_focus_coi_by_alpha(
|
||||
alpha_mode,
|
||||
plot_dir / "final_focus_coi_by_alpha.pdf",
|
||||
)
|
||||
)
|
||||
written.append(
|
||||
_plot_focus_coi_preservation_grid(
|
||||
coi_preservation,
|
||||
plot_dir / "final_focus_coi_preservation_grid.pdf",
|
||||
)
|
||||
)
|
||||
written.append(
|
||||
_plot_focus_revenue_delta(
|
||||
deltas,
|
||||
@@ -391,6 +647,20 @@ def run(
|
||||
"0.98\\linewidth",
|
||||
)
|
||||
)
|
||||
written.append(
|
||||
_write_include(
|
||||
include_dir / "final_focus_coi_by_alpha.tex",
|
||||
"chapters/figures/results/generated/final/plots/final_focus_coi_by_alpha.pdf",
|
||||
"0.98\\linewidth",
|
||||
)
|
||||
)
|
||||
written.append(
|
||||
_write_include(
|
||||
include_dir / "final_focus_coi_preservation_grid.tex",
|
||||
"chapters/figures/results/generated/final/plots/final_focus_coi_preservation_grid.pdf",
|
||||
"0.98\\linewidth",
|
||||
)
|
||||
)
|
||||
written.append(
|
||||
_write_include(
|
||||
include_dir / "final_focus_revenue_delta.tex",
|
||||
|
||||
Reference in New Issue
Block a user