mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
feature: telemetry logging
This commit is contained in:
@@ -19,7 +19,10 @@ def make_env(cfg: Mapping[str, Any]):
|
||||
lambda_coi=float(cfg["lambda_coi"]),
|
||||
robust_radius=float(cfg["robust_radius"]),
|
||||
robust_points=int(cfg["robust_points"]),
|
||||
robust_rollouts=int(cfg.get("robust_rollouts", 1)),
|
||||
info_value=float(cfg["info_value"]),
|
||||
eta_ux=float(cfg.get("eta_ux", 0.5)),
|
||||
reward_profit_weight=float(cfg.get("reward_profit_weight", 1.0)),
|
||||
action_levels=int(cfg["action_levels"]),
|
||||
action_scale_low=float(cfg["action_scale_low"]),
|
||||
action_scale_high=float(cfg["action_scale_high"]),
|
||||
@@ -40,11 +43,14 @@ def _action(agent: Any, obs: Any, deterministic: bool = True):
|
||||
return action
|
||||
|
||||
|
||||
def evaluate(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
||||
def _evaluate_env(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
||||
rewards: list[float] = []
|
||||
revenues: list[float] = []
|
||||
margins: list[float] = []
|
||||
coi_levels: list[float] = []
|
||||
coi_leakages: list[float] = []
|
||||
volatilities: list[float] = []
|
||||
agent_probs: list[float] = []
|
||||
|
||||
for _ in range(int(episodes)):
|
||||
obs, _ = env.reset()
|
||||
@@ -53,6 +59,9 @@ def evaluate(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
||||
ep_revenue = 0.0
|
||||
ep_margin = 0.0
|
||||
ep_coi = 0.0
|
||||
ep_coi_leakage = 0.0
|
||||
ep_volatility = 0.0
|
||||
ep_agent_prob = 0.0
|
||||
steps = 0
|
||||
|
||||
while not done:
|
||||
@@ -63,6 +72,9 @@ def evaluate(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
||||
ep_revenue += float(econ.get("revenue", info.get("revenue", 0.0)))
|
||||
ep_margin += float(econ.get("margin", 0.0))
|
||||
ep_coi += float(econ.get("coi_level", 0.0))
|
||||
ep_coi_leakage += float(econ.get("coi_leakage", 0.0))
|
||||
ep_volatility += float(econ.get("volatility", 0.0))
|
||||
ep_agent_prob += float(econ.get("agent_prob", info.get("agent_prob", 0.0)))
|
||||
steps += 1
|
||||
|
||||
rewards.append(ep_reward)
|
||||
@@ -70,6 +82,9 @@ def evaluate(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
||||
denom = max(steps, 1)
|
||||
margins.append(ep_margin / denom)
|
||||
coi_levels.append(ep_coi / denom)
|
||||
coi_leakages.append(ep_coi_leakage / denom)
|
||||
volatilities.append(ep_volatility / denom)
|
||||
agent_probs.append(ep_agent_prob / denom)
|
||||
|
||||
return {
|
||||
"eval/reward_mean": float(np.mean(rewards)) if rewards else 0.0,
|
||||
@@ -78,4 +93,60 @@ def evaluate(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
||||
"eval/revenue_std": float(np.std(revenues)) if revenues else 0.0,
|
||||
"eval/margin_mean": float(np.mean(margins)) if margins else 0.0,
|
||||
"eval/coi_level_mean": float(np.mean(coi_levels)) if coi_levels else 0.0,
|
||||
"eval/coi_leakage_mean": float(np.mean(coi_leakages)) if coi_leakages else 0.0,
|
||||
"eval/volatility_mean": float(np.mean(volatilities)) if volatilities else 0.0,
|
||||
"eval/agent_prob_mean": float(np.mean(agent_probs)) if agent_probs else 0.0,
|
||||
}
|
||||
|
||||
|
||||
def evaluate(
    agent: Any,
    env: Any,
    episodes: int,
    cfg: Mapping[str, Any] | None = None,
) -> dict[str, float]:
    """Evaluate ``agent`` on ``env`` and, optionally, on alpha-shifted environments.

    The base metrics always come from ``_evaluate_env(agent, env, episodes)``.
    When ``cfg`` is given and ``cfg["robust_eval_enabled"]`` is truthy (the
    default), three additional environments are built with ``make_env`` using
    copies of ``cfg`` whose ``"alpha"`` entry is set to a low, nominal and
    high value, and the agent is evaluated on each of them.

    Args:
        agent: Policy passed through unchanged to ``_evaluate_env``.
        env: Environment used for the base evaluation. It is not closed here.
        episodes: Number of episodes for the base evaluation. Each shifted
            evaluation uses ``ceil(episodes / 2)`` episodes, but at least one.
        cfg: Configuration mapping. ``None`` disables the shifted evaluation.
            Keys read here: ``robust_eval_enabled``, ``alpha``,
            ``robust_radius``; the full mapping is forwarded to ``make_env``.

    Returns:
        The base metrics dict, extended (when the shifted evaluation runs) with
        ``eval/robust_alpha_low``, ``eval/robust_alpha_high``, the worst-case
        ``eval/robust_*_worst`` values, and per-tag ``eval/{low,nominal,high}_*``
        entries.
    """
    metrics = _evaluate_env(agent, env, episodes)
    if cfg is None or not bool(cfg.get("robust_eval_enabled", True)):
        return metrics

    nominal_alpha = float(cfg.get("alpha", 0.0))
    # The evaluation radius never drops below 0.15, even if the configured
    # robust_radius is smaller (or absent).
    eval_radius = max(float(cfg.get("robust_radius", 0.0)), 0.15)
    # Shifted alphas are clamped to [0, 1].
    low_alpha = float(np.clip(nominal_alpha - eval_radius, 0.0, 1.0))
    high_alpha = float(np.clip(nominal_alpha + eval_radius, 0.0, 1.0))
    shifted_episodes = max(1, int(np.ceil(int(episodes) / 2)))

    shifted_rows = []
    for tag, alpha in (
        ("low", low_alpha),
        ("nominal", nominal_alpha),
        ("high", high_alpha),
    ):
        eval_cfg = dict(cfg)
        eval_cfg["alpha"] = float(alpha)
        shifted_env = make_env(eval_cfg)
        try:
            shifted_metrics = _evaluate_env(agent, shifted_env, shifted_episodes)
        finally:
            # Always release the temporary environment, even if the rollout
            # raises, so a failed evaluation does not leak it.
            shifted_env.close()
        shifted_rows.append((tag, alpha, shifted_metrics))

    metrics["eval/robust_alpha_low"] = low_alpha
    metrics["eval/robust_alpha_high"] = high_alpha
    # Worst case: lowest reward/revenue, highest leakage across the shifted runs.
    metrics["eval/robust_reward_worst"] = float(
        min(row[2]["eval/reward_mean"] for row in shifted_rows)
    )
    metrics["eval/robust_revenue_worst"] = float(
        min(row[2]["eval/revenue_mean"] for row in shifted_rows)
    )
    metrics["eval/robust_coi_leakage_worst"] = float(
        max(row[2]["eval/coi_leakage_mean"] for row in shifted_rows)
    )
    for tag, alpha, shifted_metrics in shifted_rows:
        metrics[f"eval/{tag}_alpha"] = float(alpha)
        metrics[f"eval/{tag}_reward_mean"] = float(shifted_metrics["eval/reward_mean"])
        metrics[f"eval/{tag}_revenue_mean"] = float(
            shifted_metrics["eval/revenue_mean"]
        )
        metrics[f"eval/{tag}_coi_leakage_mean"] = float(
            shifted_metrics["eval/coi_leakage_mean"]
        )

    return metrics
|
||||
|
||||
@@ -7,6 +7,7 @@ from typing import Any, Mapping
|
||||
import numpy as np
|
||||
|
||||
from .common import evaluate, make_env
|
||||
from ..telemetry.wandb import get_wandb_module
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -36,6 +37,9 @@ def train_qtable(
|
||||
console_progress = bool(cfg.get("console_progress", False))
|
||||
obs, _ = env.reset(seed=int(cfg["seed"]))
|
||||
started_at = time.perf_counter()
|
||||
wandb = get_wandb_module()
|
||||
wandb_live = bool(wandb is not None and wandb.run is not None)
|
||||
step_offset = max(0, int(cfg.get("wandb_step_offset", 0)))
|
||||
|
||||
interval_sums = {
|
||||
"reward": 0.0,
|
||||
@@ -75,7 +79,10 @@ def train_qtable(
|
||||
"train/epsilon": float(epsilon),
|
||||
"train/global_step": int(steps),
|
||||
}
|
||||
train_events.append(event)
|
||||
if wandb_live:
|
||||
wandb.log(dict(event), step=step_offset + int(steps))
|
||||
else:
|
||||
train_events.append(event)
|
||||
if console_progress:
|
||||
elapsed = max(time.perf_counter() - started_at, 1e-6)
|
||||
speed = steps / elapsed
|
||||
@@ -96,17 +103,19 @@ def train_qtable(
|
||||
|
||||
if interval_count > 0:
|
||||
denom = float(interval_count)
|
||||
train_events.append(
|
||||
{
|
||||
"train/reward_mean": interval_sums["reward"] / denom,
|
||||
"train/revenue_mean": interval_sums["revenue"] / denom,
|
||||
"train/agent_prob": interval_sums["agent_prob"] / denom,
|
||||
"train/alpha_adv": interval_sums["alpha_adv"] / denom,
|
||||
"train/coi_leakage": interval_sums["coi_leakage"] / denom,
|
||||
"train/epsilon": float(epsilon),
|
||||
"train/global_step": int(steps),
|
||||
}
|
||||
)
|
||||
tail_event = {
|
||||
"train/reward_mean": interval_sums["reward"] / denom,
|
||||
"train/revenue_mean": interval_sums["revenue"] / denom,
|
||||
"train/agent_prob": interval_sums["agent_prob"] / denom,
|
||||
"train/alpha_adv": interval_sums["alpha_adv"] / denom,
|
||||
"train/coi_leakage": interval_sums["coi_leakage"] / denom,
|
||||
"train/epsilon": float(epsilon),
|
||||
"train/global_step": int(steps),
|
||||
}
|
||||
if wandb_live:
|
||||
wandb.log(dict(tail_event), step=step_offset + int(steps))
|
||||
else:
|
||||
train_events.append(tail_event)
|
||||
|
||||
metrics: dict[str, Any] = {
|
||||
"train/reward_mean": total_reward / max(steps, 1),
|
||||
@@ -114,7 +123,7 @@ def train_qtable(
|
||||
"train/epsilon": float(epsilon),
|
||||
"train/global_step": int(cfg["total_timesteps"]),
|
||||
}
|
||||
metrics.update(evaluate(agent, eval_env, int(cfg["eval_episodes"])))
|
||||
metrics.update(evaluate(agent, eval_env, int(cfg["eval_episodes"]), cfg=cfg))
|
||||
metrics["_train_events"] = train_events
|
||||
|
||||
env.close()
|
||||
|
||||
@@ -144,7 +144,9 @@ def train_sb3(cfg: Mapping[str, Any]) -> tuple[object, dict[str, Any]]:
|
||||
pass
|
||||
|
||||
metrics_callback = MetricsCallback(
|
||||
log_histograms=False, log_freq=int(cfg["log_freq"])
|
||||
log_histograms=False,
|
||||
log_freq=int(cfg["log_freq"]),
|
||||
step_offset=int(cfg.get("wandb_step_offset", 0)),
|
||||
)
|
||||
callbacks = [metrics_callback]
|
||||
callbacks.append(
|
||||
@@ -175,6 +177,7 @@ def train_sb3(cfg: Mapping[str, Any]) -> tuple[object, dict[str, Any]]:
|
||||
model,
|
||||
eval_env,
|
||||
int(cfg["eval_episodes"]),
|
||||
cfg=cfg,
|
||||
)
|
||||
metrics["train/global_step"] = int(model.num_timesteps)
|
||||
metrics["model/path"] = str(model_path.with_suffix(".zip"))
|
||||
|
||||
Reference in New Issue
Block a user