From 73a1dafc6eee62f04882e3fd4e19ada0270b202b Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Sun, 8 Mar 2026 21:37:13 +0100 Subject: [PATCH] first meaningful runs --- engine/backends/qtable.py | 39 +++++++++++++++++++-------- engine/benchmark.py | 55 ++++++++++++++++++++++++++++++++------- engine/engine.py | 14 +++++++--- engine/lib/behavior.py | 11 ++++++-- engine/train.py | 2 ++ 5 files changed, 96 insertions(+), 25 deletions(-) diff --git a/engine/backends/qtable.py b/engine/backends/qtable.py index 754cfa8..6468d43 100644 --- a/engine/backends/qtable.py +++ b/engine/backends/qtable.py @@ -1,11 +1,15 @@ from __future__ import annotations +import logging +import time from typing import Any, Mapping import numpy as np from .common import evaluate, make_env +logger = logging.getLogger(__name__) + def train_qtable( cfg: Mapping[str, Any], @@ -29,7 +33,9 @@ def train_qtable( steps = 0 epsilon = float(cfg["eps_start"]) log_freq = max(1, int(cfg.get("log_freq", 100))) + console_progress = bool(cfg.get("console_progress", False)) obs, _ = env.reset(seed=int(cfg["seed"])) + started_at = time.perf_counter() interval_sums = { "reward": 0.0, @@ -60,17 +66,28 @@ def train_qtable( if steps % log_freq == 0 and interval_count > 0: denom = float(interval_count) - train_events.append( - { - "train/reward_mean": interval_sums["reward"] / denom, - "train/revenue_mean": interval_sums["revenue"] / denom, - "train/agent_prob": interval_sums["agent_prob"] / denom, - "train/alpha_adv": interval_sums["alpha_adv"] / denom, - "train/coi_leakage": interval_sums["coi_leakage"] / denom, - "train/epsilon": float(epsilon), - "train/global_step": int(steps), - } - ) + event = { + "train/reward_mean": interval_sums["reward"] / denom, + "train/revenue_mean": interval_sums["revenue"] / denom, + "train/agent_prob": interval_sums["agent_prob"] / denom, + "train/alpha_adv": interval_sums["alpha_adv"] / denom, + "train/coi_leakage": interval_sums["coi_leakage"] / denom, + "train/epsilon": float(epsilon), + "train/global_step": int(steps), + } + train_events.append(event) + if console_progress: + elapsed = max(time.perf_counter() - started_at, 1e-6) + speed = steps / elapsed + logger.info( + "step=%d/%d reward=%.3f revenue=%.3f eps=%.4f speed=%.1f steps/s", + steps, + int(cfg["total_timesteps"]), + event["train/reward_mean"], + event["train/revenue_mean"], + event["train/epsilon"], + speed, + ) interval_sums = {key: 0.0 for key in interval_sums} interval_count = 0 diff --git a/engine/benchmark.py b/engine/benchmark.py index 65b6e47..41d4ca5 100644 --- a/engine/benchmark.py +++ b/engine/benchmark.py @@ -2,6 +2,7 @@ from __future__ import annotations import argparse import json +import logging import os from datetime import datetime, UTC from pathlib import Path @@ -11,11 +12,17 @@ import numpy as np import pandas as pd from .lib.tiers import LinearElasticityPolicy, StaticPolicy, SurgePolicy +from .logging_utils import configure_logging from .spec import TrainSpec from .telemetry.wandb import get_wandb_module wandb = get_wandb_module() HAS_WANDB = wandb is not None +logger = logging.getLogger(__name__) + + +def _log(message: str) -> None: + logger.info(message) def _parse_list(raw: str) -> list[str]: @@ -78,8 +85,6 @@ def _run_eval_episode(env, policy) -> dict: def _build_tier(name: str, cfg: dict, alpha: float): from .backends.common import make_env - from .backends.qtable import train_qtable - from .backends.sb3 import train_sb3 tier = name.lower().strip() run_cfg = dict(cfg) @@ -111,10 +116,15 @@ def _build_tier(name: str, cfg: dict, alpha: float): return policy if tier == "qtable": + from .backends.qtable import train_qtable + + run_cfg["console_progress"] = True agent, _ = train_qtable(run_cfg) return agent if tier in {"ppo", "a2c", "dqn"}: + from .backends.sb3 import train_sb3 + run_cfg["algo"] = tier agent, _ = train_sb3(run_cfg) return agent @@ -129,9 +139,15 @@ def run_benchmark( rows: list[dict] = [] traces: list[dict] = [] + total_runs = max(1, len(alpha_values) * len(tiers)) + run_index = 0 for alpha in alpha_values: for tier_name in tiers: + run_index += 1 + _log( + f"[{run_index}/{total_runs}] alpha={float(alpha):.2f} tier={tier_name}: training" + ) policy = _build_tier(tier_name, cfg, alpha) env = make_env({**cfg, "alpha": float(alpha)}) eps = [_run_eval_episode(env, policy) for _ in range(int(n_episodes))] @@ -152,6 +168,11 @@ def run_benchmark( + float(cfg.get("revenue_weight", 0.01)) * row["mean_revenue"] ) rows.append(row) + _log( + f"[{run_index}/{total_runs}] alpha={float(alpha):.2f} tier={tier_name}: " + f"reward={row['mean_reward']:.3f} revenue={row['mean_revenue']:.3f} " + f"coi={row['mean_coi']:.4f} score={row['objective_score']:.3f}" + ) max_len = max((len(e["price_trace"]) for e in eps), default=0) step_means = [] @@ -282,6 +303,18 @@ def _run_with_args(args): } tiers = _parse_list(args.tiers) alpha_values = _parse_float_list(args.alpha_values) + _log( + "starting run " + + json.dumps( + { + "tiers": tiers, + "alpha_values": alpha_values, + "episodes": int(args.episodes), + "total_timesteps": int(args.total_timesteps), + "device": str(args.device), + } + ) + ) all_frames: list[pd.DataFrame] = [] all_traces: list[dict] = [] @@ -292,8 +325,10 @@ def _run_with_args(args): {k: v for k, v in overrides.items() if v is not None} ).to_flat_dict() cfg["linear_warmup_steps"] = int(args.linear_warmup_steps) - df_mode, traces_mode = run_benchmark(cfg, tiers, alpha_values, args.episodes) mode_label = "no_robust" if no_robust else "robust" + _log(f"mode={mode_label}: begin") + df_mode, traces_mode = run_benchmark(cfg, tiers, alpha_values, args.episodes) + _log(f"mode={mode_label}: complete ({len(df_mode)} rows)") df_mode["mode"] = mode_label for trace in traces_mode: trace["mode"] = mode_label @@ -311,11 +346,12 @@ def _run_with_args(args): df.to_csv(csv_path, index=False) trace_path.write_text(json.dumps(traces, indent=2)) rev_path, coi_path, price_path = _plot_outputs(df, traces, out_dir, stamp) + _log(f"artifacts written in {out_dir}") if not df.empty: best_idx = int(df["mean_revenue"].idxmax()) best = df.iloc[best_idx] - print( + _log( "BEST_TIER=" + json.dumps( { @@ -327,14 +363,15 @@ def _run_with_args(args): } ) ) - print(f"BENCHMARK_CSV={csv_path}") - print(f"BENCHMARK_TRACES={trace_path}") - print(f"BENCHMARK_PLOT_REVENUE={rev_path}") - print(f"BENCHMARK_PLOT_COI={coi_path}") - print(f"BENCHMARK_PLOT_PRICE={price_path}") + _log(f"BENCHMARK_CSV={csv_path}") + _log(f"BENCHMARK_TRACES={trace_path}") + _log(f"BENCHMARK_PLOT_REVENUE={rev_path}") + _log(f"BENCHMARK_PLOT_COI={coi_path}") + _log(f"BENCHMARK_PLOT_PRICE={price_path}") def run_cli(raw_args: list[str] | None = None): + configure_logging() parser = argparse.ArgumentParser(description="PHANTOM benchmark orchestrator") parser.add_argument("--project", default="capstone") parser.add_argument("--tiers", default="static,surge,linear,qtable,ppo") diff --git a/engine/engine.py b/engine/engine.py index 8ca6339..b4a2cbc 100644 --- a/engine/engine.py +++ b/engine/engine.py @@ -1,7 +1,7 @@ from sys import platform import numpy as np from .lib.demand import generate_demand_for_actor, estimate_demand -from .lib.behavior import sample_behavior +from .lib.behavior import get_adjusted_transitions, sample_behavior_from_transitions from logging import INFO, getLogger logger = getLogger(__name__) @@ -46,9 +46,17 @@ class MarketEngine: self.noise_std, distribution_method=self.demand_dist, ) + human_transitions = get_adjusted_transitions(demand_h, human=True) + agent_transitions = get_adjusted_transitions(demand_a, human=False) # sample behavior trajectories from each demand distribution - human_t = [sample_behavior(demand_h, human=True) for _ in range(self.Nhumans)] - agent_t = [sample_behavior(demand_a, human=False) for _ in range(self.Nagents)] + human_t = [ + sample_behavior_from_transitions(human_transitions) + for _ in range(self.Nhumans) + ] + agent_t = [ + sample_behavior_from_transitions(agent_transitions) + for _ in range(self.Nagents) + ] # store trajectories for agent probability calculation self.last_trajectories = human_t + agent_t return estimate_demand(self.last_trajectories, self.action_weights) diff --git a/engine/lib/behavior.py b/engine/lib/behavior.py index 6a3a411..588ebc9 100644 --- a/engine/lib/behavior.py +++ b/engine/lib/behavior.py @@ -110,10 +110,12 @@ def adjust_behavior_to_condition(condition, transition_matrix): return pd.DataFrame(expanded, index=new_rows, columns=new_cols) -def sample_behavior(condition, human=True, max_len=40): +def get_adjusted_transitions(condition, human=True): base_pivot = _get_base_pivot(human) - adjusted_transitions = adjust_behavior_to_condition(condition, base_pivot) + return adjust_behavior_to_condition(condition, base_pivot) + +def sample_behavior_from_transitions(adjusted_transitions, max_len=40): trajectory = [np.random.choice(adjusted_transitions.index)] while len(trajectory) < max_len and "checkout" not in trajectory[-1]: probs = np.asarray(adjusted_transitions.loc[trajectory[-1]].values, dtype=float) @@ -127,6 +129,11 @@ def sample_behavior(condition, human=True, max_len=40): return trajectory +def sample_behavior(condition, human=True, max_len=40): + adjusted_transitions = get_adjusted_transitions(condition, human=human) + return sample_behavior_from_transitions(adjusted_transitions, max_len=max_len) + + if __name__ == "__main__": t = sample_behavior(generate_demand_for_actor(np.array([10, 20, 30])), human=True) print(t) diff --git a/engine/train.py b/engine/train.py index 7dd2f68..0133db7 100644 --- a/engine/train.py +++ b/engine/train.py @@ -3,6 +3,7 @@ from __future__ import annotations import argparse from typing import Any +from .logging_utils import configure_logging from .orchestrators import run_benchmark_cli, run_sweep_agent, run_train_once from .spec import TrainSpec @@ -174,6 +175,7 @@ def _overrides_from_args(args: argparse.Namespace) -> dict[str, Any]: def main(argv: list[str] | None = None) -> None: import sys + configure_logging() raw_args = list(sys.argv[1:] if argv is None else argv) run_kind = _probe_run_kind(raw_args) if run_kind == "benchmark":