first meaningful runs

This commit is contained in:
2026-03-08 21:37:13 +01:00
parent 4c658a93a7
commit 73a1dafc6e
5 changed files with 96 additions and 25 deletions

View File

@@ -1,11 +1,15 @@
from __future__ import annotations from __future__ import annotations
import logging
import time
from typing import Any, Mapping from typing import Any, Mapping
import numpy as np import numpy as np
from .common import evaluate, make_env from .common import evaluate, make_env
logger = logging.getLogger(__name__)
def train_qtable( def train_qtable(
cfg: Mapping[str, Any], cfg: Mapping[str, Any],
@@ -29,7 +33,9 @@ def train_qtable(
steps = 0 steps = 0
epsilon = float(cfg["eps_start"]) epsilon = float(cfg["eps_start"])
log_freq = max(1, int(cfg.get("log_freq", 100))) log_freq = max(1, int(cfg.get("log_freq", 100)))
console_progress = bool(cfg.get("console_progress", False))
obs, _ = env.reset(seed=int(cfg["seed"])) obs, _ = env.reset(seed=int(cfg["seed"]))
started_at = time.perf_counter()
interval_sums = { interval_sums = {
"reward": 0.0, "reward": 0.0,
@@ -60,17 +66,28 @@ def train_qtable(
if steps % log_freq == 0 and interval_count > 0: if steps % log_freq == 0 and interval_count > 0:
denom = float(interval_count) denom = float(interval_count)
train_events.append( event = {
{ "train/reward_mean": interval_sums["reward"] / denom,
"train/reward_mean": interval_sums["reward"] / denom, "train/revenue_mean": interval_sums["revenue"] / denom,
"train/revenue_mean": interval_sums["revenue"] / denom, "train/agent_prob": interval_sums["agent_prob"] / denom,
"train/agent_prob": interval_sums["agent_prob"] / denom, "train/alpha_adv": interval_sums["alpha_adv"] / denom,
"train/alpha_adv": interval_sums["alpha_adv"] / denom, "train/coi_leakage": interval_sums["coi_leakage"] / denom,
"train/coi_leakage": interval_sums["coi_leakage"] / denom, "train/epsilon": float(epsilon),
"train/epsilon": float(epsilon), "train/global_step": int(steps),
"train/global_step": int(steps), }
} train_events.append(event)
) if console_progress:
elapsed = max(time.perf_counter() - started_at, 1e-6)
speed = steps / elapsed
logger.info(
"step=%d/%d reward=%.3f revenue=%.3f eps=%.4f speed=%.1f steps/s",
steps,
int(cfg["total_timesteps"]),
event["train/reward_mean"],
event["train/revenue_mean"],
event["train/epsilon"],
speed,
)
interval_sums = {key: 0.0 for key in interval_sums} interval_sums = {key: 0.0 for key in interval_sums}
interval_count = 0 interval_count = 0

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import argparse import argparse
import json import json
import logging
import os import os
from datetime import datetime, UTC from datetime import datetime, UTC
from pathlib import Path from pathlib import Path
@@ -11,11 +12,17 @@ import numpy as np
import pandas as pd import pandas as pd
from .lib.tiers import LinearElasticityPolicy, StaticPolicy, SurgePolicy from .lib.tiers import LinearElasticityPolicy, StaticPolicy, SurgePolicy
from .logging_utils import configure_logging
from .spec import TrainSpec from .spec import TrainSpec
from .telemetry.wandb import get_wandb_module from .telemetry.wandb import get_wandb_module
wandb = get_wandb_module() wandb = get_wandb_module()
HAS_WANDB = wandb is not None HAS_WANDB = wandb is not None
logger = logging.getLogger(__name__)
def _log(message: str) -> None:
logger.info(message)
def _parse_list(raw: str) -> list[str]: def _parse_list(raw: str) -> list[str]:
@@ -78,8 +85,6 @@ def _run_eval_episode(env, policy) -> dict:
def _build_tier(name: str, cfg: dict, alpha: float): def _build_tier(name: str, cfg: dict, alpha: float):
from .backends.common import make_env from .backends.common import make_env
from .backends.qtable import train_qtable
from .backends.sb3 import train_sb3
tier = name.lower().strip() tier = name.lower().strip()
run_cfg = dict(cfg) run_cfg = dict(cfg)
@@ -111,10 +116,15 @@ def _build_tier(name: str, cfg: dict, alpha: float):
return policy return policy
if tier == "qtable": if tier == "qtable":
from .backends.qtable import train_qtable
run_cfg["console_progress"] = True
agent, _ = train_qtable(run_cfg) agent, _ = train_qtable(run_cfg)
return agent return agent
if tier in {"ppo", "a2c", "dqn"}: if tier in {"ppo", "a2c", "dqn"}:
from .backends.sb3 import train_sb3
run_cfg["algo"] = tier run_cfg["algo"] = tier
agent, _ = train_sb3(run_cfg) agent, _ = train_sb3(run_cfg)
return agent return agent
@@ -129,9 +139,15 @@ def run_benchmark(
rows: list[dict] = [] rows: list[dict] = []
traces: list[dict] = [] traces: list[dict] = []
total_runs = max(1, len(alpha_values) * len(tiers))
run_index = 0
for alpha in alpha_values: for alpha in alpha_values:
for tier_name in tiers: for tier_name in tiers:
run_index += 1
_log(
f"[{run_index}/{total_runs}] alpha={float(alpha):.2f} tier={tier_name}: training"
)
policy = _build_tier(tier_name, cfg, alpha) policy = _build_tier(tier_name, cfg, alpha)
env = make_env({**cfg, "alpha": float(alpha)}) env = make_env({**cfg, "alpha": float(alpha)})
eps = [_run_eval_episode(env, policy) for _ in range(int(n_episodes))] eps = [_run_eval_episode(env, policy) for _ in range(int(n_episodes))]
@@ -152,6 +168,11 @@ def run_benchmark(
+ float(cfg.get("revenue_weight", 0.01)) * row["mean_revenue"] + float(cfg.get("revenue_weight", 0.01)) * row["mean_revenue"]
) )
rows.append(row) rows.append(row)
_log(
f"[{run_index}/{total_runs}] alpha={float(alpha):.2f} tier={tier_name}: "
f"reward={row['mean_reward']:.3f} revenue={row['mean_revenue']:.3f} "
f"coi={row['mean_coi']:.4f} score={row['objective_score']:.3f}"
)
max_len = max((len(e["price_trace"]) for e in eps), default=0) max_len = max((len(e["price_trace"]) for e in eps), default=0)
step_means = [] step_means = []
@@ -282,6 +303,18 @@ def _run_with_args(args):
} }
tiers = _parse_list(args.tiers) tiers = _parse_list(args.tiers)
alpha_values = _parse_float_list(args.alpha_values) alpha_values = _parse_float_list(args.alpha_values)
_log(
"starting run "
+ json.dumps(
{
"tiers": tiers,
"alpha_values": alpha_values,
"episodes": int(args.episodes),
"total_timesteps": int(args.total_timesteps),
"device": str(args.device),
}
)
)
all_frames: list[pd.DataFrame] = [] all_frames: list[pd.DataFrame] = []
all_traces: list[dict] = [] all_traces: list[dict] = []
@@ -292,8 +325,10 @@ def _run_with_args(args):
{k: v for k, v in overrides.items() if v is not None} {k: v for k, v in overrides.items() if v is not None}
).to_flat_dict() ).to_flat_dict()
cfg["linear_warmup_steps"] = int(args.linear_warmup_steps) cfg["linear_warmup_steps"] = int(args.linear_warmup_steps)
df_mode, traces_mode = run_benchmark(cfg, tiers, alpha_values, args.episodes)
mode_label = "no_robust" if no_robust else "robust" mode_label = "no_robust" if no_robust else "robust"
_log(f"mode={mode_label}: begin")
df_mode, traces_mode = run_benchmark(cfg, tiers, alpha_values, args.episodes)
_log(f"mode={mode_label}: complete ({len(df_mode)} rows)")
df_mode["mode"] = mode_label df_mode["mode"] = mode_label
for trace in traces_mode: for trace in traces_mode:
trace["mode"] = mode_label trace["mode"] = mode_label
@@ -311,11 +346,12 @@ def _run_with_args(args):
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
trace_path.write_text(json.dumps(traces, indent=2)) trace_path.write_text(json.dumps(traces, indent=2))
rev_path, coi_path, price_path = _plot_outputs(df, traces, out_dir, stamp) rev_path, coi_path, price_path = _plot_outputs(df, traces, out_dir, stamp)
_log(f"artifacts written in {out_dir}")
if not df.empty: if not df.empty:
best_idx = int(df["mean_revenue"].idxmax()) best_idx = int(df["mean_revenue"].idxmax())
best = df.iloc[best_idx] best = df.iloc[best_idx]
print( _log(
"BEST_TIER=" "BEST_TIER="
+ json.dumps( + json.dumps(
{ {
@@ -327,14 +363,15 @@ def _run_with_args(args):
} }
) )
) )
print(f"BENCHMARK_CSV={csv_path}") _log(f"BENCHMARK_CSV={csv_path}")
print(f"BENCHMARK_TRACES={trace_path}") _log(f"BENCHMARK_TRACES={trace_path}")
print(f"BENCHMARK_PLOT_REVENUE={rev_path}") _log(f"BENCHMARK_PLOT_REVENUE={rev_path}")
print(f"BENCHMARK_PLOT_COI={coi_path}") _log(f"BENCHMARK_PLOT_COI={coi_path}")
print(f"BENCHMARK_PLOT_PRICE={price_path}") _log(f"BENCHMARK_PLOT_PRICE={price_path}")
def run_cli(raw_args: list[str] | None = None): def run_cli(raw_args: list[str] | None = None):
configure_logging()
parser = argparse.ArgumentParser(description="PHANTOM benchmark orchestrator") parser = argparse.ArgumentParser(description="PHANTOM benchmark orchestrator")
parser.add_argument("--project", default="capstone") parser.add_argument("--project", default="capstone")
parser.add_argument("--tiers", default="static,surge,linear,qtable,ppo") parser.add_argument("--tiers", default="static,surge,linear,qtable,ppo")

View File

@@ -1,7 +1,7 @@
from sys import platform from sys import platform
import numpy as np import numpy as np
from .lib.demand import generate_demand_for_actor, estimate_demand from .lib.demand import generate_demand_for_actor, estimate_demand
from .lib.behavior import sample_behavior from .lib.behavior import get_adjusted_transitions, sample_behavior_from_transitions
from logging import INFO, getLogger from logging import INFO, getLogger
logger = getLogger(__name__) logger = getLogger(__name__)
@@ -46,9 +46,17 @@ class MarketEngine:
self.noise_std, self.noise_std,
distribution_method=self.demand_dist, distribution_method=self.demand_dist,
) )
human_transitions = get_adjusted_transitions(demand_h, human=True)
agent_transitions = get_adjusted_transitions(demand_a, human=False)
# sample behavior trajectories from each demand distribution # sample behavior trajectories from each demand distribution
human_t = [sample_behavior(demand_h, human=True) for _ in range(self.Nhumans)] human_t = [
agent_t = [sample_behavior(demand_a, human=False) for _ in range(self.Nagents)] sample_behavior_from_transitions(human_transitions)
for _ in range(self.Nhumans)
]
agent_t = [
sample_behavior_from_transitions(agent_transitions)
for _ in range(self.Nagents)
]
# store trajectories for agent probability calculation # store trajectories for agent probability calculation
self.last_trajectories = human_t + agent_t self.last_trajectories = human_t + agent_t
return estimate_demand(self.last_trajectories, self.action_weights) return estimate_demand(self.last_trajectories, self.action_weights)

View File

@@ -110,10 +110,12 @@ def adjust_behavior_to_condition(condition, transition_matrix):
return pd.DataFrame(expanded, index=new_rows, columns=new_cols) return pd.DataFrame(expanded, index=new_rows, columns=new_cols)
def sample_behavior(condition, human=True, max_len=40): def get_adjusted_transitions(condition, human=True):
base_pivot = _get_base_pivot(human) base_pivot = _get_base_pivot(human)
adjusted_transitions = adjust_behavior_to_condition(condition, base_pivot) return adjust_behavior_to_condition(condition, base_pivot)
def sample_behavior_from_transitions(adjusted_transitions, max_len=40):
trajectory = [np.random.choice(adjusted_transitions.index)] trajectory = [np.random.choice(adjusted_transitions.index)]
while len(trajectory) < max_len and "checkout" not in trajectory[-1]: while len(trajectory) < max_len and "checkout" not in trajectory[-1]:
probs = np.asarray(adjusted_transitions.loc[trajectory[-1]].values, dtype=float) probs = np.asarray(adjusted_transitions.loc[trajectory[-1]].values, dtype=float)
@@ -127,6 +129,11 @@ def sample_behavior(condition, human=True, max_len=40):
return trajectory return trajectory
def sample_behavior(condition, human=True, max_len=40):
adjusted_transitions = get_adjusted_transitions(condition, human=human)
return sample_behavior_from_transitions(adjusted_transitions, max_len=max_len)
if __name__ == "__main__": if __name__ == "__main__":
t = sample_behavior(generate_demand_for_actor(np.array([10, 20, 30])), human=True) t = sample_behavior(generate_demand_for_actor(np.array([10, 20, 30])), human=True)
print(t) print(t)

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import argparse import argparse
from typing import Any from typing import Any
from .logging_utils import configure_logging
from .orchestrators import run_benchmark_cli, run_sweep_agent, run_train_once from .orchestrators import run_benchmark_cli, run_sweep_agent, run_train_once
from .spec import TrainSpec from .spec import TrainSpec
@@ -174,6 +175,7 @@ def _overrides_from_args(args: argparse.Namespace) -> dict[str, Any]:
def main(argv: list[str] | None = None) -> None: def main(argv: list[str] | None = None) -> None:
import sys import sys
configure_logging()
raw_args = list(sys.argv[1:] if argv is None else argv) raw_args = list(sys.argv[1:] if argv is None else argv)
run_kind = _probe_run_kind(raw_args) run_kind = _probe_run_kind(raw_args)
if run_kind == "benchmark": if run_kind == "benchmark":