mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
first meaningful runs
This commit is contained in:
@@ -1,11 +1,15 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
from typing import Any, Mapping
|
from typing import Any, Mapping
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from .common import evaluate, make_env
|
from .common import evaluate, make_env
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def train_qtable(
|
def train_qtable(
|
||||||
cfg: Mapping[str, Any],
|
cfg: Mapping[str, Any],
|
||||||
@@ -29,7 +33,9 @@ def train_qtable(
|
|||||||
steps = 0
|
steps = 0
|
||||||
epsilon = float(cfg["eps_start"])
|
epsilon = float(cfg["eps_start"])
|
||||||
log_freq = max(1, int(cfg.get("log_freq", 100)))
|
log_freq = max(1, int(cfg.get("log_freq", 100)))
|
||||||
|
console_progress = bool(cfg.get("console_progress", False))
|
||||||
obs, _ = env.reset(seed=int(cfg["seed"]))
|
obs, _ = env.reset(seed=int(cfg["seed"]))
|
||||||
|
started_at = time.perf_counter()
|
||||||
|
|
||||||
interval_sums = {
|
interval_sums = {
|
||||||
"reward": 0.0,
|
"reward": 0.0,
|
||||||
@@ -60,8 +66,7 @@ def train_qtable(
|
|||||||
|
|
||||||
if steps % log_freq == 0 and interval_count > 0:
|
if steps % log_freq == 0 and interval_count > 0:
|
||||||
denom = float(interval_count)
|
denom = float(interval_count)
|
||||||
train_events.append(
|
event = {
|
||||||
{
|
|
||||||
"train/reward_mean": interval_sums["reward"] / denom,
|
"train/reward_mean": interval_sums["reward"] / denom,
|
||||||
"train/revenue_mean": interval_sums["revenue"] / denom,
|
"train/revenue_mean": interval_sums["revenue"] / denom,
|
||||||
"train/agent_prob": interval_sums["agent_prob"] / denom,
|
"train/agent_prob": interval_sums["agent_prob"] / denom,
|
||||||
@@ -70,6 +75,18 @@ def train_qtable(
|
|||||||
"train/epsilon": float(epsilon),
|
"train/epsilon": float(epsilon),
|
||||||
"train/global_step": int(steps),
|
"train/global_step": int(steps),
|
||||||
}
|
}
|
||||||
|
train_events.append(event)
|
||||||
|
if console_progress:
|
||||||
|
elapsed = max(time.perf_counter() - started_at, 1e-6)
|
||||||
|
speed = steps / elapsed
|
||||||
|
logger.info(
|
||||||
|
"step=%d/%d reward=%.3f revenue=%.3f eps=%.4f speed=%.1f steps/s",
|
||||||
|
steps,
|
||||||
|
int(cfg["total_timesteps"]),
|
||||||
|
event["train/reward_mean"],
|
||||||
|
event["train/revenue_mean"],
|
||||||
|
event["train/epsilon"],
|
||||||
|
speed,
|
||||||
)
|
)
|
||||||
interval_sums = {key: 0.0 for key in interval_sums}
|
interval_sums = {key: 0.0 for key in interval_sums}
|
||||||
interval_count = 0
|
interval_count = 0
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -11,11 +12,17 @@ import numpy as np
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from .lib.tiers import LinearElasticityPolicy, StaticPolicy, SurgePolicy
|
from .lib.tiers import LinearElasticityPolicy, StaticPolicy, SurgePolicy
|
||||||
|
from .logging_utils import configure_logging
|
||||||
from .spec import TrainSpec
|
from .spec import TrainSpec
|
||||||
from .telemetry.wandb import get_wandb_module
|
from .telemetry.wandb import get_wandb_module
|
||||||
|
|
||||||
wandb = get_wandb_module()
|
wandb = get_wandb_module()
|
||||||
HAS_WANDB = wandb is not None
|
HAS_WANDB = wandb is not None
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _log(message: str) -> None:
|
||||||
|
logger.info(message)
|
||||||
|
|
||||||
|
|
||||||
def _parse_list(raw: str) -> list[str]:
|
def _parse_list(raw: str) -> list[str]:
|
||||||
@@ -78,8 +85,6 @@ def _run_eval_episode(env, policy) -> dict:
|
|||||||
|
|
||||||
def _build_tier(name: str, cfg: dict, alpha: float):
|
def _build_tier(name: str, cfg: dict, alpha: float):
|
||||||
from .backends.common import make_env
|
from .backends.common import make_env
|
||||||
from .backends.qtable import train_qtable
|
|
||||||
from .backends.sb3 import train_sb3
|
|
||||||
|
|
||||||
tier = name.lower().strip()
|
tier = name.lower().strip()
|
||||||
run_cfg = dict(cfg)
|
run_cfg = dict(cfg)
|
||||||
@@ -111,10 +116,15 @@ def _build_tier(name: str, cfg: dict, alpha: float):
|
|||||||
return policy
|
return policy
|
||||||
|
|
||||||
if tier == "qtable":
|
if tier == "qtable":
|
||||||
|
from .backends.qtable import train_qtable
|
||||||
|
|
||||||
|
run_cfg["console_progress"] = True
|
||||||
agent, _ = train_qtable(run_cfg)
|
agent, _ = train_qtable(run_cfg)
|
||||||
return agent
|
return agent
|
||||||
|
|
||||||
if tier in {"ppo", "a2c", "dqn"}:
|
if tier in {"ppo", "a2c", "dqn"}:
|
||||||
|
from .backends.sb3 import train_sb3
|
||||||
|
|
||||||
run_cfg["algo"] = tier
|
run_cfg["algo"] = tier
|
||||||
agent, _ = train_sb3(run_cfg)
|
agent, _ = train_sb3(run_cfg)
|
||||||
return agent
|
return agent
|
||||||
@@ -129,9 +139,15 @@ def run_benchmark(
|
|||||||
|
|
||||||
rows: list[dict] = []
|
rows: list[dict] = []
|
||||||
traces: list[dict] = []
|
traces: list[dict] = []
|
||||||
|
total_runs = max(1, len(alpha_values) * len(tiers))
|
||||||
|
run_index = 0
|
||||||
|
|
||||||
for alpha in alpha_values:
|
for alpha in alpha_values:
|
||||||
for tier_name in tiers:
|
for tier_name in tiers:
|
||||||
|
run_index += 1
|
||||||
|
_log(
|
||||||
|
f"[{run_index}/{total_runs}] alpha={float(alpha):.2f} tier={tier_name}: training"
|
||||||
|
)
|
||||||
policy = _build_tier(tier_name, cfg, alpha)
|
policy = _build_tier(tier_name, cfg, alpha)
|
||||||
env = make_env({**cfg, "alpha": float(alpha)})
|
env = make_env({**cfg, "alpha": float(alpha)})
|
||||||
eps = [_run_eval_episode(env, policy) for _ in range(int(n_episodes))]
|
eps = [_run_eval_episode(env, policy) for _ in range(int(n_episodes))]
|
||||||
@@ -152,6 +168,11 @@ def run_benchmark(
|
|||||||
+ float(cfg.get("revenue_weight", 0.01)) * row["mean_revenue"]
|
+ float(cfg.get("revenue_weight", 0.01)) * row["mean_revenue"]
|
||||||
)
|
)
|
||||||
rows.append(row)
|
rows.append(row)
|
||||||
|
_log(
|
||||||
|
f"[{run_index}/{total_runs}] alpha={float(alpha):.2f} tier={tier_name}: "
|
||||||
|
f"reward={row['mean_reward']:.3f} revenue={row['mean_revenue']:.3f} "
|
||||||
|
f"coi={row['mean_coi']:.4f} score={row['objective_score']:.3f}"
|
||||||
|
)
|
||||||
|
|
||||||
max_len = max((len(e["price_trace"]) for e in eps), default=0)
|
max_len = max((len(e["price_trace"]) for e in eps), default=0)
|
||||||
step_means = []
|
step_means = []
|
||||||
@@ -282,6 +303,18 @@ def _run_with_args(args):
|
|||||||
}
|
}
|
||||||
tiers = _parse_list(args.tiers)
|
tiers = _parse_list(args.tiers)
|
||||||
alpha_values = _parse_float_list(args.alpha_values)
|
alpha_values = _parse_float_list(args.alpha_values)
|
||||||
|
_log(
|
||||||
|
"starting run "
|
||||||
|
+ json.dumps(
|
||||||
|
{
|
||||||
|
"tiers": tiers,
|
||||||
|
"alpha_values": alpha_values,
|
||||||
|
"episodes": int(args.episodes),
|
||||||
|
"total_timesteps": int(args.total_timesteps),
|
||||||
|
"device": str(args.device),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
all_frames: list[pd.DataFrame] = []
|
all_frames: list[pd.DataFrame] = []
|
||||||
all_traces: list[dict] = []
|
all_traces: list[dict] = []
|
||||||
@@ -292,8 +325,10 @@ def _run_with_args(args):
|
|||||||
{k: v for k, v in overrides.items() if v is not None}
|
{k: v for k, v in overrides.items() if v is not None}
|
||||||
).to_flat_dict()
|
).to_flat_dict()
|
||||||
cfg["linear_warmup_steps"] = int(args.linear_warmup_steps)
|
cfg["linear_warmup_steps"] = int(args.linear_warmup_steps)
|
||||||
df_mode, traces_mode = run_benchmark(cfg, tiers, alpha_values, args.episodes)
|
|
||||||
mode_label = "no_robust" if no_robust else "robust"
|
mode_label = "no_robust" if no_robust else "robust"
|
||||||
|
_log(f"mode={mode_label}: begin")
|
||||||
|
df_mode, traces_mode = run_benchmark(cfg, tiers, alpha_values, args.episodes)
|
||||||
|
_log(f"mode={mode_label}: complete ({len(df_mode)} rows)")
|
||||||
df_mode["mode"] = mode_label
|
df_mode["mode"] = mode_label
|
||||||
for trace in traces_mode:
|
for trace in traces_mode:
|
||||||
trace["mode"] = mode_label
|
trace["mode"] = mode_label
|
||||||
@@ -311,11 +346,12 @@ def _run_with_args(args):
|
|||||||
df.to_csv(csv_path, index=False)
|
df.to_csv(csv_path, index=False)
|
||||||
trace_path.write_text(json.dumps(traces, indent=2))
|
trace_path.write_text(json.dumps(traces, indent=2))
|
||||||
rev_path, coi_path, price_path = _plot_outputs(df, traces, out_dir, stamp)
|
rev_path, coi_path, price_path = _plot_outputs(df, traces, out_dir, stamp)
|
||||||
|
_log(f"artifacts written in {out_dir}")
|
||||||
|
|
||||||
if not df.empty:
|
if not df.empty:
|
||||||
best_idx = int(df["mean_revenue"].idxmax())
|
best_idx = int(df["mean_revenue"].idxmax())
|
||||||
best = df.iloc[best_idx]
|
best = df.iloc[best_idx]
|
||||||
print(
|
_log(
|
||||||
"BEST_TIER="
|
"BEST_TIER="
|
||||||
+ json.dumps(
|
+ json.dumps(
|
||||||
{
|
{
|
||||||
@@ -327,14 +363,15 @@ def _run_with_args(args):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
print(f"BENCHMARK_CSV={csv_path}")
|
_log(f"BENCHMARK_CSV={csv_path}")
|
||||||
print(f"BENCHMARK_TRACES={trace_path}")
|
_log(f"BENCHMARK_TRACES={trace_path}")
|
||||||
print(f"BENCHMARK_PLOT_REVENUE={rev_path}")
|
_log(f"BENCHMARK_PLOT_REVENUE={rev_path}")
|
||||||
print(f"BENCHMARK_PLOT_COI={coi_path}")
|
_log(f"BENCHMARK_PLOT_COI={coi_path}")
|
||||||
print(f"BENCHMARK_PLOT_PRICE={price_path}")
|
_log(f"BENCHMARK_PLOT_PRICE={price_path}")
|
||||||
|
|
||||||
|
|
||||||
def run_cli(raw_args: list[str] | None = None):
|
def run_cli(raw_args: list[str] | None = None):
|
||||||
|
configure_logging()
|
||||||
parser = argparse.ArgumentParser(description="PHANTOM benchmark orchestrator")
|
parser = argparse.ArgumentParser(description="PHANTOM benchmark orchestrator")
|
||||||
parser.add_argument("--project", default="capstone")
|
parser.add_argument("--project", default="capstone")
|
||||||
parser.add_argument("--tiers", default="static,surge,linear,qtable,ppo")
|
parser.add_argument("--tiers", default="static,surge,linear,qtable,ppo")
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from sys import platform
|
from sys import platform
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .lib.demand import generate_demand_for_actor, estimate_demand
|
from .lib.demand import generate_demand_for_actor, estimate_demand
|
||||||
from .lib.behavior import sample_behavior
|
from .lib.behavior import get_adjusted_transitions, sample_behavior_from_transitions
|
||||||
from logging import INFO, getLogger
|
from logging import INFO, getLogger
|
||||||
|
|
||||||
logger = getLogger(__name__)
|
logger = getLogger(__name__)
|
||||||
@@ -46,9 +46,17 @@ class MarketEngine:
|
|||||||
self.noise_std,
|
self.noise_std,
|
||||||
distribution_method=self.demand_dist,
|
distribution_method=self.demand_dist,
|
||||||
)
|
)
|
||||||
|
human_transitions = get_adjusted_transitions(demand_h, human=True)
|
||||||
|
agent_transitions = get_adjusted_transitions(demand_a, human=False)
|
||||||
# sample behavior trajectories from each demand distribution
|
# sample behavior trajectories from each demand distribution
|
||||||
human_t = [sample_behavior(demand_h, human=True) for _ in range(self.Nhumans)]
|
human_t = [
|
||||||
agent_t = [sample_behavior(demand_a, human=False) for _ in range(self.Nagents)]
|
sample_behavior_from_transitions(human_transitions)
|
||||||
|
for _ in range(self.Nhumans)
|
||||||
|
]
|
||||||
|
agent_t = [
|
||||||
|
sample_behavior_from_transitions(agent_transitions)
|
||||||
|
for _ in range(self.Nagents)
|
||||||
|
]
|
||||||
# store trajectories for agent probability calculation
|
# store trajectories for agent probability calculation
|
||||||
self.last_trajectories = human_t + agent_t
|
self.last_trajectories = human_t + agent_t
|
||||||
return estimate_demand(self.last_trajectories, self.action_weights)
|
return estimate_demand(self.last_trajectories, self.action_weights)
|
||||||
|
|||||||
@@ -110,10 +110,12 @@ def adjust_behavior_to_condition(condition, transition_matrix):
|
|||||||
return pd.DataFrame(expanded, index=new_rows, columns=new_cols)
|
return pd.DataFrame(expanded, index=new_rows, columns=new_cols)
|
||||||
|
|
||||||
|
|
||||||
def sample_behavior(condition, human=True, max_len=40):
|
def get_adjusted_transitions(condition, human=True):
|
||||||
base_pivot = _get_base_pivot(human)
|
base_pivot = _get_base_pivot(human)
|
||||||
adjusted_transitions = adjust_behavior_to_condition(condition, base_pivot)
|
return adjust_behavior_to_condition(condition, base_pivot)
|
||||||
|
|
||||||
|
|
||||||
|
def sample_behavior_from_transitions(adjusted_transitions, max_len=40):
|
||||||
trajectory = [np.random.choice(adjusted_transitions.index)]
|
trajectory = [np.random.choice(adjusted_transitions.index)]
|
||||||
while len(trajectory) < max_len and "checkout" not in trajectory[-1]:
|
while len(trajectory) < max_len and "checkout" not in trajectory[-1]:
|
||||||
probs = np.asarray(adjusted_transitions.loc[trajectory[-1]].values, dtype=float)
|
probs = np.asarray(adjusted_transitions.loc[trajectory[-1]].values, dtype=float)
|
||||||
@@ -127,6 +129,11 @@ def sample_behavior(condition, human=True, max_len=40):
|
|||||||
return trajectory
|
return trajectory
|
||||||
|
|
||||||
|
|
||||||
|
def sample_behavior(condition, human=True, max_len=40):
|
||||||
|
adjusted_transitions = get_adjusted_transitions(condition, human=human)
|
||||||
|
return sample_behavior_from_transitions(adjusted_transitions, max_len=max_len)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
t = sample_behavior(generate_demand_for_actor(np.array([10, 20, 30])), human=True)
|
t = sample_behavior(generate_demand_for_actor(np.array([10, 20, 30])), human=True)
|
||||||
print(t)
|
print(t)
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|||||||
import argparse
|
import argparse
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from .logging_utils import configure_logging
|
||||||
from .orchestrators import run_benchmark_cli, run_sweep_agent, run_train_once
|
from .orchestrators import run_benchmark_cli, run_sweep_agent, run_train_once
|
||||||
from .spec import TrainSpec
|
from .spec import TrainSpec
|
||||||
|
|
||||||
@@ -174,6 +175,7 @@ def _overrides_from_args(args: argparse.Namespace) -> dict[str, Any]:
|
|||||||
def main(argv: list[str] | None = None) -> None:
|
def main(argv: list[str] | None = None) -> None:
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
configure_logging()
|
||||||
raw_args = list(sys.argv[1:] if argv is None else argv)
|
raw_args = list(sys.argv[1:] if argv is None else argv)
|
||||||
run_kind = _probe_run_kind(raw_args)
|
run_kind = _probe_run_kind(raw_args)
|
||||||
if run_kind == "benchmark":
|
if run_kind == "benchmark":
|
||||||
|
|||||||
Reference in New Issue
Block a user