# Mirror of https://github.com/velocitatem/PHANTOM.git
# Synced 2026-05-31 16:43:36 +00:00 (314 lines, 11 KiB, Python)
from __future__ import annotations
|
|
|
|
import argparse
|
|
import contextlib
|
|
import io
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
def _base_sweep(method: str, metric_name: str) -> dict[str, Any]:
    """Build the skeleton shared by every sweep config in this module.

    Args:
        method: Sweep search method; stored under ``"method"`` after
            ``str()`` coercion.
        metric_name: Name of the metric to optimise; stored under
            ``"metric" -> "name"`` after ``str()`` coercion.

    Returns:
        A fresh dict holding ``"method"`` and a ``"metric"`` entry whose
        goal is always ``"maximize"``.
    """
    skeleton: dict[str, Any] = {"method": str(method)}
    skeleton["metric"] = {"name": str(metric_name), "goal": "maximize"}
    return skeleton
|
|
|
|
|
|
def _benchmark_sweep(method: str) -> dict[str, Any]:
    """Return the ``benchmark-all-algos-defense`` sweep config.

    The sweep maximises ``objective/score``. Every parameter except
    ``device`` is a list of discrete choices.

    Args:
        method: Sweep search method, forwarded to ``_base_sweep``.

    Returns:
        Sweep config dict with ``method``, ``metric``, ``name`` and
        ``parameters`` keys, in that order.
    """
    # Parameter name -> discrete options, in the order they are emitted.
    swept: dict[str, list[Any]] = {
        "tiers": ["static", "surge", "linear", "qtable", "ppo", "a2c", "dqn"],
        # Alphas are strings in this sweep, unlike the float ``alpha`` of the
        # train sweeps. NOTE(review): presumably parsed by the consumer - confirm.
        "alpha_values": ["0.0", "0.1", "0.25", "0.4", "0.6", "0.8"],
        "baseline_mode": [False, True],
        "seed": [42, 1337, 2026, 7777],
        "episodes": [8, 12],
        "total_timesteps": [15000, 30000, 50000],
        "lambda_coi": [0.1, 0.2, 0.4],
        "ambiguity_radius": [0.1, 0.2, 0.3],
        "ambiguity_points": [5, 7],
        "ambiguity_rollouts": [1, 2],
        "eta_ux": [0.25, 0.5, 0.75],
        "reward_profit_weight": [0.75, 1.0, 1.25],
        "learning_rate": [1e-4, 3e-4, 1e-3],
        "batch_size": [128, 256, 512],
        "n_steps": [1024, 2048, 4096],
    }
    search_space: dict[str, Any] = {
        knob: {"values": options} for knob, options in swept.items()
    }
    # The only pinned parameter goes last, matching the emitted key order.
    search_space["device"] = {"value": "cpu"}

    return {
        **_base_sweep(method=method, metric_name="objective/score"),
        "name": "benchmark-all-algos-defense",
        "parameters": search_space,
    }
|
|
|
|
|
|
def _train_sweep(method: str) -> dict[str, Any]:
    """Return the ``train-all-algos-defense`` sweep config.

    The sweep maximises ``objective/score``. Every parameter except
    ``device`` is a list of discrete choices.

    Args:
        method: Sweep search method, forwarded to ``_base_sweep``.

    Returns:
        Sweep config dict with ``method``, ``metric``, ``name`` and
        ``parameters`` keys, in that order.
    """
    # (parameter, options) pairs, in the order they are emitted.
    swept: list[tuple[str, list[Any]]] = [
        ("algo", ["qtable", "ppo", "a2c", "dqn"]),
        ("alpha", [0.0, 0.1, 0.25, 0.4, 0.6]),
        ("baseline_mode", [False, True]),
        ("seed", [42, 1337, 2026, 7777]),
        ("total_timesteps", [30000, 50000, 80000]),
        ("learning_rate", [1e-4, 3e-4, 1e-3]),
        ("batch_size", [128, 256, 512]),
        ("n_steps", [1024, 2048, 4096]),
        ("lambda_coi", [0.1, 0.2, 0.4]),
        ("ambiguity_radius", [0.1, 0.2, 0.3]),
        ("ambiguity_points", [3, 5, 7]),
        ("ambiguity_rollouts", [1, 2]),
        ("eta_ux", [0.25, 0.5, 0.75]),
        ("reward_profit_weight", [0.75, 1.0, 1.25]),
        ("N", [80, 100, 140]),
        ("max_steps", [80, 100, 120]),
        ("action_levels", [7, 9, 11]),
    ]
    search_space: dict[str, Any] = {}
    for knob, options in swept:
        search_space[knob] = {"values": options}
    # The only pinned parameter goes last, matching the emitted key order.
    search_space["device"] = {"value": "cpu"}

    sweep = _base_sweep(method=method, metric_name="objective/score")
    sweep.update(name="train-all-algos-defense", parameters=search_space)
    return sweep
|
|
|
|
|
|
def _train_robust_revenue_sweep(method: str) -> dict[str, Any]:
    """Return the ``train-defense-revenue-search`` sweep config.

    Unlike the other train sweep in this module, this one maximises
    ``eval/stress_revenue_worst``, pins ``baseline_mode`` to ``False`` and
    adds a fixed ``margin_floor``.

    Args:
        method: Sweep search method, forwarded to ``_base_sweep``.

    Returns:
        Sweep config dict with ``method``, ``metric``, ``name`` and
        ``parameters`` keys, in that order.
    """

    def fixed(setting: Any) -> dict[str, Any]:
        # Parameter pinned to one constant.
        return {"value": setting}

    def grid(*options: Any) -> dict[str, Any]:
        # Parameter drawn from a discrete list of choices.
        return {"values": list(options)}

    search_space = {
        "algo": grid("qtable", "ppo", "a2c", "dqn"),
        "alpha": grid(0.4, 0.6, 0.8),
        "baseline_mode": fixed(False),
        "seed": grid(42, 1337, 2026, 7777),
        "total_timesteps": grid(60_000, 80_000, 120_000),
        "learning_rate": grid(1e-4, 3e-4, 1e-3),
        "batch_size": grid(128, 256, 512),
        "n_steps": grid(1024, 2048, 4096),
        "lambda_coi": grid(0.2, 0.4, 0.6),
        "ambiguity_radius": grid(0.1, 0.2, 0.3),
        "ambiguity_points": grid(5, 7, 9),
        "ambiguity_rollouts": grid(1, 2),
        "eta_ux": grid(0.25, 0.5, 0.75),
        "reward_profit_weight": grid(1.0, 1.25),
        "N": grid(80, 100, 140),
        "max_steps": grid(80, 100, 120),
        "action_levels": grid(7, 9, 11),
        "margin_floor": fixed(0.85),
        "device": fixed("cpu"),
    }

    sweep = _base_sweep(method=method, metric_name="eval/stress_revenue_worst")
    sweep["name"] = "train-defense-revenue-search"
    sweep["parameters"] = search_space
    return sweep
|
|
|
|
|
|
def _ppo_calibration_sweep(method: str) -> dict[str, Any]:
    """Return the ``benchmark-ppo-calibration`` sweep config.

    The sweep maximises ``objective/score`` with ``tiers`` pinned to
    ``"ppo"``. ``lambda_coi`` and ``ambiguity_radius`` use a ``uniform``
    distribution and ``learning_rate`` uses ``log_uniform_values``; the
    remaining parameters are discrete lists or constants.

    Args:
        method: Sweep search method, forwarded to ``_base_sweep``.

    Returns:
        Sweep config dict with ``method``, ``metric``, ``name`` and
        ``parameters`` keys, in that order.
    """

    def fixed(setting: Any) -> dict[str, Any]:
        # Parameter pinned to one constant.
        return {"value": setting}

    def grid(*options: Any) -> dict[str, Any]:
        # Parameter drawn from a discrete list of choices.
        return {"values": list(options)}

    def sampled(distribution: str, low: float, high: float) -> dict[str, Any]:
        # Parameter sampled from a named distribution over [low, high].
        return {"distribution": distribution, "min": low, "max": high}

    search_space = {
        "tiers": fixed("ppo"),
        "alpha_values": grid("0.0", "0.1", "0.25", "0.4", "0.6", "0.8"),
        "baseline_mode": grid(False, True),
        "seed": grid(42, 1337, 2026, 7777),
        "episodes": fixed(12),
        "total_timesteps": fixed(60000),
        "lambda_coi": sampled("uniform", 0.05, 0.6),
        "ambiguity_radius": sampled("uniform", 0.05, 0.45),
        "ambiguity_points": fixed(7),
        "ambiguity_rollouts": fixed(1),
        "eta_ux": fixed(0.5),
        "reward_profit_weight": fixed(1.0),
        "learning_rate": sampled("log_uniform_values", 1e-4, 1e-3),
        "batch_size": grid(128, 256, 512),
        "n_steps": grid(1024, 2048, 4096),
        "device": fixed("cpu"),
    }

    return {
        **_base_sweep(method=method, metric_name="objective/score"),
        "name": "benchmark-ppo-calibration",
        "parameters": search_space,
    }
|
|
|
|
|
|
def _ppo_block_a_sweep(method: str) -> dict[str, Any]:
    """Return the ``benchmark-ppo-block-a-calibration`` sweep config.

    The sweep maximises ``objective/score``. Only ``seed``, ``lambda_coi``
    and ``ambiguity_radius`` vary; everything else is a constant.

    Args:
        method: Sweep search method, forwarded to ``_base_sweep``.

    Returns:
        Sweep config dict with ``method``, ``metric``, ``name`` and
        ``parameters`` keys, in that order.
    """

    def fixed(setting: Any) -> dict[str, Any]:
        # Parameter pinned to one constant.
        return {"value": setting}

    def grid(*options: Any) -> dict[str, Any]:
        # Parameter drawn from a discrete list of choices.
        return {"values": list(options)}

    search_space = {
        "tiers": fixed("ppo"),
        # One comma-separated string, not a list of choices.
        "alpha_values": fixed("0.25,0.6,0.8"),
        "seed": grid(42, 1337, 2026),
        "episodes": fixed(12),
        "total_timesteps": fixed(80000),
        "lambda_coi": grid(0.05, 0.1, 0.2),
        "ambiguity_radius": grid(0.05, 0.1, 0.2),
        "ambiguity_points": fixed(7),
        "ambiguity_rollouts": fixed(1),
        "eta_ux": fixed(0.5),
        "reward_profit_weight": fixed(1.0),
        "learning_rate": fixed(3e-4),
        "batch_size": fixed(256),
        "n_steps": fixed(2048),
        "device": fixed("cpu"),
    }

    sweep = _base_sweep(method=method, metric_name="objective/score")
    sweep.update(name="benchmark-ppo-block-a-calibration", parameters=search_space)
    return sweep
|
|
|
|
|
|
def _ppo_shift_screen_sweep(method: str) -> dict[str, Any]:
    """Return the ``benchmark-ppo-shift-screen`` sweep config.

    The sweep maximises ``objective/score``. ``alpha_values`` is pinned to
    ``"0.25"`` and ``eval_alpha_values`` to ``"0.6,0.8"``. Only ``seed``,
    ``lambda_coi`` and ``ambiguity_radius`` vary.

    Args:
        method: Sweep search method, forwarded to ``_base_sweep``.

    Returns:
        Sweep config dict with ``method``, ``metric``, ``name`` and
        ``parameters`` keys, in that order.
    """

    def fixed(setting: Any) -> dict[str, Any]:
        # Parameter pinned to one constant.
        return {"value": setting}

    def grid(*options: Any) -> dict[str, Any]:
        # Parameter drawn from a discrete list of choices.
        return {"values": list(options)}

    search_space = {
        "tiers": fixed("ppo"),
        "alpha_values": fixed("0.25"),
        "eval_alpha_values": fixed("0.6,0.8"),
        "seed": grid(42, 1337, 2026),
        "episodes": fixed(20),
        "total_timesteps": fixed(80000),
        "lambda_coi": grid(0.0, 0.02, 0.05, 0.1),
        "ambiguity_radius": grid(0.0, 0.02, 0.05, 0.1),
        "ambiguity_points": fixed(5),
        "ambiguity_rollouts": fixed(1),
        "eta_ux": fixed(0.0),
        "reward_profit_weight": fixed(1.0),
        "learning_rate": fixed(3e-4),
        "batch_size": fixed(256),
        "n_steps": fixed(2048),
        "device": fixed("cpu"),
    }

    return {
        **_base_sweep(method=method, metric_name="objective/score"),
        "name": "benchmark-ppo-shift-screen",
        "parameters": search_space,
    }
|
|
|
|
|
|
def _ppo_rl_study_sweep(method: str) -> dict[str, Any]:
    """Return the ``train-ppo-standard-vs-defended-equilibrium`` sweep config.

    The sweep maximises ``eval/stress_revenue_worst`` with ``algo`` pinned
    to ``"ppo"``. It varies ``seed``, ``alpha``, ``n_products``,
    ``no_robust``, ``lambda_coi`` and ``ambiguity_radius``; all other
    parameters are constants.

    Args:
        method: Sweep search method, forwarded to ``_base_sweep``.

    Returns:
        Sweep config dict with ``method``, ``metric``, ``name`` and
        ``parameters`` keys, in that order.
    """

    def fixed(setting: Any) -> dict[str, Any]:
        # Parameter pinned to one constant.
        return {"value": setting}

    def grid(*options: Any) -> dict[str, Any]:
        # Parameter drawn from a discrete list of choices.
        return {"values": list(options)}

    search_space = {
        "algo": fixed("ppo"),
        "seed": grid(42, 1337, 7777),
        "alpha": grid(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
        "n_products": grid(5, 25, 50, 100),
        "N": fixed(100),
        "no_robust": grid(False, True),
        "lambda_coi": grid(0.05, 0.15, 0.3),
        "ambiguity_radius": grid(0.1, 0.2, 0.3),
        "ambiguity_points": fixed(7),
        "ambiguity_rollouts": fixed(1),
        "eta_ux": fixed(0.0),
        "reward_profit_weight": fixed(1.0),
        "total_timesteps": fixed(100000),
        "eval_episodes": fixed(10),
        "eval_freq": fixed(1000),
        "log_freq": fixed(100),
        "hist_freq": fixed(500),
        "learning_rate": fixed(3e-4),
        "batch_size": fixed(256),
        "n_steps": fixed(2048),
        "device": fixed("cpu"),
    }

    sweep = _base_sweep(method=method, metric_name="eval/stress_revenue_worst")
    sweep["name"] = "train-ppo-standard-vs-defended-equilibrium"
    sweep["parameters"] = search_space
    return sweep
|
|
|
|
|
|
def _select_sweep_builder(kind: str, profile: str):
    """Return the config-builder function for a ``--kind``/``--profile`` pair.

    Only ``kind == "train"`` accepts a non-default profile
    (``"robust_revenue"``); every other kind requires ``"default"``.

    Args:
        kind: Value of ``--kind``.
        profile: Value of ``--profile``.

    Returns:
        A callable taking the sweep method and returning a sweep config dict.

    Raises:
        ValueError: If a non-train kind is combined with a non-default profile.
        KeyError: If ``kind`` has no registered builder (not reachable through
            the CLI, whose ``choices`` match this table).
    """
    if kind == "train":
        if profile == "robust_revenue":
            return _train_robust_revenue_sweep
        return _train_sweep

    if profile != "default":
        raise ValueError(f"{kind} sweeps only support --profile default")

    builders = {
        "benchmark": _benchmark_sweep,
        "ppo_calibration": _ppo_calibration_sweep,
        "ppo_block_a": _ppo_block_a_sweep,
        "ppo_shift_screen": _ppo_shift_screen_sweep,
        "ppo_rl_study": _ppo_rl_study_sweep,
    }
    return builders[kind]


def main() -> None:
    """Create a W&B sweep from the command line and print its identifier.

    Parses the CLI flags, builds the sweep config for ``--kind`` and
    ``--profile``, optionally adds ``run_cap``, registers the sweep through
    ``wandb.sweep`` and prints one of:

    * a JSON object (``--json``) with kind, profile, project, entity,
      sweep_id and full_id;
    * ``[entity/]project/sweep_id`` (``--full-id``);
    * the bare sweep id (default).

    Raises:
        ImportError: If ``wandb`` cannot be imported.
        ValueError: If ``--profile`` is not supported by the chosen ``--kind``.
    """
    parser = argparse.ArgumentParser(description="Create W&B sweep for PHANTOM")
    parser.add_argument(
        "--kind",
        choices=[
            "benchmark",
            "train",
            "ppo_calibration",
            "ppo_block_a",
            "ppo_shift_screen",
            "ppo_rl_study",
        ],
        default="benchmark",
    )
    parser.add_argument(
        "--profile",
        choices=["default", "robust_revenue"],
        default="default",
    )
    parser.add_argument("--project", required=True)
    parser.add_argument("--entity", default="")
    parser.add_argument(
        "--method", choices=["random", "bayes", "grid"], default="random"
    )
    parser.add_argument("--run-cap", type=int, default=0)
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--full-id", action="store_true")
    args = parser.parse_args()

    kind = str(args.kind)
    profile = str(args.profile)
    project = str(args.project)
    # Normalise the entity once, so the value sent to W&B, the printed full id
    # and the JSON output all agree; whitespace-only means "no entity".
    entity = str(args.entity).strip()

    # Drop the current directory from the import path before importing wandb.
    # NOTE(review): presumably this stops a local ``wandb`` directory (e.g. run
    # output) from shadowing the installed package - confirm.
    cwd = str(Path.cwd())
    sys.path = [p for p in sys.path if p not in {"", cwd}]

    try:
        import wandb
    except ImportError as exc:
        raise ImportError("wandb is required to create sweeps") from exc

    sweep_cfg = _select_sweep_builder(kind, profile)(args.method)

    run_cap = int(args.run_cap)
    if run_cap > 0:  # zero or negative means "no cap": the key is omitted
        sweep_cfg["run_cap"] = run_cap

    # Discard whatever wandb.sweep writes to stdout, so this script's stdout
    # carries only the identifier or JSON printed below.
    with contextlib.redirect_stdout(io.StringIO()):
        sweep_id = wandb.sweep(
            sweep=sweep_cfg,
            project=project,
            entity=entity or None,
        )

    if entity:
        full_id = f"{entity}/{project}/{sweep_id}"
    else:
        full_id = f"{project}/{sweep_id}"

    if args.json:
        print(
            json.dumps(
                {
                    "kind": kind,
                    "profile": profile,
                    "project": project,
                    "entity": entity,
                    "sweep_id": str(sweep_id),
                    "full_id": str(full_id),
                }
            )
        )
        return
    print(full_id if args.full_id else sweep_id)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|