catchup: rogue scripts

2026-05-31 16:43:36 +00:00 · 2026-02-27 12:45:46 +01:00
parent e8a9716f69
commit 5444a4ea13
27 changed files with 6908 additions and 2 deletions
--- a/engine/sweeps/tpu_pod.yaml
+++ b/engine/sweeps/tpu_pod.yaml
@@ -0,0 +1,64 @@
+method: bayes  # search strategy for the sweep (file follows the W&B sweep schema)
+metric:  # objective that the search strategy optimizes
+  name: sweep/score  # must match a metric key logged by the run -- verify in engine.train
+  goal: maximize
+command:  # NOTE(review): no ${args} entry -- engine.train presumably reads wandb.config; confirm
+  - ${env}  # sweep-agent macro, expanded at launch time
+  - python
+  - -m
+  - engine.train
+parameters:  # search space: `value` pins a setting, `values`/`distribution` sweep it
+  use_jax:
+    value: true
+  # pmap requires every worker to compile the same computation graph shape, so
+  # structural params are pinned. NOTE(review): `algo` is still swept -- confirm safe.
+  algo:
+    values: [ppo, a2c]  # categorical choice, one entry picked per trial
+  jax_num_envs:
+    value: 32
+  jax_num_steps:
+    value: 128
+  jax_num_minibatches:
+    value: 4
+  jax_update_epochs:
+    value: 4
+  total_timesteps:
+    value: 100000
+  checkpoint_interval:
+    value: 200000  # NOTE(review): exceeds total_timesteps if units match -- confirm intent
+  n_products:
+    value: 10
+  action_levels:
+    value: 9
+  # research parameters -- primary sweep targets (continuous except revenue_weight)
+  alpha:
+    distribution: uniform  # continuous draw between min and max
+    min: 0.1
+    max: 0.6
+  lambda_coi:
+    distribution: uniform
+    min: 0.05
+    max: 0.6
+  robust_radius:
+    distribution: uniform
+    min: 0.0  # lower bound of exactly zero is part of the range
+    max: 0.3
+  info_value:
+    distribution: uniform
+    min: 0.5
+    max: 2.0
+  revenue_weight:
+    values: [0.005, 0.01, 0.02]  # discrete choices rather than a continuous range
+  # training hyperparameters (learning rate on a log scale, the rest from short lists)
+  learning_rate:
+    distribution: log_uniform_values  # log-scale draw; min/max are values, not exponents
+    min: 1.0e-5
+    max: 1.0e-3
+  gamma:
+    values: [0.97, 0.99, 0.995]
+  gae_lambda:
+    values: [0.9, 0.95, 0.98]
+  clip_range:
+    values: [0.1, 0.2, 0.3]  # presumably PPO-only; likely unused when algo=a2c -- verify
+  ent_coef:
+    values: [0.0, 0.005, 0.01]