catchup: rogue scripts

2026-07-15 17:43:36 +00:00 · 2026-02-27 12:45:46 +01:00
parent e8a9716f69
commit 5444a4ea13
27 changed files with 6908 additions and 2 deletions
--- a/engine/sweeps/tpu_jax.yaml
+++ b/engine/sweeps/tpu_jax.yaml
@@ -0,0 +1,93 @@
+method: bayes  # Bayesian search over the entries under `parameters`
+metric:
+  name: sweep/score  # objective the search optimizes
+  goal: maximize
+command:  # every run is launched as: ${env} python -m engine.train
+  - ${env}
+  - python
+  - -m
+  - engine.train  # NOTE(review): no ${args} entry; confirm swept values reach the script another way
+parameters:
+  # fixed (not swept): always use the JAX backend so TPU chips are actually exercised
+  use_jax:
+    value: true
+  # swept across all four algos; per the original author, each has a JAX implementation
+  algo:
+    values: [ppo, a2c, dqn, qtable]
+  total_timesteps:
+    values: [50000, 80000, 120000]
+  checkpoint_interval:  # NOTE(review): larger than every total_timesteps option (max 120000)
+    value: 200000  # presumably keeps interval checkpoints from firing in sweep runs -- confirm
+  seed:  # NOTE(review): searched like any other parameter under method: bayes -- confirm intent
+    values: [13, 42, 77]
+  n_products:
+    values: [8, 10, 12]
+  # COI framework parameters -- primary research variables
+  alpha:
+    distribution: uniform
+    min: 0.1
+    max: 0.6
+  lambda_coi:
+    distribution: uniform
+    min: 0.05
+    max: 0.6
+  robust_radius:
+    distribution: uniform
+    min: 0.0
+    max: 0.3
+  robust_points:
+    values: [3, 5, 7]
+  info_value:
+    distribution: uniform
+    min: 0.5
+    max: 2.0
+  revenue_weight:
+    values: [0.005, 0.01, 0.02]
+  # shared hyperparameters
+  learning_rate:
+    distribution: log_uniform_values  # min/max are the actual bounds, not exponents
+    min: 1.0e-5
+    max: 1.0e-3
+  gamma:
+    values: [0.97, 0.99, 0.995]
+  # JAX parallelism -- key lever for TPU throughput
+  jax_num_envs:
+    values: [8, 16, 32]
+  jax_num_steps:
+    values: [64, 128, 256]
+  jax_num_minibatches:
+    values: [2, 4, 8]
+  jax_update_epochs:
+    values: [2, 4, 8]
+  # PPO/A2C specific -- NOTE(review): sampled for every run; nothing here conditions on algo
+  gae_lambda:
+    values: [0.9, 0.95, 0.98]
+  clip_range:
+    values: [0.1, 0.2, 0.3]
+  ent_coef:
+    values: [0.0, 0.005, 0.01]
+  # DQN specific (same caveat: sampled regardless of the chosen algo)
+  buffer_size:
+    values: [20000, 50000, 100000]
+  batch_size:
+    values: [128, 256, 512]
+  learning_starts:
+    values: [500, 1000, 3000]
+  exploration_fraction:
+    values: [0.1, 0.2, 0.3]
+  exploration_final_eps:
+    values: [0.01, 0.03, 0.05]
+  # QTable specific (same caveat: sampled regardless of the chosen algo)
+  q_lr:
+    values: [0.03, 0.05, 0.1, 0.2]
+  eps_end:
+    values: [0.02, 0.05, 0.1]
+  eps_decay:
+    values: [0.999, 0.9995, 0.9999]
+  # action space
+  action_levels:
+    values: [7, 9, 11]
+  action_scale_low:
+    values: [0.75, 0.8, 0.85]
+  action_scale_high:
+    values: [1.15, 1.2, 1.25]
--- a/engine/sweeps/tpu_pod.yaml
+++ b/engine/sweeps/tpu_pod.yaml
@@ -0,0 +1,64 @@
+method: bayes  # Bayesian search over the entries under `parameters`
+metric:
+  name: sweep/score  # objective the search optimizes
+  goal: maximize
+command:  # every run is launched as: ${env} python -m engine.train
+  - ${env}
+  - python
+  - -m
+  - engine.train  # NOTE(review): no ${args} entry; confirm swept values reach the script another way
+parameters:  # NOTE(review): seed and robust_points are not set in this file -- confirm defaults are intended
+  use_jax:
+    value: true
+  # pmap requires all workers to compile the same computation graph shape,
+  # so structural params are fixed -- only research/scalar params are swept
+  algo:  # NOTE(review): still swept (ppo vs a2c) despite the note above -- confirm this is intended
+    values: [ppo, a2c]
+  jax_num_envs:
+    value: 32
+  jax_num_steps:
+    value: 128
+  jax_num_minibatches:
+    value: 4
+  jax_update_epochs:
+    value: 4
+  total_timesteps:
+    value: 100000
+  checkpoint_interval:  # NOTE(review): larger than total_timesteps (100000)
+    value: 200000  # presumably keeps interval checkpoints from firing in sweep runs -- confirm
+  n_products:
+    value: 10
+  action_levels:
+    value: 9
+  # research parameters -- primary sweep targets
+  alpha:
+    distribution: uniform
+    min: 0.1
+    max: 0.6
+  lambda_coi:
+    distribution: uniform
+    min: 0.05
+    max: 0.6
+  robust_radius:
+    distribution: uniform
+    min: 0.0
+    max: 0.3
+  info_value:
+    distribution: uniform
+    min: 0.5
+    max: 2.0
+  revenue_weight:
+    values: [0.005, 0.01, 0.02]
+  # training hyperparameters
+  learning_rate:
+    distribution: log_uniform_values  # min/max are the actual bounds, not exponents
+    min: 1.0e-5
+    max: 1.0e-3
+  gamma:
+    values: [0.97, 0.99, 0.995]
+  gae_lambda:
+    values: [0.9, 0.95, 0.98]
+  clip_range:
+    values: [0.1, 0.2, 0.3]
+  ent_coef:
+    values: [0.0, 0.005, 0.01]