mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
catchup: rogue scripts
This commit is contained in:
64
engine/sweeps/tpu_pod.yaml
Normal file
64
engine/sweeps/tpu_pod.yaml
Normal file
@@ -0,0 +1,64 @@
|
||||
# Hyperparameter sweep definition for `engine.train` (sweep schema with
# method / metric / command / parameters sections).
#
# Search strategy: Bayesian optimisation, maximising the `sweep/score` metric.
method: bayes
metric:
  name: sweep/score
  goal: maximize

# Command each sweep run launches: `<env> python -m engine.train`.
# NOTE(review): there is no `${args}` entry, so the chosen parameter values
# are not passed on the command line -- presumably engine.train reads them
# from the sweep run's config object instead; confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train

# Parameter forms used below:
#   value:        a single constant, identical for every run
#   values:       a discrete set of choices
#   distribution: a continuous range sampled between min and max
parameters:
  use_jax:
    value: true

  # pmap requires all workers to compile the same computation graph shape,
  # so structural params are fixed -- only research/scalar params are swept
  # NOTE(review): `algo` is the one entry in this group that is swept rather
  # than fixed; confirm that switching between ppo and a2c is compatible with
  # the constraint described above.
  algo:
    values: [ppo, a2c]
  jax_num_envs:
    value: 32
  jax_num_steps:
    value: 128
  jax_num_minibatches:
    value: 4
  jax_update_epochs:
    value: 4
  total_timesteps:
    value: 100000
  # NOTE(review): checkpoint_interval (200000) is larger than total_timesteps
  # (100000). If the interval is counted in timesteps, no periodic checkpoint
  # would be reached within a run -- confirm this is intentional.
  checkpoint_interval:
    value: 200000
  n_products:
    value: 10
  action_levels:
    value: 9

  # research parameters -- primary sweep targets
  alpha:
    distribution: uniform
    min: 0.1
    max: 0.6
  lambda_coi:
    distribution: uniform
    min: 0.05
    max: 0.6
  robust_radius:
    distribution: uniform
    min: 0.0
    max: 0.3
  info_value:
    distribution: uniform
    min: 0.5
    max: 2.0
  revenue_weight:
    values: [0.005, 0.01, 0.02]

  # training hyperparameters
  # learning_rate is sampled log-uniformly; min/max are the actual bounds
  # (1e-5 .. 1e-3), not exponents.
  learning_rate:
    distribution: log_uniform_values
    min: 1.0e-5
    max: 1.0e-3
  gamma:
    values: [0.97, 0.99, 0.995]
  gae_lambda:
    values: [0.9, 0.95, 0.98]
  clip_range:
    values: [0.1, 0.2, 0.3]
  ent_coef:
    values: [0.0, 0.005, 0.01]
|
||||
Reference in New Issue
Block a user