# Hyperparameter sweep definition: Bayesian search that maximizes the
# `objective/score` metric, launching each trial via `python -m engine.train`.
# NOTE(review): the schema (method/metric/command/parameters, `${env}` macro,
# `log_uniform_values`) looks like a Weights & Biases sweep -- confirm.
method: bayes

metric:
  name: objective/score
  goal: maximize

# NOTE(review): there is no `${args}` macro here, so swept values are not
# appended as CLI flags; presumably engine.train reads them from the run
# config instead -- confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train

parameters:
  use_jax:
    value: true

  # pmap requires all workers to compile the same computation graph shape,
  # so structural params are fixed -- only research/scalar params are swept
  algo:
    values: [ppo, a2c]
  jax_num_envs:
    value: 32
  jax_num_steps:
    value: 128
  jax_num_minibatches:
    value: 4
  jax_update_epochs:
    value: 4
  total_timesteps:
    value: 100000
  # NOTE(review): this is larger than total_timesteps above; if the interval
  # is counted in timesteps, no periodic checkpoint would trigger within a
  # run -- confirm this is intentional.
  checkpoint_interval:
    value: 200000
  n_products:
    value: 10
  action_levels:
    value: 9

  # research parameters -- primary sweep targets
  alpha:
    distribution: uniform
    min: 0.1
    max: 0.6
  lambda_coi:
    distribution: uniform
    min: 0.05
    max: 0.6
  robust_radius:
    distribution: uniform
    min: 0.0
    max: 0.3
  info_value:
    distribution: uniform
    min: 0.5
    max: 2.0
  revenue_weight:
    values: [0.005, 0.01, 0.02]

  # training hyperparameters
  learning_rate:
    distribution: log_uniform_values
    min: 1.0e-5
    max: 1.0e-3
  gamma:
    values: [0.97, 0.99, 0.995]
  gae_lambda:
    values: [0.9, 0.95, 0.98]
  clip_range:
    values: [0.1, 0.2, 0.3]
  ent_coef:
    values: [0.0, 0.005, 0.01]