---
# Hyperparameter sweep definition.
# NOTE(review): the method / metric / command / parameters layout looks like
# the Weights & Biases sweep schema -- confirm against the consuming tool.

# Search strategy used to pick parameter combinations.
method: random

# Quantity the sweep optimises, and in which direction.
metric:
  name: objective/score
  goal: maximize

# Launch command, one argv token per entry (runs `python -m engine.train`).
# NOTE(review): there is no ${args} entry, so sampled parameters are not
# appended to the command line here; presumably engine.train reads them from
# the run config instead -- verify.
command:
  - ${env}
  - python
  - -m
  - engine.train

# Search space. `values` lists discrete choices, `value` pins a constant, and
# `distribution` + `min`/`max` describes a continuous range.
# NOTE(review): the group headings below are inferred from key names only;
# confirm which keys each algorithm actually consumes.
parameters:
  # --- Run-level choices ---
  algo:
    values: [ppo, a2c, dqn, qtable]
  arch:
    values: [tiny, small, medium]
  activation:
    values: [relu, tanh]
  total_timesteps:
    values: [8000, 12000, 20000]
  seed:
    values: [13, 42, 77]

  # --- Presumably environment / objective settings ---
  n_products:
    values: [6, 8, 10]
  alpha:
    distribution: uniform
    min: 0.1
    max: 0.5
  lambda_coi:
    distribution: uniform
    min: 0.05
    max: 0.4
  robust_radius:
    values: [0.0, 0.1, 0.2]
  robust_points:
    values: [3, 5]
  info_value:
    values: [0.75, 1.0, 1.5]
  revenue_weight:
    values: [0.005, 0.01, 0.02]

  # --- Presumably shared optimisation settings ---
  learning_rate:
    distribution: log_uniform_values
    min: 1.0e-5
    max: 5.0e-4
  gamma:
    values: [0.98, 0.99]

  # --- Presumably replay-buffer / off-policy settings ---
  buffer_size:
    values: [10000, 30000, 50000]
  batch_size:
    values: [64, 128, 256]
  tau:
    values: [0.002, 0.005, 0.01]
  train_freq:
    values: [1, 4]
  learning_starts:
    values: [500, 1000, 2000]

  # --- Presumably rollout / on-policy settings ---
  n_steps:
    values: [256, 512, 1024]
  n_epochs:
    values: [5, 10]
  gae_lambda:
    values: [0.9, 0.95]
  clip_range:
    values: [0.1, 0.2]
  ent_coef:
    values: [0.0, 0.005]

  # --- Presumably DQN-style target network and exploration schedule ---
  target_update_interval:
    values: [500, 1000]
  exploration_fraction:
    values: [0.1, 0.2]
  exploration_final_eps:
    values: [0.02, 0.05]

  # --- Presumably action discretisation ---
  action_levels:
    values: [5, 7, 9]
  action_scale_low:
    values: [0.85, 0.9]
  action_scale_high:
    values: [1.1, 1.15]

  # --- Presumably tabular Q-learning settings ---
  q_lr:
    values: [0.05, 0.1, 0.2]
  q_bins:
    values: [4, 6, 8]
  # Fixed rather than swept: a single `value`, not a `values` list.
  eps_start:
    value: 1.0
  eps_end:
    values: [0.02, 0.05]
  eps_decay:
    values: [0.999, 0.9995]