# Mirror of https://github.com/velocitatem/PHANTOM.git
# Synced 2026-05-31 16:43:36 +00:00
# Upstream file: 87 lines, 1.6 KiB, YAML
---
# Hyperparameter sweep definition. The layout (method / metric / command /
# parameters) follows the Weights & Biases sweep configuration schema.

# Search strategy: every trial draws each parameter independently at random.
method: random

# Quantity the sweep optimises; the training run must log this exact key.
metric:
  name: objective/score
  goal: maximize

# Command launched for each trial: `python -m engine.train`.
# `${env}` is a sweep macro expanded by the agent.
# NOTE(review): there is no `${args}` entry, so sampled parameters are not
# passed as command-line flags; presumably engine.train reads them from the
# run config instead -- confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train

# Search space. `values` is a discrete choice list, `value` is a fixed
# constant, and `distribution` + `min`/`max` is a continuous range.
# NOTE(review): all parameters are sampled for every trial regardless of the
# chosen `algo`; presumably the trainer ignores those that do not apply to the
# selected algorithm -- confirm.
parameters:
  algo:
    values: [ppo, a2c, dqn, qtable]
  arch:
    values: [tiny, small, medium]
  activation:
    values: [relu, tanh]
  total_timesteps:
    values: [8000, 12000, 20000]
  seed:
    values: [13, 42, 77]
  n_products:
    values: [6, 8, 10]
  alpha:
    distribution: uniform
    min: 0.1
    max: 0.5
  lambda_coi:
    distribution: uniform
    min: 0.05
    max: 0.4
  robust_radius:
    values: [0.0, 0.1, 0.2]
  robust_points:
    values: [3, 5]
  info_value:
    values: [0.75, 1.0, 1.5]
  revenue_weight:
    values: [0.005, 0.01, 0.02]
  learning_rate:
    # Log-uniform sampling; min/max are the actual bounds, not exponents.
    distribution: log_uniform_values
    min: 1.0e-5
    max: 5.0e-4
  gamma:
    values: [0.98, 0.99]
  buffer_size:
    values: [10000, 30000, 50000]
  batch_size:
    values: [64, 128, 256]
  tau:
    values: [0.002, 0.005, 0.01]
  train_freq:
    values: [1, 4]
  learning_starts:
    values: [500, 1000, 2000]
  n_steps:
    values: [256, 512, 1024]
  n_epochs:
    values: [5, 10]
  gae_lambda:
    values: [0.9, 0.95]
  clip_range:
    values: [0.1, 0.2]
  ent_coef:
    values: [0.0, 0.005]
  target_update_interval:
    values: [500, 1000]
  exploration_fraction:
    values: [0.1, 0.2]
  exploration_final_eps:
    values: [0.02, 0.05]
  action_levels:
    values: [5, 7, 9]
  action_scale_low:
    values: [0.85, 0.9]
  action_scale_high:
    values: [1.1, 1.15]
  q_lr:
    values: [0.05, 0.1, 0.2]
  q_bins:
    values: [4, 6, 8]
  eps_start:
    # Held constant across all trials.
    value: 1.0
  eps_end:
    values: [0.02, 0.05]
  eps_decay:
    values: [0.999, 0.9995]