# File: PHANTOM/engine/sweeps/sac_tune.yaml
# (55 lines, 979 B, YAML)

# Hyperparameter sweep for the SAC configuration of engine.train.
# The key layout (method / metric / command / parameters) looks like a
# Weights & Biases sweep definition -- confirm against the launcher in use.
#
# Nesting matters: each `distribution` / `min` / `max` / `values` entry must
# sit under its own parameter name. Written at one flat level they collide as
# duplicate keys and the search space is lost.

# Search strategy.
method: bayes

# Quantity the search optimizes, and in which direction.
metric:
  name: objective/score
  goal: maximize

# Command launched for each trial: `${env} python -m engine.train`.
# NOTE(review): there is no `${args}` entry, so sampled values are not
# appended as CLI flags; presumably engine.train reads them from the run
# config instead -- confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train

parameters:
  # Held constant for every trial.
  algo:
    value: sac

  # Run length and seed (discrete choices).
  total_timesteps:
    values: [50000, 80000, 120000]
  seed:
    values: [13, 42, 77]

  # Names suggest environment / objective settings -- TODO confirm their
  # exact meaning against engine.train.
  alpha:
    distribution: uniform
    min: 0.15
    max: 0.55
  n_products:
    values: [8, 10, 12]
  lambda_coi:
    distribution: uniform
    min: 0.05
    max: 0.5
  robust_radius:
    distribution: uniform
    min: 0.05
    max: 0.3
  robust_points:
    values: [3, 5, 7]
  info_value:
    distribution: uniform
    min: 0.5
    max: 2.0
  revenue_weight:
    values: [0.005, 0.01, 0.02]

  # Names suggest optimizer / replay-buffer settings of the learner --
  # TODO confirm. learning_rate is the only log-scaled range in this file.
  learning_rate:
    distribution: log_uniform_values
    min: 3.0e-5
    max: 1.0e-3
  gamma:
    values: [0.98, 0.99, 0.995]
  buffer_size:
    values: [50000, 100000, 200000]
  batch_size:
    values: [128, 256, 512]
  tau:
    values: [0.002, 0.005, 0.01]
  train_freq:
    values: [1, 4, 8]
  learning_starts:
    values: [1000, 3000, 5000]