# Mirror of https://github.com/velocitatem/PHANTOM.git
# Synced 2026-05-31 08:33:36 +00:00
# 61 lines, 941 B, YAML
---
# Hyperparameter sweep definition.
# The method/metric/command/parameters layout and the ${env} macro match the
# Weights & Biases sweep schema.
# NOTE(review): confirm this file is consumed by `wandb sweep`.

# Grid search: one run per combination of the `values` lists under
# `parameters`. With the lists below that is
#   3 (seed) x 6 (alpha) x 3 (n_products) x 2 (no_robust) x 2 (lambda_coi)
#   = 216 runs.
method: grid

# Objective reported for the sweep; higher is better.
metric:
  name: eval/stress_reward_worst
  goal: maximize

# Command launched for every run: `python -m engine.train`.
# NOTE(review): there is no ${args} entry, so the swept values are not
# appended as command-line flags; presumably engine.train reads them from
# the run config instead -- confirm against the training entry point.
command:
  - ${env}
  - python
  - -m
  - engine.train

# Keys with `value` are held constant for every run; keys with `values`
# are the swept grid axes.
parameters:
  algo:
    value: ppo
  backend:
    value: sb3
  device:
    value: cpu

  # Grid axis: 3 seeds.
  seed:
    values: [42, 1337, 7777]
  # Grid axis: 6 settings.
  alpha:
    values: [0.1, 0.2, 0.3, 0.4, 0.6, 0.8]
  # Grid axis: 3 settings.
  n_products:
    values: [25, 50, 100]

  N:
    value: 100

  # Grid axis: both settings of the flag are run.
  no_robust:
    values: [false, true]
  # Grid axis: 2 settings.
  lambda_coi:
    values: [0.15, 0.30]

  robust_radius:
    value: 0.2
  robust_points:
    value: 7
  robust_rollouts:
    value: 1

  eta_ux:
    value: 0.5
  reward_profit_weight:
    value: 1.0

  action_levels:
    value: 9
  action_scale_low:
    value: 0.8
  action_scale_high:
    value: 1.2

  total_timesteps:
    value: 100000
  eval_episodes:
    value: 12
  eval_freq:
    value: 1000
  log_freq:
    value: 100
  hist_freq:
    value: 500

  learning_rate:
    value: 0.0003
  batch_size:
    value: 256
  n_steps:
    value: 2048