# Files
# PHANTOM/engine/sweeps/final_thesis_proof.yaml
# 2026-03-23 14:14:08 +01:00
#
# 61 lines
# 941 B
# YAML

# Search strategy: grid, i.e. every combination of the swept parameter values
# is scheduled as its own run.
method: grid
# Objective reported for the sweep. `name` and `goal` are nested under
# `metric` so they are read as its fields rather than as top-level keys.
metric:
  name: eval/stress_reward_worst
  goal: maximize
# Launch line for each run, one token per list item: the `${env}` macro
# followed by `python -m engine.train`.
# NOTE(review): there is no `${args}` item, so swept values are not appended
# as command-line flags; presumably engine.train picks them up from the run
# config instead -- confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train
# Hyperparameters for the sweep. Each entry is a mapping holding either
# `value` (fixed for every run) or `values` (an axis of the grid).
# Swept axes: seed (3) x alpha (6) x n_products (3) x no_robust (2)
# x lambda_coi (2) = 216 combinations.
parameters:
  algo:
    value: ppo
  backend:
    value: sb3
  device:
    value: cpu
  # Swept axis.
  seed:
    values: [42, 1337, 7777]
  # Swept axis.
  alpha:
    values: [0.1, 0.2, 0.3, 0.4, 0.6, 0.8]
  # Swept axis.
  n_products:
    values: [25, 50, 100]
  # Key is quoted because the YAML 1.1 bool type lists a bare N as false;
  # quoting keeps it the string "N" on every parser.
  "N":
    value: 100
  # Swept axis.
  # NOTE(review): the robust_* settings below stay fixed in both branches;
  # presumably they are ignored when no_robust is true -- confirm.
  no_robust:
    values: [false, true]
  # Swept axis.
  lambda_coi:
    values: [0.15, 0.30]
  robust_radius:
    value: 0.2
  robust_points:
    value: 7
  robust_rollouts:
    value: 1
  eta_ux:
    value: 0.5
  reward_profit_weight:
    value: 1.0
  action_levels:
    value: 9
  action_scale_low:
    value: 0.8
  action_scale_high:
    value: 1.2
  total_timesteps:
    value: 100000
  eval_episodes:
    value: 12
  eval_freq:
    value: 1000
  log_freq:
    value: 100
  hist_freq:
    value: 500
  learning_rate:
    value: 0.0003
  batch_size:
    value: 256
  n_steps:
    value: 2048