Add naive JAX and libraries, and make adjustments

This commit is contained in:
2026-02-17 14:48:18 +01:00
parent 66c4a0cd1d
commit 802f31b4a1
17 changed files with 2331 additions and 6 deletions

View File

@@ -0,0 +1,84 @@
# Sweep definition using the method / metric / command / parameters layout
# (presumably consumed by `wandb sweep` — confirm).
# Random search that maximizes the `sweep/score` metric across four `algo`
# values; every parameter below is sampled independently for each run.
method: random
metric:
  name: sweep/score
  goal: maximize
# NOTE(review): the command has no ${args} entry, so sampled values are not
# appended as CLI flags; engine.train presumably reads them from the run
# config — confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train
parameters:
  algo:
    values: [ppo, a2c, dqn, qtable]
  total_timesteps:
    values: [30000, 50000, 80000]
  seed:
    values: [13, 42, 77]
  n_products:
    values: [8, 10, 12]
  alpha:
    distribution: uniform
    min: 0.1
    max: 0.6
  lambda_coi:
    distribution: uniform
    min: 0.05
    max: 0.6
  robust_radius:
    distribution: uniform
    min: 0.0
    max: 0.3
  robust_points:
    values: [3, 5, 7]
  info_value:
    distribution: uniform
    min: 0.5
    max: 2.0
  revenue_weight:
    values: [0.005, 0.01, 0.02]
  # Bounds keep the decimal point and signed exponent so that YAML 1.1
  # loaders resolve them as floats rather than strings.
  learning_rate:
    distribution: log_uniform_values
    min: 1.0e-5
    max: 1.0e-3
  gamma:
    values: [0.97, 0.99, 0.995]
  buffer_size:
    values: [20000, 50000, 100000]
  batch_size:
    values: [128, 256, 512]
  tau:
    values: [0.002, 0.005, 0.01]
  train_freq:
    values: [1, 4, 8]
  learning_starts:
    values: [500, 1000, 3000]
  n_steps:
    values: [512, 1024, 2048]
  n_epochs:
    values: [5, 10, 20]
  gae_lambda:
    values: [0.9, 0.95, 0.98]
  clip_range:
    values: [0.1, 0.2, 0.3]
  ent_coef:
    values: [0.0, 0.005, 0.01]
  target_update_interval:
    values: [500, 1000, 2000]
  exploration_fraction:
    values: [0.1, 0.2, 0.3]
  exploration_final_eps:
    values: [0.01, 0.03, 0.05]
  action_levels:
    values: [7, 9, 11]
  action_scale_low:
    values: [0.75, 0.8, 0.85]
  action_scale_high:
    values: [1.15, 1.2, 1.25]
  # NOTE(review): no q_bins entry is listed although `qtable` is among the
  # algo values; qtable runs presumably use the trainer's default — confirm.
  q_lr:
    values: [0.03, 0.05, 0.1, 0.2]
  eps_start:
    value: 1.0
  eps_end:
    values: [0.02, 0.05, 0.1]
  eps_decay:
    values: [0.999, 0.9995, 0.9999]

View File

@@ -0,0 +1,85 @@
# Sweep definition using the method / metric / command / parameters layout
# (presumably consumed by `wandb sweep` — confirm).
# Grid search that maximizes the `sweep/score` metric. `algo` is the only
# parameter with more than one value, so the grid has four points, which
# matches `run_cap: 4`; every other parameter is pinned with `value`.
method: grid
metric:
  name: sweep/score
  goal: maximize
run_cap: 4
# NOTE(review): the command has no ${args} entry, so parameter values are not
# appended as CLI flags; engine.train presumably reads them from the run
# config — confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train
parameters:
  algo:
    values: [ppo, a2c, dqn, qtable]
  seed:
    value: 42
  total_timesteps:
    value: 12000
  eval_episodes:
    value: 3
  eval_freq:
    value: 500
  log_freq:
    value: 100
  revenue_weight:
    value: 0.01
  n_products:
    value: 8
  N:
    value: 80
  alpha:
    value: 0.3
  lambda_coi:
    value: 0.2
  robust_radius:
    value: 0.0
  robust_points:
    value: 1
  info_value:
    value: 1.0
  learning_rate:
    value: 0.0003
  gamma:
    value: 0.99
  buffer_size:
    value: 20000
  batch_size:
    value: 128
  tau:
    value: 0.005
  train_freq:
    value: 1
  learning_starts:
    value: 500
  n_steps:
    value: 512
  n_epochs:
    value: 10
  gae_lambda:
    value: 0.95
  clip_range:
    value: 0.2
  ent_coef:
    value: 0.0
  target_update_interval:
    value: 500
  exploration_fraction:
    value: 0.2
  exploration_final_eps:
    value: 0.05
  action_levels:
    value: 7
  action_scale_low:
    value: 0.9
  action_scale_high:
    value: 1.1
  q_lr:
    value: 0.1
  q_bins:
    value: 6
  eps_start:
    value: 1.0
  eps_end:
    value: 0.05
  eps_decay:
    value: 0.9995

View File

@@ -0,0 +1,54 @@
# Sweep definition using the method / metric / command / parameters layout
# (presumably consumed by `wandb sweep` — confirm).
# Bayesian search that maximizes the `sweep/score` metric with `algo` pinned
# to `sac`.
method: bayes
metric:
  name: sweep/score
  goal: maximize
# NOTE(review): the command has no ${args} entry, so sampled values are not
# appended as CLI flags; engine.train presumably reads them from the run
# config — confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train
parameters:
  algo:
    value: sac
  total_timesteps:
    values: [50000, 80000, 120000]
  # NOTE(review): seed is part of the Bayesian search space, so the optimizer
  # may favour a lucky seed over better hyperparameters — confirm this is
  # intended.
  seed:
    values: [13, 42, 77]
  alpha:
    distribution: uniform
    min: 0.15
    max: 0.55
  n_products:
    values: [8, 10, 12]
  lambda_coi:
    distribution: uniform
    min: 0.05
    max: 0.5
  robust_radius:
    distribution: uniform
    min: 0.05
    max: 0.3
  robust_points:
    values: [3, 5, 7]
  info_value:
    distribution: uniform
    min: 0.5
    max: 2.0
  revenue_weight:
    values: [0.005, 0.01, 0.02]
  # Bounds keep the decimal point and signed exponent so that YAML 1.1
  # loaders resolve them as floats rather than strings.
  learning_rate:
    distribution: log_uniform_values
    min: 3.0e-5
    max: 1.0e-3
  gamma:
    values: [0.98, 0.99, 0.995]
  buffer_size:
    values: [50000, 100000, 200000]
  batch_size:
    values: [128, 256, 512]
  tau:
    values: [0.002, 0.005, 0.01]
  train_freq:
    values: [1, 4, 8]
  learning_starts:
    values: [1000, 3000, 5000]

View File

@@ -0,0 +1,86 @@
# Sweep definition using the method / metric / command / parameters layout
# (presumably consumed by `wandb sweep` — confirm).
# Random search that maximizes the `sweep/score` metric across four `algo`
# values, additionally sampling `arch` and `activation`.
method: random
metric:
  name: sweep/score
  goal: maximize
# NOTE(review): the command has no ${args} entry, so sampled values are not
# appended as CLI flags; engine.train presumably reads them from the run
# config — confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train
parameters:
  algo:
    values: [ppo, a2c, dqn, qtable]
  arch:
    values: [tiny, small, medium]
  activation:
    values: [relu, tanh]
  total_timesteps:
    values: [8000, 12000, 20000]
  seed:
    values: [13, 42, 77]
  n_products:
    values: [6, 8, 10]
  alpha:
    distribution: uniform
    min: 0.1
    max: 0.5
  lambda_coi:
    distribution: uniform
    min: 0.05
    max: 0.4
  robust_radius:
    values: [0.0, 0.1, 0.2]
  robust_points:
    values: [3, 5]
  info_value:
    values: [0.75, 1.0, 1.5]
  revenue_weight:
    values: [0.005, 0.01, 0.02]
  # Bounds keep the decimal point and signed exponent so that YAML 1.1
  # loaders resolve them as floats rather than strings.
  learning_rate:
    distribution: log_uniform_values
    min: 1.0e-5
    max: 5.0e-4
  gamma:
    values: [0.98, 0.99]
  buffer_size:
    values: [10000, 30000, 50000]
  batch_size:
    values: [64, 128, 256]
  tau:
    values: [0.002, 0.005, 0.01]
  train_freq:
    values: [1, 4]
  learning_starts:
    values: [500, 1000, 2000]
  n_steps:
    values: [256, 512, 1024]
  n_epochs:
    values: [5, 10]
  gae_lambda:
    values: [0.9, 0.95]
  clip_range:
    values: [0.1, 0.2]
  ent_coef:
    values: [0.0, 0.005]
  target_update_interval:
    values: [500, 1000]
  exploration_fraction:
    values: [0.1, 0.2]
  exploration_final_eps:
    values: [0.02, 0.05]
  action_levels:
    values: [5, 7, 9]
  action_scale_low:
    values: [0.85, 0.9]
  action_scale_high:
    values: [1.1, 1.15]
  q_lr:
    values: [0.05, 0.1, 0.2]
  q_bins:
    values: [4, 6, 8]
  eps_start:
    value: 1.0
  eps_end:
    values: [0.02, 0.05]
  eps_decay:
    values: [0.999, 0.9995]