mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
adding naive jax and libraries and make adjustments
This commit is contained in:
84
engine/sweeps/model_mix.yaml
Normal file
84
engine/sweeps/model_mix.yaml
Normal file
@@ -0,0 +1,84 @@
|
||||
# Sweep: random search across several algorithms and their hyperparameters.
# The optimizer maximizes the `sweep/score` metric.
method: random
metric:
  name: sweep/score
  goal: maximize

# Each run is launched as `<env> python -m engine.train`.
# NOTE(review): there is no ${args} macro here, so sampled parameters are not
# passed on the command line; presumably engine.train reads them from the
# sweep/run config instead - confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train

parameters:
  # Algorithm under test (categorical).
  algo:
    values: [ppo, a2c, dqn, qtable]
  total_timesteps:
    values: [30000, 50000, 80000]
  seed:
    values: [13, 42, 77]
  n_products:
    values: [8, 10, 12]

  # Continuous ranges sampled uniformly between min and max.
  alpha:
    distribution: uniform
    min: 0.1
    max: 0.6
  lambda_coi:
    distribution: uniform
    min: 0.05
    max: 0.6
  robust_radius:
    distribution: uniform
    min: 0.0
    max: 0.3
  robust_points:
    values: [3, 5, 7]
  info_value:
    distribution: uniform
    min: 0.5
    max: 2.0
  revenue_weight:
    values: [0.005, 0.01, 0.02]

  # Learning rate is sampled log-uniformly between the literal min/max values.
  learning_rate:
    distribution: log_uniform_values
    min: 1.0e-5
    max: 1.0e-3
  gamma:
    values: [0.97, 0.99, 0.995]
  buffer_size:
    values: [20000, 50000, 100000]
  batch_size:
    values: [128, 256, 512]
  tau:
    values: [0.002, 0.005, 0.01]
  train_freq:
    values: [1, 4, 8]
  learning_starts:
    values: [500, 1000, 3000]
  n_steps:
    values: [512, 1024, 2048]
  n_epochs:
    values: [5, 10, 20]
  gae_lambda:
    values: [0.9, 0.95, 0.98]
  clip_range:
    values: [0.1, 0.2, 0.3]
  ent_coef:
    values: [0.0, 0.005, 0.01]
  target_update_interval:
    values: [500, 1000, 2000]
  exploration_fraction:
    values: [0.1, 0.2, 0.3]
  exploration_final_eps:
    values: [0.01, 0.03, 0.05]
  action_levels:
    values: [7, 9, 11]
  action_scale_low:
    values: [0.75, 0.8, 0.85]
  action_scale_high:
    values: [1.15, 1.2, 1.25]

  # NOTE(review): `qtable` is swept above but no `q_bins` entry is set here,
  # unlike models_only.yaml and small_arch_compare.yaml; presumably
  # engine.train falls back to a default - confirm.
  q_lr:
    values: [0.03, 0.05, 0.1, 0.2]
  # Fixed (single-value) parameter: identical in every run.
  eps_start:
    value: 1.0
  eps_end:
    values: [0.02, 0.05, 0.1]
  eps_decay:
    values: [0.999, 0.9995, 0.9999]
|
||||
85
engine/sweeps/models_only.yaml
Normal file
85
engine/sweeps/models_only.yaml
Normal file
@@ -0,0 +1,85 @@
|
||||
# Sweep: grid search in which only `algo` varies; every other parameter is
# pinned to a single value. The optimizer maximizes the `sweep/score` metric.
method: grid
metric:
  name: sweep/score
  goal: maximize
# Upper bound on the number of runs; equals the number of `algo` values below.
run_cap: 4

# Each run is launched as `<env> python -m engine.train`.
# NOTE(review): there is no ${args} macro here, so parameters are not passed
# on the command line; presumably engine.train reads them from the sweep/run
# config instead - confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train

parameters:
  # The only swept dimension.
  algo:
    values: [ppo, a2c, dqn, qtable]

  # Everything below is a fixed, single-value setting.
  seed:
    value: 42
  total_timesteps:
    value: 12000
  eval_episodes:
    value: 3
  eval_freq:
    value: 500
  log_freq:
    value: 100
  revenue_weight:
    value: 0.01
  n_products:
    value: 8
  # Quoted on purpose: a bare N matches the YAML 1.1 boolean pattern and a
  # strict 1.1 loader would turn this key into `false` instead of the string.
  "N":
    value: 80
  alpha:
    value: 0.3
  lambda_coi:
    value: 0.2
  robust_radius:
    value: 0.0
  robust_points:
    value: 1
  info_value:
    value: 1.0
  learning_rate:
    value: 0.0003
  gamma:
    value: 0.99
  buffer_size:
    value: 20000
  batch_size:
    value: 128
  tau:
    value: 0.005
  train_freq:
    value: 1
  learning_starts:
    value: 500
  n_steps:
    value: 512
  n_epochs:
    value: 10
  gae_lambda:
    value: 0.95
  clip_range:
    value: 0.2
  ent_coef:
    value: 0.0
  target_update_interval:
    value: 500
  exploration_fraction:
    value: 0.2
  exploration_final_eps:
    value: 0.05
  action_levels:
    value: 7
  action_scale_low:
    value: 0.9
  action_scale_high:
    value: 1.1
  q_lr:
    value: 0.1
  q_bins:
    value: 6
  eps_start:
    value: 1.0
  eps_end:
    value: 0.05
  eps_decay:
    value: 0.9995
|
||||
54
engine/sweeps/sac_tune.yaml
Normal file
54
engine/sweeps/sac_tune.yaml
Normal file
@@ -0,0 +1,54 @@
|
||||
# Sweep: Bayesian search with the algorithm pinned to `sac`.
# The optimizer maximizes the `sweep/score` metric.
method: bayes
metric:
  name: sweep/score
  goal: maximize

# Each run is launched as `<env> python -m engine.train`.
# NOTE(review): there is no ${args} macro here, so sampled parameters are not
# passed on the command line; presumably engine.train reads them from the
# sweep/run config instead - confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train

parameters:
  # Fixed (single-value) parameter: identical in every run.
  algo:
    value: sac
  total_timesteps:
    values: [50000, 80000, 120000]
  seed:
    values: [13, 42, 77]

  # Continuous ranges sampled uniformly between min and max.
  alpha:
    distribution: uniform
    min: 0.15
    max: 0.55
  n_products:
    values: [8, 10, 12]
  lambda_coi:
    distribution: uniform
    min: 0.05
    max: 0.5
  robust_radius:
    distribution: uniform
    min: 0.05
    max: 0.3
  robust_points:
    values: [3, 5, 7]
  info_value:
    distribution: uniform
    min: 0.5
    max: 2.0
  revenue_weight:
    values: [0.005, 0.01, 0.02]

  # Learning rate is sampled log-uniformly between the literal min/max values.
  learning_rate:
    distribution: log_uniform_values
    min: 3.0e-5
    max: 1.0e-3
  gamma:
    values: [0.98, 0.99, 0.995]
  buffer_size:
    values: [50000, 100000, 200000]
  batch_size:
    values: [128, 256, 512]
  tau:
    values: [0.002, 0.005, 0.01]
  train_freq:
    values: [1, 4, 8]
  learning_starts:
    values: [1000, 3000, 5000]
|
||||
86
engine/sweeps/small_arch_compare.yaml
Normal file
86
engine/sweeps/small_arch_compare.yaml
Normal file
@@ -0,0 +1,86 @@
|
||||
# Sweep: random search that also varies `arch` and `activation` alongside the
# algorithm and its hyperparameters. The optimizer maximizes `sweep/score`.
method: random
metric:
  name: sweep/score
  goal: maximize

# Each run is launched as `<env> python -m engine.train`.
# NOTE(review): there is no ${args} macro here, so sampled parameters are not
# passed on the command line; presumably engine.train reads them from the
# sweep/run config instead - confirm.
command:
  - ${env}
  - python
  - -m
  - engine.train

parameters:
  # Categorical choices: algorithm, architecture preset, activation.
  algo:
    values: [ppo, a2c, dqn, qtable]
  arch:
    values: [tiny, small, medium]
  activation:
    values: [relu, tanh]
  total_timesteps:
    values: [8000, 12000, 20000]
  seed:
    values: [13, 42, 77]
  n_products:
    values: [6, 8, 10]

  # Continuous ranges sampled uniformly between min and max.
  alpha:
    distribution: uniform
    min: 0.1
    max: 0.5
  lambda_coi:
    distribution: uniform
    min: 0.05
    max: 0.4
  robust_radius:
    values: [0.0, 0.1, 0.2]
  robust_points:
    values: [3, 5]
  info_value:
    values: [0.75, 1.0, 1.5]
  revenue_weight:
    values: [0.005, 0.01, 0.02]

  # Learning rate is sampled log-uniformly between the literal min/max values.
  learning_rate:
    distribution: log_uniform_values
    min: 1.0e-5
    max: 5.0e-4
  gamma:
    values: [0.98, 0.99]
  buffer_size:
    values: [10000, 30000, 50000]
  batch_size:
    values: [64, 128, 256]
  tau:
    values: [0.002, 0.005, 0.01]
  train_freq:
    values: [1, 4]
  learning_starts:
    values: [500, 1000, 2000]
  n_steps:
    values: [256, 512, 1024]
  n_epochs:
    values: [5, 10]
  gae_lambda:
    values: [0.9, 0.95]
  clip_range:
    values: [0.1, 0.2]
  ent_coef:
    values: [0.0, 0.005]
  target_update_interval:
    values: [500, 1000]
  exploration_fraction:
    values: [0.1, 0.2]
  exploration_final_eps:
    values: [0.02, 0.05]
  action_levels:
    values: [5, 7, 9]
  action_scale_low:
    values: [0.85, 0.9]
  action_scale_high:
    values: [1.1, 1.15]
  q_lr:
    values: [0.05, 0.1, 0.2]
  q_bins:
    values: [4, 6, 8]
  # Fixed (single-value) parameter: identical in every run.
  eps_start:
    value: 1.0
  eps_end:
    values: [0.02, 0.05]
  eps_decay:
    values: [0.999, 0.9995]
|
||||
Reference in New Issue
Block a user