feature: drafting studies directory

This commit is contained in:
2026-01-30 13:18:20 +01:00
parent 28d3f6853e
commit 52fe865598
3 changed files with 229 additions and 0 deletions

34
engine/studies/factors.py Normal file
View File

@@ -0,0 +1,34 @@
"""shared factor definitions for experimental designs"""
import numpy as np
from dataclasses import dataclass, field
from typing import Callable, Any
@dataclass
class Factor:
name: str
levels: list
primary: bool = True # full cross vs sampled
# demand functions with compatible signatures
def demand_linear(mu, sigma, size): return np.maximum(0, np.random.normal(mu, sigma, size))
def demand_uniform(mu, sigma, size): return np.random.uniform(mu - sigma, mu + sigma, size)
def demand_exponential(mu, sigma, size): return np.random.exponential(mu, size)
def demand_logistic(mu, sigma, size): return np.random.logistic(mu, sigma, size)
DEMAND_FUNCTIONS = {
"linear": demand_linear,
"uniform": demand_uniform,
"exponential": demand_exponential,
"logistic": demand_logistic,
}
FACTORS = [
Factor("demand_fn", list(DEMAND_FUNCTIONS.keys()), primary=True),
Factor("alpha", [0.1, 0.3, 0.5, 0.7], primary=True),
Factor("n_products", [5, 15, 30, 50], primary=True),
Factor("demand_mu", [30.0, 50.0, 70.0], primary=False),
Factor("demand_sigma", [5.0, 10.0, 20.0], primary=False),
Factor("N", [100, 500, 1000], primary=False),
]
SEEDS_PER_CONFIG = 5

View File

@@ -0,0 +1,89 @@
"""full factorial design - all factor combinations"""
import sys
sys.path.insert(0, "..")
import logging
from itertools import product
import json
import hashlib
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
from .factors import FACTORS, DEMAND_FUNCTIONS, SEEDS_PER_CONFIG
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger(__name__)
def generate_configs():
"""generate all factor combinations with seeds"""
all_levels = [f.levels for f in FACTORS]
names = [f.name for f in FACTORS]
configs = []
for combo in product(*all_levels):
base = {names[i]: combo[i] for i in range(len(names))}
for seed in range(SEEDS_PER_CONFIG):
cfg = {**base, "seed": seed}
cfg["id"] = hashlib.md5(json.dumps(cfg, sort_keys=True).encode()).hexdigest()[:8]
configs.append(cfg)
return configs
def run_single(cfg: dict) -> dict:
"""execute one experiment config, return metrics"""
from engine.wrapper import PHANTOM
import numpy as np
np.random.seed(cfg["seed"])
demand_fn = DEMAND_FUNCTIONS[cfg["demand_fn"]]
env = PHANTOM(
n_products=cfg["n_products"],
alpha=cfg["alpha"],
N=cfg["N"],
)
env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"]))
obs, _ = env.reset()
total_reward, steps = 0.0, 0
for _ in range(100):
action = env.action_space.sample()
obs, reward, term, trunc, _ = env.step(action)
total_reward += reward
steps += 1
if term: break
env.close()
return {
"id": cfg["id"],
"config": cfg,
"total_reward": total_reward,
"avg_reward": total_reward / steps,
"steps": steps,
}
def run_study(max_workers: int = None, output: str = "results_full.jsonl"):
configs = generate_configs()
log.info(f"full factorial: {len(configs)} configs ({len(configs)//SEEDS_PER_CONFIG} unique × {SEEDS_PER_CONFIG} seeds)")
results = []
with ProcessPoolExecutor(max_workers=max_workers) as ex:
for i, result in enumerate(ex.map(run_single, configs)):
results.append(result)
if (i+1) % 100 == 0: log.info(f"progress: {i+1}/{len(configs)}")
Path(output).write_text("\n".join(json.dumps(r) for r in results))
log.info(f"wrote {len(results)} results to {output}")
return results
if __name__ == "__main__":
import argparse
p = argparse.ArgumentParser()
p.add_argument("--workers", type=int, default=None)
p.add_argument("--output", default="results_full.jsonl")
p.add_argument("--dry-run", action="store_true", help="only show design size")
args = p.parse_args()
configs = generate_configs()
log.info(f"design: {len(configs)} runs | factors: {[f.name for f in FACTORS]} | levels: {[len(f.levels) for f in FACTORS]}")
if not args.dry_run:
run_study(args.workers, args.output)

106
engine/studies/mixed_lh.py Normal file
View File

@@ -0,0 +1,106 @@
"""mixed design: full factorial on primary factors, latin hypercube on secondary"""
import sys
sys.path.insert(0, "..")
import logging
from itertools import product
import json
import hashlib
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
import numpy as np
from scipy.stats.qmc import LatinHypercube
from factors import FACTORS, DEMAND_FUNCTIONS, SEEDS_PER_CONFIG
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger(__name__)
LH_SAMPLES = 10
def generate_configs(lh_samples: int = LH_SAMPLES):
primary = [f for f in FACTORS if f.primary]
secondary = [f for f in FACTORS if not f.primary]
primary_grid = list(product(*[f.levels for f in primary]))
lhs = LatinHypercube(d=len(secondary), seed=42)
configs = []
for p_combo in primary_grid:
samples = lhs.random(n=lh_samples)
for s in samples:
sec_vals = {
secondary[i].name: secondary[i].levels[int(s[i] * len(secondary[i].levels))]
for i in range(len(secondary))
}
base = {primary[i].name: p_combo[i] for i in range(len(primary))}
base.update(sec_vals)
for seed in range(SEEDS_PER_CONFIG):
cfg = {**base, "seed": seed}
cfg["id"] = hashlib.md5(json.dumps(cfg, sort_keys=True).encode()).hexdigest()[:8]
configs.append(cfg)
return configs
def run_single(cfg: dict) -> dict:
from engine.wrapper import PHANTOM
import numpy as np
np.random.seed(cfg["seed"])
demand_fn = DEMAND_FUNCTIONS[cfg["demand_fn"]]
env = PHANTOM(
n_products=cfg["n_products"],
alpha=cfg["alpha"],
N=cfg["N"],
)
env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"]))
obs, _ = env.reset()
total_reward, steps = 0.0, 0
for _ in range(100):
action = env.action_space.sample()
obs, reward, term, trunc, _ = env.step(action)
total_reward += reward
steps += 1
if term: break
env.close()
return {
"id": cfg["id"],
"config": cfg,
"total_reward": total_reward,
"avg_reward": total_reward / steps,
"steps": steps,
}
def run_study(max_workers: int = None, output: str = "results_mixed.jsonl", lh_samples: int = LH_SAMPLES):
configs = generate_configs(lh_samples)
n_primary_cells = int(np.prod([len(f.levels) for f in FACTORS if f.primary]))
log.info(f"mixed LH: {len(configs)} configs ({n_primary_cells} primary × {lh_samples} LH × {SEEDS_PER_CONFIG} seeds)")
results = []
with ProcessPoolExecutor(max_workers=max_workers) as ex:
for i, result in enumerate(ex.map(run_single, configs)):
results.append(result)
if (i+1) % 100 == 0: log.info(f"progress: {i+1}/{len(configs)}")
Path(output).write_text("\n".join(json.dumps(r) for r in results))
log.info(f"wrote {len(results)} results to {output}")
return results
if __name__ == "__main__":
import argparse
p = argparse.ArgumentParser()
p.add_argument("--workers", type=int, default=None)
p.add_argument("--output", default="results_mixed.jsonl")
p.add_argument("--lh-samples", type=int, default=10)
p.add_argument("--dry-run", action="store_true", help="only show design size")
args = p.parse_args()
primary = [f for f in FACTORS if f.primary]
secondary = [f for f in FACTORS if not f.primary]
configs = generate_configs(args.lh_samples)
log.info(f"design: {len(configs)} runs | primary: {[f.name for f in primary]} | secondary (LH): {[f.name for f in secondary]}")
if not args.dry_run:
run_study(args.workers, args.output, args.lh_samples)