From 52fe8655987876fb8bd22191f455b28f9417784f Mon Sep 17 00:00:00 2001
From: Daniel Rosel
Date: Fri, 30 Jan 2026 13:18:20 +0100
Subject: [PATCH] feature: drafting studies directory

---
 engine/studies/factors.py        |  55 +++++++++++++
 engine/studies/full_factorial.py | 128 +++++++++++++++++++++++++++++++
 engine/studies/mixed_lh.py       | 101 ++++++++++++++++++++++++
 3 files changed, 284 insertions(+)
 create mode 100644 engine/studies/factors.py
 create mode 100644 engine/studies/full_factorial.py
 create mode 100644 engine/studies/mixed_lh.py

diff --git a/engine/studies/factors.py b/engine/studies/factors.py
new file mode 100644
index 0000000..1fbfbe1
--- /dev/null
+++ b/engine/studies/factors.py
@@ -0,0 +1,55 @@
+"""shared factor definitions for experimental designs"""
+import numpy as np
+from dataclasses import dataclass, field
+from typing import Callable, Any
+
+
+@dataclass
+class Factor:
+    """one experimental factor and the discrete levels it can take"""
+    name: str
+    levels: list
+    primary: bool = True  # full cross vs sampled
+
+
+# demand functions with compatible signatures: each takes (mu, sigma, size)
+# so a study can swap one for another without special-casing the call
+def demand_linear(mu, sigma, size):
+    """normal(mu, sigma) draws clipped below at zero"""
+    return np.maximum(0, np.random.normal(mu, sigma, size))
+
+
+def demand_uniform(mu, sigma, size):
+    """uniform draws on [mu - sigma, mu + sigma)"""
+    return np.random.uniform(mu - sigma, mu + sigma, size)
+
+
+def demand_exponential(mu, sigma, size):
+    """exponential draws with scale mu; sigma is unused, kept for the shared signature"""
+    return np.random.exponential(mu, size)
+
+
+def demand_logistic(mu, sigma, size):
+    """logistic draws with location mu and scale sigma"""
+    # NOTE(review): not clipped at zero like demand_linear, so draws can be
+    # negative - confirm the market model accepts that
+    return np.random.logistic(mu, sigma, size)
+
+
+DEMAND_FUNCTIONS = {
+    "linear": demand_linear,
+    "uniform": demand_uniform,
+    "exponential": demand_exponential,
+    "logistic": demand_logistic,
+}
+
+FACTORS = [
+    Factor("demand_fn", list(DEMAND_FUNCTIONS.keys()), primary=True),
+    Factor("alpha", [0.1, 0.3, 0.5, 0.7], primary=True),
+    Factor("n_products", [5, 15, 30, 50], primary=True),
+    Factor("demand_mu", [30.0, 50.0, 70.0], primary=False),
+    Factor("demand_sigma", [5.0, 10.0, 20.0], primary=False),
+    Factor("N", [100, 500, 1000], primary=False),
+]
+
+SEEDS_PER_CONFIG = 5
diff --git a/engine/studies/full_factorial.py b/engine/studies/full_factorial.py
new file mode 100644
index 0000000..9b4d1eb
--- /dev/null
+++ b/engine/studies/full_factorial.py
@@ -0,0 +1,128 @@
+"""full factorial design - all factor combinations"""
+import sys
+from pathlib import Path
+
+# put the repository root (two levels above this directory) on sys.path, anchored
+# on this file rather than the working directory, so engine.* imports resolve
+# whether the module is run as a script or with python -m
+_REPO_ROOT = str(Path(__file__).resolve().parents[2])
+if _REPO_ROOT not in sys.path:
+    sys.path.insert(0, _REPO_ROOT)
+
+import logging
+from itertools import product
+import json
+import hashlib
+from concurrent.futures import ProcessPoolExecutor
+from typing import Optional
+from engine.studies.factors import FACTORS, DEMAND_FUNCTIONS, SEEDS_PER_CONFIG
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+log = logging.getLogger(__name__)
+
+MAX_STEPS = 100  # upper bound on steps per episode
+
+
+def generate_configs():
+    """generate all factor combinations with seeds
+
+    returns a list of dicts, one per (combination, seed), holding the factor
+    values, the seed and an 8-hex-char "id" hashed from those values
+    """
+    names = [f.name for f in FACTORS]
+    all_levels = [f.levels for f in FACTORS]
+
+    configs = []
+    for combo in product(*all_levels):
+        base = dict(zip(names, combo))
+        for seed in range(SEEDS_PER_CONFIG):
+            cfg = {**base, "seed": seed}
+            # hashed before "id" is inserted, so the id depends only on factors + seed
+            cfg["id"] = hashlib.md5(json.dumps(cfg, sort_keys=True).encode()).hexdigest()[:8]
+            configs.append(cfg)
+    return configs
+
+
+def run_single(cfg: dict) -> dict:
+    """execute one experiment config, return metrics
+
+    plays one random-action episode of at most MAX_STEPS steps and returns the
+    config together with the total reward, average reward and step count
+    """
+    from engine.wrapper import PHANTOM
+    import numpy as np
+
+    np.random.seed(cfg["seed"])
+    demand_fn = DEMAND_FUNCTIONS[cfg["demand_fn"]]
+
+    env = PHANTOM(
+        n_products=cfg["n_products"],
+        alpha=cfg["alpha"],
+        N=cfg["N"],
+    )
+    try:
+        env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"]))
+        # NOTE(review): assumes action_space is a gymnasium-style Space, whose
+        # sample() uses its own generator that np.random.seed does not reach;
+        # seeding it is what makes the random policy repeat for a given seed
+        env.action_space.seed(cfg["seed"])
+
+        obs, _ = env.reset()
+        total_reward, steps = 0.0, 0
+
+        for _ in range(MAX_STEPS):
+            action = env.action_space.sample()
+            obs, reward, term, trunc, _ = env.step(action)
+            # float() keeps the total a plain python float for json.dumps
+            total_reward += float(reward)
+            steps += 1
+            # stop on truncation as well as termination: either flag ends the episode
+            if term or trunc:
+                break
+    finally:
+        env.close()  # release the environment even when the episode raised
+
+    return {
+        "id": cfg["id"],
+        "config": cfg,
+        "total_reward": total_reward,
+        "avg_reward": total_reward / steps,
+        "steps": steps,
+    }
+
+
+def run_study(max_workers: Optional[int] = None, output: str = "results_full.jsonl"):
+    """run every config in a process pool and write the results to output
+
+    one json object is written per line as soon as its result arrives, so a
+    failure part-way through keeps the finished runs on disk; the list of
+    results is also returned
+    """
+    configs = generate_configs()
+    log.info(f"full factorial: {len(configs)} configs ({len(configs)//SEEDS_PER_CONFIG} unique × {SEEDS_PER_CONFIG} seeds)")
+
+    results = []
+    with open(output, "w", encoding="utf-8") as fh, ProcessPoolExecutor(max_workers=max_workers) as ex:
+        for i, result in enumerate(ex.map(run_single, configs), start=1):
+            fh.write(json.dumps(result) + "\n")
+            results.append(result)
+            if i % 100 == 0:
+                log.info(f"progress: {i}/{len(configs)}")
+
+    log.info(f"wrote {len(results)} results to {output}")
+    return results
+
+
+if __name__ == "__main__":
+    import argparse
+    p = argparse.ArgumentParser()
+    p.add_argument("--workers", type=int, default=None)
+    p.add_argument("--output", default="results_full.jsonl")
+    p.add_argument("--dry-run", action="store_true", help="only show design size")
+    args = p.parse_args()
+
+    configs = generate_configs()
+    log.info(f"design: {len(configs)} runs | factors: {[f.name for f in FACTORS]} | levels: {[len(f.levels) for f in FACTORS]}")
+
+    if not args.dry_run:
+        run_study(args.workers, args.output)
diff --git a/engine/studies/mixed_lh.py b/engine/studies/mixed_lh.py
new file mode 100644
index 0000000..33ea2ee
--- /dev/null
+++ b/engine/studies/mixed_lh.py
@@ -0,0 +1,101 @@
+"""mixed design: full factorial on primary factors, latin hypercube on secondary"""
+import sys
+from pathlib import Path
+
+# put the repository root (two levels above this directory) on sys.path, anchored
+# on this file rather than the working directory, so engine.* imports resolve
+# whether the module is run as a script or with python -m
+_REPO_ROOT = str(Path(__file__).resolve().parents[2])
+if _REPO_ROOT not in sys.path:
+    sys.path.insert(0, _REPO_ROOT)
+
+import logging
+from itertools import product
+import json
+import hashlib
+from concurrent.futures import ProcessPoolExecutor
+from typing import Optional
+import numpy as np
+from scipy.stats.qmc import LatinHypercube
+from engine.studies.factors import FACTORS, SEEDS_PER_CONFIG
+# the per-run executor is identical for both designs, so reuse it instead of copying it
+from engine.studies.full_factorial import run_single
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+log = logging.getLogger(__name__)
+
+LH_SAMPLES = 10
+
+
+def generate_configs(lh_samples: int = LH_SAMPLES):
+    """generate the mixed design with seeds
+
+    primary factors are fully crossed; for each primary combination lh_samples
+    latin hypercube points are drawn over the secondary factors and each
+    coordinate is mapped onto one of that factor's discrete levels
+    """
+    primary = [f for f in FACTORS if f.primary]
+    secondary = [f for f in FACTORS if not f.primary]
+
+    primary_grid = list(product(*[f.levels for f in primary]))
+    lhs = LatinHypercube(d=len(secondary), seed=42)
+
+    configs = []
+    for p_combo in primary_grid:
+        primary_vals = {f.name: level for f, level in zip(primary, p_combo)}
+        # NOTE(review): several points can land on the same discrete levels, which
+        # yields repeated configs (and ids) inside one primary cell - confirm intended
+        for point in lhs.random(n=lh_samples):
+            base = dict(primary_vals)
+            for f, u in zip(secondary, point):
+                # scale the unit-interval coordinate to a level index; the min()
+                # keeps the index in range even if a coordinate were exactly 1.0
+                idx = min(int(u * len(f.levels)), len(f.levels) - 1)
+                base[f.name] = f.levels[idx]
+
+            for seed in range(SEEDS_PER_CONFIG):
+                cfg = {**base, "seed": seed}
+                cfg["id"] = hashlib.md5(json.dumps(cfg, sort_keys=True).encode()).hexdigest()[:8]
+                configs.append(cfg)
+    return configs
+
+
+def run_study(max_workers: Optional[int] = None, output: str = "results_mixed.jsonl", lh_samples: int = LH_SAMPLES):
+    """run the mixed design in a process pool and write the results to output
+
+    one json object is written per line as soon as its result arrives, so a
+    failure part-way through keeps the finished runs on disk; the list of
+    results is also returned
+    """
+    configs = generate_configs(lh_samples)
+    n_primary_cells = int(np.prod([len(f.levels) for f in FACTORS if f.primary]))
+    log.info(f"mixed LH: {len(configs)} configs ({n_primary_cells} primary × {lh_samples} LH × {SEEDS_PER_CONFIG} seeds)")
+
+    results = []
+    with open(output, "w", encoding="utf-8") as fh, ProcessPoolExecutor(max_workers=max_workers) as ex:
+        for i, result in enumerate(ex.map(run_single, configs), start=1):
+            fh.write(json.dumps(result) + "\n")
+            results.append(result)
+            if i % 100 == 0:
+                log.info(f"progress: {i}/{len(configs)}")
+
+    log.info(f"wrote {len(results)} results to {output}")
+    return results
+
+
+if __name__ == "__main__":
+    import argparse
+    p = argparse.ArgumentParser()
+    p.add_argument("--workers", type=int, default=None)
+    p.add_argument("--output", default="results_mixed.jsonl")
+    p.add_argument("--lh-samples", type=int, default=LH_SAMPLES)
+    p.add_argument("--dry-run", action="store_true", help="only show design size")
+    args = p.parse_args()
+
+    primary = [f for f in FACTORS if f.primary]
+    secondary = [f for f in FACTORS if not f.primary]
+    configs = generate_configs(args.lh_samples)
+    log.info(f"design: {len(configs)} runs | primary: {[f.name for f in primary]} | secondary (LH): {[f.name for f in secondary]}")
+
+    if not args.dry_run:
+        run_study(args.workers, args.output, args.lh_samples)