mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
shock: defining new lab environment and formulation
This commit is contained in:
176
lab/case/thesis/platform.py
Normal file
176
lab/case/thesis/platform.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""Thesis platform with real MDP behavioral models and separability scoring."""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
from ...outlet import (Platform, PlatformConfig, PositionModel, PositionConfig,
|
||||
PostedPriceMechanism, make_instruments, InstrumentType, LogLevel)
|
||||
from ...outlet.mechanisms.posted_price import PostedPriceConfig
|
||||
from ...outlet.observation import DefaultObservationBuilder, ObservationConfig
|
||||
from .arrivals import ContaminatedArrivalModel, ContaminatedArrivalConfig
|
||||
from .execution import HybridExecutionModel, HybridExecutionConfig
|
||||
from .objectives import RobustStackelbergObjective, RobustObjectiveConfig
|
||||
|
||||
|
||||
@dataclass
class ThesisConfig:
    """Settings consumed by ``make_thesis_platform`` to assemble a platform.

    Fields are grouped by the component they are forwarded to. The data
    directory fields default to an empty string, which ``_resolve_data_dirs``
    treats as "use the ``experiments`` directory of this checkout"; set them
    explicitly to read behavioral data from somewhere else.
    """

    # instruments (forwarded to make_instruments)
    n_instruments: int = 10
    cost_range: tuple[float, float] = (5.0, 50.0)
    margin_range: tuple[float, float] = (0.2, 0.5)

    # contamination (Section 3.1)
    alpha_contamination: float = 0.2
    alpha_drift: float = 0.0
    alpha_bounds: tuple[float, float] = (0.0, 0.5)

    # objectives (Eq 23)
    lambda_coi: float = 0.5
    lambda_ux: float = 0.1
    lambda_volatility: float = 0.2
    wasserstein_epsilon: float = 0.1

    # arrivals (sessions_per_step is passed on as the arrival base_rate)
    sessions_per_step: int = 30
    human_views_range: tuple[int, int] = (1, 4)
    agent_views_range: tuple[int, int] = (3, 10)

    # inventory (initial_inventory seeds every instrument's position)
    initial_inventory: float = 100.0
    holding_cost_rate: float = 0.002

    # real behavioral models (from sim.rl)
    use_real_behavior: bool = True
    use_separability: bool = False  # disabled until classifier trained
    # Left empty on purpose: a machine-specific absolute path here would break
    # on every other checkout and would bypass the repo-relative fallback in
    # _resolve_data_dirs.
    human_data_dir: str = ""
    agent_data_dir: str = ""

    # simulation
    max_steps: int = 500
    seed: int | None = 24
    log_level: LogLevel = LogLevel.AGG_ONLY
|
||||
|
||||
|
||||
def _resolve_data_dirs(cfg: ThesisConfig) -> tuple[str, str]:
    """Return the ``(human, agent)`` data directories for behavioral models.

    A directory set on ``cfg`` is returned unchanged. A falsy value (empty
    string or ``None``) is replaced by the matching folder under the
    ``experiments`` directory located four levels above this file.
    """
    # Walk up four ancestors one step at a time; stepping past the filesystem
    # root simply stays at the root.
    root = Path(__file__)
    for _ in range(4):
        root = root.parent
    experiments = root / "experiments"

    human = cfg.human_data_dir
    if not human:
        human = str(experiments / "collected_data")

    agent = cfg.agent_data_dir
    if not agent:
        agent = str(experiments / "agents/collected_data")

    return human, agent
|
||||
|
||||
|
||||
def make_thesis_platform(cfg: ThesisConfig | None = None) -> Platform:
    """Assemble a ``Platform`` wired with the thesis components.

    Components, as described by the original design notes:
    - Contaminated arrivals using learned MDP kernels from behavior_loader
    - Hybrid execution with real separability scoring from lib.separability
    - Robust Stackelberg objective (Eq 23)

    Args:
        cfg: Settings to build from; a default ``ThesisConfig`` is used when
            none is given.

    Returns:
        The constructed ``Platform``. No market model is attached
        (``market=None``) and demand masking is enabled both in the
        observation builder and in the platform config.
    """
    if not cfg:
        cfg = ThesisConfig()

    rng = np.random.default_rng(cfg.seed)
    human_dir, agent_dir = _resolve_data_dirs(cfg)

    # Instruments are generated as SKUs, then every position is overwritten
    # with the configured starting inventory.
    instruments = make_instruments(
        n=cfg.n_instruments,
        cost_range=cfg.cost_range,
        margin_range=cfg.margin_range,
        inst_type=InstrumentType.SKU,
        rng=rng,
    )
    instruments.position = np.full(cfg.n_instruments, cfg.initial_inventory)

    arrival_cfg = ContaminatedArrivalConfig(
        base_rate=cfg.sessions_per_step,
        alpha_contamination=cfg.alpha_contamination,
        alpha_drift=cfg.alpha_drift,
        alpha_bounds=cfg.alpha_bounds,
        human_views_range=cfg.human_views_range,
        agent_views_range=cfg.agent_views_range,
        use_real_behavior=cfg.use_real_behavior,
        human_data_dir=human_dir,
        agent_data_dir=agent_dir,
    )
    arrival = ContaminatedArrivalModel(arrival_cfg)

    execution_cfg = HybridExecutionConfig(use_separability=cfg.use_separability)
    execution = HybridExecutionModel(execution_cfg)

    mechanism_cfg = PostedPriceConfig(max_delta_pct=0.15, min_margin_pct=0.05)
    mechanism = PostedPriceMechanism(mechanism_cfg)

    position_cfg = PositionConfig(
        initial_position=cfg.initial_inventory,
        holding_cost_rate=cfg.holding_cost_rate,
    )
    position = PositionModel(position_cfg)

    objective_cfg = RobustObjectiveConfig(
        lambda_coi=cfg.lambda_coi,
        lambda_ux=cfg.lambda_ux,
        lambda_volatility=cfg.lambda_volatility,
        wasserstein_epsilon=cfg.wasserstein_epsilon,
    )
    objective = RobustStackelbergObjective(objective_cfg)

    obs_builder = DefaultObservationBuilder(ObservationConfig(mask_true_demand=True))

    platform_cfg = PlatformConfig(
        n_instruments=cfg.n_instruments,
        max_steps=cfg.max_steps,
        seed=cfg.seed,
        log_level=cfg.log_level,
        mask_demand=True,
    )

    return Platform(
        instruments=instruments,
        mechanism=mechanism,
        arrival=arrival,
        execution=execution,
        position=position,
        market=None,
        obs_builder=obs_builder,
        objective=objective,
        cfg=platform_cfg,
    )
|
||||
|
||||
|
||||
@dataclass
class AblationConfig(ThesisConfig):
    """``ThesisConfig`` plus switches that turn individual components off.

    The switches are interpreted by ``make_ablation_platform``; all of them
    default to ``False``, i.e. nothing is ablated.
    """

    # When set, the platform is built with lambda_coi = 0.0.
    disable_coi_penalty: bool = False
    # When set, the platform is built with lambda_ux = 0.0.
    disable_ux_penalty: bool = False
    # When set, the platform is built with alpha_contamination = 0.0.
    disable_contamination: bool = False
    # When set, both use_real_behavior and use_separability are forced off.
    disable_real_behavior: bool = False
|
||||
|
||||
|
||||
def make_ablation_platform(cfg: AblationConfig) -> Platform:
    """Build a thesis platform with the components selected in ``cfg`` disabled.

    Each ``disable_*`` switch overrides the corresponding setting:
    ``disable_coi_penalty`` and ``disable_ux_penalty`` zero their objective
    weights, ``disable_contamination`` zeroes ``alpha_contamination``, and
    ``disable_real_behavior`` turns off both real behavior and separability.

    Args:
        cfg: Base settings and ablation switches. It is not modified; the
            overrides are applied to a copy so the same config object can be
            reused for further runs.

    Returns:
        The platform produced by ``make_thesis_platform`` for the adjusted
        configuration.
    """
    from dataclasses import replace

    overrides: dict[str, object] = {}
    if cfg.disable_coi_penalty:
        overrides["lambda_coi"] = 0.0
    if cfg.disable_ux_penalty:
        overrides["lambda_ux"] = 0.0
    if cfg.disable_contamination:
        overrides["alpha_contamination"] = 0.0
    if cfg.disable_real_behavior:
        overrides["use_real_behavior"] = False
        overrides["use_separability"] = False

    # replace() returns a new AblationConfig, leaving the caller's instance
    # untouched.
    return make_thesis_platform(replace(cfg, **overrides))
|
||||
|
||||
|
||||
def sweep_contamination(alpha_values: list[float], base_cfg: ThesisConfig | None = None,
                        n_steps: int = 100, seed: int = 42) -> dict[float, dict]:
    """Test performance across contamination levels (Theorem 1 validation).

    For every value in ``alpha_values`` a fresh platform is built from
    ``base_cfg`` with ``alpha_contamination`` overridden, and a fixed-price
    policy is rolled out on it.

    Args:
        alpha_values: Contamination levels to evaluate.
        base_cfg: Template configuration; defaults to ``ThesisConfig()``. It
            may be a ``ThesisConfig`` subclass and is never modified.
        n_steps: Step count handed to ``rollout``.
        seed: Seed handed to ``rollout``. The platform itself is built with
            the seed stored in the configuration.

    Returns:
        A mapping from each alpha to a dict with ``total_reward``,
        ``total_pnl``, ``avg_conversion`` and ``final_contamination``.
    """
    from dataclasses import replace

    from ...experiments.eval import rollout, fixed_price_policy

    base_cfg = base_cfg or ThesisConfig()
    results: dict[float, dict] = {}

    for alpha in alpha_values:
        # replace() copies every field of the template, including fields that
        # only exist on subclasses, which a ThesisConfig(**__dict__) rebuild
        # would reject with a TypeError.
        cfg = replace(base_cfg, alpha_contamination=alpha)
        platform = make_thesis_platform(cfg)
        policy = fixed_price_policy(platform.instruments.refs)
        result = rollout(platform, policy, n_steps, seed=seed)
        results[alpha] = {
            'total_reward': result.total_reward,
            'total_pnl': result.total_pnl,
            'avg_conversion': result.avg_conversion,
            # Read after the rollout, from the platform's private state.
            'final_contamination': platform._hidden.contamination,
        }
    return results
|
||||
|
||||
|
||||
def sweep_behavior_modes(base_cfg: ThesisConfig | None = None, n_steps: int = 100, seed: int = 42) -> dict[str, dict]:
    """Compare real vs synthetic behavioral models.

    Three variants of ``base_cfg`` are evaluated with a fixed-price policy:
    ``real_mdp`` (real behavior with separability), ``synthetic`` (both off)
    and ``real_mdp_no_sep`` (real behavior without separability).

    Args:
        base_cfg: Template configuration; defaults to ``ThesisConfig()``. It
            may be a ``ThesisConfig`` subclass and is never modified.
        n_steps: Step count handed to ``rollout``.
        seed: Seed handed to ``rollout``.

    Returns:
        A mapping from mode name to a dict with ``total_reward``,
        ``total_pnl`` and ``avg_conversion``.
    """
    from dataclasses import replace

    from ...experiments.eval import rollout, fixed_price_policy

    base_cfg = base_cfg or ThesisConfig()

    mode_overrides = {
        'real_mdp': {'use_real_behavior': True, 'use_separability': True},
        'synthetic': {'use_real_behavior': False, 'use_separability': False},
        'real_mdp_no_sep': {'use_real_behavior': True, 'use_separability': False},
    }
    # replace() keeps the template's type and all of its fields, so a subclass
    # with extra fields no longer fails the way ThesisConfig(**__dict__) did.
    modes = {name: replace(base_cfg, **flags) for name, flags in mode_overrides.items()}

    results: dict[str, dict] = {}
    for name, cfg in modes.items():
        platform = make_thesis_platform(cfg)
        policy = fixed_price_policy(platform.instruments.refs)
        result = rollout(platform, policy, n_steps, seed=seed)
        results[name] = {
            'total_reward': result.total_reward,
            'total_pnl': result.total_pnl,
            'avg_conversion': result.avg_conversion,
        }
    return results
|
||||
Reference in New Issue
Block a user