Merge pull request #44 from velocitatem/agent-behavior-loader-developemen

Agent behavior loader development + RL loop definition and e2e tests.
2026-07-15 17:43:36 +00:00 · 2026-01-31 10:21:54 +01:00
parent b5f19e04b7 9843c5deab
commit dba8f3fafa
71 changed files with 5073 additions and 751 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -5,18 +5,28 @@
 **/.virtual_documents/
 **/session_*.svg
 **/*graph.svg
-paper/src/bib/auto
+**/auto/*.el
 *.old
 **/package-lock.json
 **/*.parquet
 **/_build/
-# Airflow logs - exclude DAG run logs
+paper/src/bib/auto
 =======
 **/_build/
 paper/src/auto/*
 paper/src/bib/auto
 docs/goals/*.md
 PHANTOM.wiki/
 experiments/airflow/logs/*
 experiments/airflow/logs/scheduler/
 experiments/airflow/logs/dag_processor_manager/
-experiments/collected_data/*
+experiments/collected_data/
-
+experiments/agents/collected_data/
-paper/src/auto/*
+sim/rl/behavior_loader/*.dot
-lib/
+sim/rl/behavior_loader/*.png
-docs/goals/*.md
+sim/rl/behavior_loader/*.svg
-PHANTOM.wiki/
+sim/rl/behavior_loader/*.pdf
 tests/e2e/node_modules/**
-**/auto/*.el
+lab/case/thesis/runs*/
-*.old
+sim/case/thesis_simplified/runs*/
--- a/2
+++ b/2
@@ -49,8 +49,10 @@ test.backend: $(VENV)
 test.e2e:
 	@cd tests/e2e && npm install
 	@cd tests/e2e && npx playwright install chromium
 	@test -f tests/e2e/.env || cp tests/e2e/.env.example tests/e2e/.env
 	@timeout 30 bash -c 'until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done' || (echo "Backend not ready" && exit 1)
 	@timeout 30 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done' || (echo "Web app not ready" && exit 1)
 	@timeout 30 bash -c 'until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done' || (echo "Airflow not ready" && exit 1)
 	@cd tests/e2e && npm test
 .PHONY: test.all
--- a/backend/provider/app.py
+++ b/backend/provider/app.py
@@ -47,53 +47,52 @@ def health() -> dict:
@app.get("/api/{mode}/price/{productId}", response_model=PriceResponse)
 def get_price(mode: Literal['hotel', 'airline'], productId: str, sessionId: Optional[str] = Query(None), experimentId: Optional[str] = Query(None)):
    """
    THIS is the fast lookup service (mechanism).
    Priority: session-keyed price > global optimal price > base price
    """
    product = supabase.table(f'{mode}_products').select("metadata").eq('id', productId).execute().data[0]
    if not product: raise HTTPException(404, f"Product {productId} not found")
    metadata = product['metadata']
    base_price = metadata.get('base_price', 100.0)
-    # fetch pre-computed prices from registry
+    # PRIORITY 1: session-aware price (computed by Airflow worker)
    if sessionId:
        session_price = registry.get_session_price(sessionId, productId)
        if session_price is not None:
            return PriceResponse(
                productId=productId,
                price=session_price,
                base_price=base_price,
                markup=session_price/base_price,
                elasticity=None,
                model_version='session-aware'
            )
    # PRIORITY 2: global pre-computed prices (surge pricing)
    prices_df = registry.get_prices('latest')
-    elasticity_df = registry.get_elasticity('latest')
+    if prices_df is not None:
-
+        product_price_row = prices_df[prices_df['productId'] == productId]
-    if prices_df is None:
+        if not product_price_row.empty:
-        # fallback: no pre-computed prices available
+            optimal_price = float(product_price_row['optimal_price'].iloc[0])
-        return PriceResponse(
+            return PriceResponse(
-            productId=productId,
+                productId=productId,
-            price=base_price,
+                price=optimal_price,
-            base_price=base_price,
+                base_price=base_price,
-            markup=1.0,
+                markup=optimal_price/base_price,
-            elasticity=None
+                elasticity=None,
-        )
+                model_version='surge'
-
+            )
    # lookup pre-computed price for this product
    product_price_row = prices_df[prices_df['productId'] == productId]
    if product_price_row.empty:
        # product not in pre-computed prices, fallback to base
        return PriceResponse(
            productId=productId,
            price=base_price,
            base_price=base_price,
            markup=1.0,
            elasticity=None
        )
    optimal_price = float(product_price_row['optimal_price'].iloc[0]) # TODO: use optimal_price everywhere as  aresult
    # get elasticity if available
    product_elasticity = None
    if elasticity_df is not None:
        product_elasticity_row = elasticity_df[elasticity_df['productId'] == productId]
        if not product_elasticity_row.empty:
            product_elasticity = float(product_elasticity_row['elasticity'].iloc[0])
    # PRIORITY 3: fallback to base price
    return PriceResponse(
        productId=productId,
-        price=optimal_price,
+        price=base_price,
        base_price=base_price,
-        markup=optimal_price/base_price,
+        markup=1.0,
-        elasticity=product_elasticity
+        elasticity=None,
        model_version='base'
    )
@app.get("/models")
--- a/backend/server/app.py
+++ b/backend/server/app.py
@@ -198,12 +198,16 @@ def dump_logs(
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            value_deserializer=lambda x: json.loads(x.decode('utf-8')),
-            consumer_timeout_ms=5000
+            consumer_timeout_ms=30000,
            fetch_max_wait_ms=10000,
            max_poll_records=1000
        )
        events = []
        for msg in consumer:
            events.append(msg.value)
            if last_n and len(events) >= last_n * 2:
                break
        consumer.close()
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -112,11 +112,14 @@ services:
    depends_on:
      - postgres
    environment:
-      - AIRFLOW__CORE__EXECUTOR=SequentialExecutor
+      - AIRFLOW__CORE__EXECUTOR=LocalExecutor
      - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
      - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
      - AIRFLOW__CORE__LOAD_EXAMPLES=false
      - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
      - AIRFLOW__CORE__PARALLELISM=16
      - AIRFLOW__CORE__DAG_CONCURRENCY=8
      - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
      - _AIRFLOW_DB_MIGRATE=true
      - _AIRFLOW_WWW_USER_CREATE=true
      - _AIRFLOW_WWW_USER_USERNAME=admin
@@ -136,14 +139,20 @@ services:
      - airflow-init
      - redis
    environment:
-      - AIRFLOW__CORE__EXECUTOR=SequentialExecutor
+      - AIRFLOW__CORE__EXECUTOR=LocalExecutor
      - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
      - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
      - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true
      - AIRFLOW__CORE__LOAD_EXAMPLES=false
      - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
      - AIRFLOW__CORE__PARALLELISM=16
      - AIRFLOW__CORE__DAG_CONCURRENCY=8
      - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
      - AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30
      - AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60
      - AIRFLOW__WEBSERVER__EXPOSE_CONFIG=true
      - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY}
      - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth
      - KAFKA_HOST=kafka
      - KAFKA_PORT=29092
      - BACKEND_URL=http://backend:5000
@@ -173,13 +182,20 @@ services:
      redis:
        condition: service_started
    environment:
-      - AIRFLOW__CORE__EXECUTOR=SequentialExecutor
+      - AIRFLOW__CORE__EXECUTOR=LocalExecutor
      - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
      - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
      - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true
      - AIRFLOW__CORE__LOAD_EXAMPLES=false
      - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
      - AIRFLOW__CORE__PARALLELISM=16
      - AIRFLOW__CORE__DAG_CONCURRENCY=8
      - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
      - AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30
      - AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60
      - AIRFLOW__SCHEDULER__PARSING_PROCESSES=2
      - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY}
      - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth
      - KAFKA_HOST=kafka
      - KAFKA_PORT=29092
      - BACKEND_URL=http://backend:5000
--- a/engine/engine.py
+++ b/engine/engine.py
@@ -0,0 +1,66 @@
 from sys import platform
 import numpy as np
 from .lib.demand import generate_demand, estimate_demand
 from .lib.behavior import sample_behavior
 from logging import INFO, getLogger
 logger = getLogger(__name__)
 logger.setLevel(INFO)
 class MarketEngine():
    """Simulated market made of human and agent sessions that reacts to posted prices."""
    def __init__(self,
                 alpha = 0.5,
                 N = 100,
                 demand_distribution = (50, 10),
                 demand_sampling_function = np.random.normal):
        # alpha is the agent share of the N sessions; the remainder are humans
        self.Nagents = int(N*alpha)
        self.Nhumans = int(N*(1-alpha))
        # stored as (sampler, params) so act() can splat it into generate_demand
        self.demand = (demand_sampling_function, demand_distribution)
    def act(self, prices):
        """Generate demand for `prices`, sample one trajectory per session, return the demand estimated from them."""
        demand = generate_demand(prices, *self.demand)
        # humans are sampled first, then agents
        human_t = []
        for _ in range(self.Nhumans):
            human_t.append(sample_behavior(demand, human=True))
        agent_t = []
        for _ in range(self.Nagents):
            agent_t.append(sample_behavior(demand, human=False))
        return estimate_demand(human_t + agent_t)
    def measure(self):
        # placeholder, not implemented yet
        pass
 class PricingEngine():
    """Placeholder platform: ignores the observed demand and posts ten random prices."""
    def __init__(self) -> None:
        pass
    def act(self, demand):
        # `demand` is unused for now; prices are uniform draws between 25 and 100
        n_prices = 10
        return np.random.uniform(25, 100, n_prices)
 class Limbo():
    """Turn-based loop in which platform and market alternately transform a shared output."""
    def __init__(self,
                 platform,
                 market
                 ) -> None:
        self.platform = platform
        self.market = market
        self.platform_turn = True  # the platform moves first
        self.output = None  # result of the most recent act() call
    def step(self):
        """Let the side whose turn it is act on the previous output, print the result and pass the turn."""
        actor = self.platform if self.platform_turn else self.market
        self.output = actor.act(self.output)
        print(self.output)
        self.platform_turn = not self.platform_turn
 if __name__ == "__main__":
    # demo: ten alternating platform/market turns with default engines
    limbo = Limbo(PricingEngine(), MarketEngine())
    remaining = 10
    while remaining > 0:
        limbo.step()
        remaining -= 1
--- a/engine/lib/init.py
+++ b/engine/lib/init.py
@@ -0,0 +1,3 @@
 from .demand import generate_demand, estimate_demand
 from .behavior import sample_behavior
 from .render import DashboardRenderer, style_axis
--- a/engine/lib/behavior.py
+++ b/engine/lib/behavior.py
@@ -0,0 +1,47 @@
 from sim.rl.behavior_loader.models import BehaviorModel, AgentBehaviorModel, aggregate_event_transitions
 import pandas as pd
 import numpy as np
 from .demand import generate_demand
 # NOTE(review): absolute path into one developer's home directory, so the data
 # directories below only exist on that machine; consider making this configurable.
 base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments"
 # directories handed to BehaviorModel (human) and AgentBehaviorModel (agent)
 human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/"
 _cache = {}  # lazy cache: 'human' / 'agent' -> base transition pivot (DataFrame)
 def _get_base_pivot(human: bool):
    """Return the base transition pivot for humans or agents, building and caching it on first use."""
    key = 'human' if human else 'agent'
    try:
        return _cache[key]
    except KeyError:
        pass
    # first request for this population: load its model and build the pivot
    if human:
        model = BehaviorModel(human_dir)
    else:
        model = AgentBehaviorModel(agent_dir)
    transitions = aggregate_event_transitions(model.build_MDP())
    pivot = pd.DataFrame(transitions).fillna(0.0)
    _cache[key] = pivot
    return pivot
 def adjust_behavior_to_condition(condition, transition_matrix):
    """Expand an NxN event transition matrix to (N*P)x(N*P), weighting products by demand.

    condition: per-product demand weights (any array-like of length P).
    transition_matrix: DataFrame whose rows and columns are event names.
    Returns a DataFrame labelled "<event>_product<p>" on both axes; every original
    cell becomes a PxP block equal to that cell times outer(w, w), where w is
    `condition` normalised to sum to 1.
    """
    weights = np.asarray(condition, dtype=float)  # also accepts plain lists
    n_products = len(weights)
    total = weights.sum()
    if total > 0:
        cond_norm = weights / total
    else:
        # zero total demand used to give 0/0 and an all-NaN matrix; weight products uniformly instead
        cond_norm = np.full(n_products, 1.0 / max(n_products, 1))
    base_vals = transition_matrix.values
    base_cols, base_rows = transition_matrix.columns.tolist(), transition_matrix.index.tolist()
    # expand via kronecker-like tiling: each cell becomes a P*P block weighted by outer product of cond_norm
    expanded = np.kron(base_vals, np.outer(cond_norm, cond_norm))
    new_cols = [f"{c}_product{p}" for c in base_cols for p in range(n_products)]
    new_rows = [f"{r}_product{p}" for r in base_rows for p in range(n_products)]
    return pd.DataFrame(expanded, index=new_rows, columns=new_cols)
 def sample_behavior(condition, human=True, max_len=40):
    """Sample one event trajectory from the demand-adjusted transition matrix.

    condition: per-product demand weights passed to adjust_behavior_to_condition.
    human: use the human base pivot when True, the agent one otherwise.
    max_len: upper bound on the number of events in the trajectory.
    Returns a list of "<event>_product<p>" labels. The first label is drawn
    uniformly from the row labels; sampling stops at the first label containing
    'checkout' or once max_len labels exist, whichever comes first.
    """
    base_pivot = _get_base_pivot(human)
    adjusted_transitions = adjust_behavior_to_condition(condition, base_pivot)
    next_states = adjusted_transitions.columns
    trajectory = [np.random.choice(adjusted_transitions.index)]
    # the old condition (`< max_len or 'checkout' in last`) never stopped at checkout
    # and kept sampling past max_len while the last event was a checkout
    while len(trajectory) < max_len and 'checkout' not in trajectory[-1]:
        probs = adjusted_transitions.loc[trajectory[-1]].values
        total = np.sum(probs)
        # rows with no outgoing mass fall back to a uniform draw (p=None)
        sample = np.random.choice(next_states, p=probs / total if total > 0 else None)
        trajectory.append(sample)
    return trajectory
 if __name__ == "__main__":
    # smoke test: print one human trajectory, then one agent trajectory
    for is_human in (True, False):
        t = sample_behavior(generate_demand(np.array([10,20,30])), human=is_human)
        print(t)
--- a/engine/lib/demand.py
+++ b/engine/lib/demand.py
@@ -0,0 +1,45 @@
 import logging
 import numpy as np
 from logging import getLogger
 logger = getLogger(__name__)
 def generate_demand(prices, distribution_method = np.random.normal, distribution_params = (50.0, 10.0)):
    """Draw one valuation per product and turn the surplus over price into percentage demand.

    Each product gets a valuation from distribution_method(*distribution_params, size=len(prices)).
    Demand is max(0, valuation - price), rescaled to sum to 100 unless every entry is zero.
    """
    n_products = len(prices)
    product_valuations = distribution_method(*distribution_params, size=n_products)
    # linear response: demand is the (non-negative) surplus of valuation over price
    surplus = product_valuations - prices
    demand = np.maximum(0, surplus)
    total = np.sum(demand)
    if total > 0:
        # express as a percentage share; skipped when total is zero to avoid dividing by zero
        demand = demand / total * 100
    logger.info(f"Generated demand for prices {prices}: {demand} with valuations from distribution {distribution_params}")
    return demand
 def estimate_demand(trajectories):
    """Estimate per-product demand as each product's percentage share of 'view_product' events.

    Returns {product_id: percentage}; products that were never viewed are absent,
    and the result is empty when no view events occur.
    """
    view_counts = {}
    for event in (ev for traj in trajectories for ev in traj):
        if 'view_product' not in event:
            continue
        # the product index is encoded in the last '_' separated token, e.g. "product3"
        suffix = event.split('_')[-1]
        product_id = int(suffix.replace('product', ''))
        view_counts[product_id] = view_counts.get(product_id, 0) + 1
    total_views = sum(view_counts.values())
    return {pid: (count / total_views) * 100 for pid, count in view_counts.items()}  # normalize to percentage
 # Example usage
 if __name__ == "__main__":
    # Demo: generate demand for four prices, simulate sessions, and compare the
    # demand recovered from their trajectories with the generated one.
    np.random.seed(42)
    prices = np.array([20.0, 35.0, 50.0, 65.0])
    demand = generate_demand(prices)
    print("Generated Demand:", demand)
    # relative import: resolves only when this file runs as a module inside its
    # package (python -m ...), not when executed as a plain script
    from .behavior import sample_behavior
    # N sessions in total, alphat is the fraction of them sampled as agents
    N, alphat =200, 0.1
    trajectories = []
    for _ in range(int(N*(1 - alphat))):
        trajectories.append(sample_behavior(demand, human=True))
    for _ in range(int(N*alphat)):
        trajectories.append(sample_behavior(demand, human=False))
    demand_estimate = estimate_demand(trajectories)
    print("Estimated Demand from Behavior:", demand_estimate)
    # per-product difference (estimate - generated); products missing from the estimate count as 0
    delta = {k: demand_estimate.get(k, 0) - demand[i] for i, k in enumerate(range(len(prices)))}
    # collapse to the mean absolute difference
    delta = np.mean([np.abs(v) for v in delta.values()])
    print("Demand Delta:", delta)
--- a/engine/lib/render.py
+++ b/engine/lib/render.py
@@ -0,0 +1,126 @@
 """rendering logic for PHANTOM environment dashboard"""
 import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.gridspec import GridSpec
 def style_axis(ax, title: str = None, xlabel: str = None, ylabel: str = None):
    """Hide the top and right spines of `ax` and set whichever of title/xlabel/ylabel is truthy."""
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    if title:
        ax.set_title(title, fontsize=11, fontweight='bold', pad=8)
    if xlabel:
        ax.set_xlabel(xlabel, fontsize=9)
    if ylabel:
        ax.set_ylabel(ylabel, fontsize=9)
 class DashboardRenderer:
    """stateful renderer for PHANTOM market dynamics visualization

    Holds one matplotlib figure and a 3x3 GridSpec. Both are created lazily on the
    first render() call; every later call clears the figure and redraws all panels.
    """
    def __init__(self):
        # both stay None until the first render() call
        self.fig = None
        self.gs = None
    def render(self, env) -> None:
        """Redraw the whole dashboard from the state stored on `env`.

        Reads env._step_count, env.alpha, env.n_products, env.market,
        env._demand_history, env._price_history, env._revenue_history and calls
        env._compute_elasticity().
        """
        if self.fig is None:
            # first call (or first call after close()): interactive mode, non-blocking window
            plt.ion()
            self.fig = plt.figure(figsize=(14, 10))
            self.gs = GridSpec(3, 3, figure=self.fig, hspace=0.35, wspace=0.3,
                               left=0.07, right=0.95, top=0.92, bottom=0.08)
            plt.show(block=False)
        # every frame starts from an empty figure; the GridSpec object is reused
        self.fig.clear()
        self.fig.suptitle(f'PHANTOM  Market Dynamics  [t={env._step_count}, a={env.alpha:.2f}]',
                          fontsize=14, fontweight='bold')
        # transposed histories; assumes each history entry is one per-product vector,
        # giving (product, step) matrices -- TODO confirm against the env
        demand_mat = np.array(env._demand_history).T
        price_mat = np.array(env._price_history).T
        elasticity = env._compute_elasticity()
        # row 0: scatter | elasticity bars | session pie
        self._render_scatter(env)
        self._render_elasticity_bar(env, elasticity)
        self._render_session_pie(env)
        # row 1: price heatmap (two columns wide) | demand heatmap
        self._render_price_heatmap(price_mat)
        self._render_demand_heatmap(demand_mat)
        # row 2: correlation matrix | revenue curve (two columns wide)
        self._render_correlation(env.n_products, price_mat, demand_mat)
        self._render_revenue(env)
        self.fig.canvas.draw_idle()
        self.fig.canvas.flush_events()
    def _render_scatter(self, env):
        """Scatter every recorded (price, demand) pair, coloured by product, plus a linear fit."""
        ax = self.fig.add_subplot(self.gs[0, 0])
        prices_flat = np.array(env._price_history).flatten()
        demands_flat = np.array(env._demand_history).flatten()
        # colour index per point: 0..n_products-1 repeated once per recorded step
        product_ids = np.tile(np.arange(env.n_products), len(env._price_history))
        ax.scatter(prices_flat, demands_flat, c=product_ids, cmap='plasma', alpha=0.6, s=15, edgecolors='none')
        if len(prices_flat) > 1:
            # degree-1 least-squares fit drawn across the observed price range
            z = np.polyfit(prices_flat, demands_flat, 1)
            p_line = np.linspace(prices_flat.min(), prices_flat.max(), 50)
            ax.plot(p_line, np.polyval(z, p_line), '--', lw=1.5, alpha=0.8)
        style_axis(ax, "Price-Demand Relationship", "Price ($)", "Demand")
    def _render_elasticity_bar(self, env, elasticity):
        """Horizontal bar per product showing the supplied elasticity values."""
        ax = self.fig.add_subplot(self.gs[0, 1])
        ax.barh(range(env.n_products), elasticity, alpha=0.8)
        # reference lines at 0 and (dashed) at -1 -- presumably the unit-elasticity mark
        ax.axvline(0, lw=0.8, alpha=0.5)
        ax.axvline(-1, lw=1, ls='--', alpha=0.5)
        ax.set_yticks(range(env.n_products))
        ax.set_yticklabels([f'P{i}' for i in range(env.n_products)], fontsize=7)
        style_axis(ax, "Price Elasticity", "(dQ/dP)(P/Q)", None)
    def _render_session_pie(self, env):
        """Pie chart of human vs agent session counts taken from env.market."""
        ax = self.fig.add_subplot(self.gs[0, 2])
        n_h, n_a = env.market.Nhumans, env.market.Nagents
        wedges, _ = ax.pie([n_h, n_a], startangle=90, wedgeprops={'linewidth': 2, 'edgecolor': 'white'})
        ax.legend(wedges, [f'H ({n_h})', f'A ({n_a})'], loc='lower center', fontsize=8,
                  frameon=False, bbox_to_anchor=(0.5, -0.05))
        ax.set_title("Session Mix", fontsize=11, fontweight='bold')
    def _render_price_heatmap(self, price_mat):
        """Heatmap of price_mat with a colorbar labelled '$'."""
        ax = self.fig.add_subplot(self.gs[1, :2])
        im = ax.imshow(price_mat, aspect='auto', cmap='viridis', origin='lower')
        style_axis(ax, "Price Heatmap P(product, t)", "Step", "Product")
        cbar = self.fig.colorbar(im, ax=ax, fraction=0.03, pad=0.02)
        cbar.set_label('$', fontsize=8)
    def _render_demand_heatmap(self, demand_mat):
        """Heatmap of demand_mat with an unlabelled colorbar."""
        ax = self.fig.add_subplot(self.gs[1, 2])
        im = ax.imshow(demand_mat, aspect='auto', cmap='Blues', origin='lower')
        style_axis(ax, "Demand Q(product, t)", "Step", None)
        self.fig.colorbar(im, ax=ax, fraction=0.046, pad=0.02)
    def _render_correlation(self, n_products, price_mat, demand_mat):
        """Price-vs-demand correlation matrix; left empty until more than two columns exist."""
        ax = self.fig.add_subplot(self.gs[2, 0])
        if price_mat.shape[1] > 2:
            # corrcoef treats the rows of both inputs as variables of one stacked matrix;
            # the upper-right block pairs price rows with demand rows
            corr = np.corrcoef(price_mat, demand_mat)[:n_products, n_products:]
            im = ax.imshow(corr, cmap='RdBu', vmin=-1, vmax=1, aspect='auto')
            ax.set_xticks(range(n_products))
            ax.set_yticks(range(n_products))
            ax.set_xticklabels([f'Q{i}' for i in range(n_products)], fontsize=6)
            ax.set_yticklabels([f'P{i}' for i in range(n_products)], fontsize=6)
            self.fig.colorbar(im, ax=ax, fraction=0.046, pad=0.02)
        style_axis(ax, "Price-Demand Correlation", None, None)
    def _render_revenue(self, env):
        """Revenue per step (left axis) with the per-step std of demand on a twin right axis."""
        ax = self.fig.add_subplot(self.gs[2, 1:])
        n_steps = len(env._revenue_history)
        # one dispersion value per recorded demand entry
        demand_std = [np.std(d) for d in env._demand_history]
        ax.fill_between(range(n_steps), env._revenue_history, alpha=0.3)
        ax.plot(env._revenue_history, linewidth=2, label='Revenue')
        ax.set_xlim(0, max(n_steps, 1))
        ax.set_ylim(0, max(env._revenue_history) * 1.1 if env._revenue_history else 1)
        ax2 = ax.twinx()
        ax2.plot(range(n_steps), demand_std, linewidth=2, ls='-', alpha=0.9, label='sigma(Demand)')
        # NOTE(review): min()/max() raise ValueError when the demand history is empty,
        # unlike the guarded revenue limit above
        d_min, d_max = min(demand_std), max(demand_std)
        # pad the range by 20%, or by a fixed 0.5 when all values are equal
        margin = (d_max - d_min) * 0.2 if d_max > d_min else 0.5
        ax2.set_ylim(max(0, d_min - margin), d_max + margin)
        ax2.set_ylabel('Demand sigma', fontsize=9)
        style_axis(ax, "Revenue & Demand Dispersion", "Step", "Revenue ($)")
        ax.legend(loc='upper left', fontsize=7, frameon=False)
        ax2.legend(loc='upper right', fontsize=7, frameon=False)
    def close(self):
        """Close the figure if one exists; the next render() call creates a new one."""
        if self.fig:
            plt.close(self.fig)
            self.fig = None
--- a/engine/studies/factors.py
+++ b/engine/studies/factors.py
@@ -0,0 +1,34 @@
 """shared factor definitions for experimental designs"""
 import numpy as np
 from dataclasses import dataclass, field
 from typing import Callable, Any
@dataclass
 class Factor:
    """One experimental factor: a name plus the discrete levels it can take."""
    name: str  # identifier of the factor
    levels: list  # candidate values for this factor
    primary: bool = True  # full cross vs sampled
 # demand functions with compatible signatures
 def demand_linear(mu, sigma, size):
    """Normal(mu, sigma) draws, clipped below at zero."""
    draws = np.random.normal(mu, sigma, size)
    return np.maximum(0, draws)
 def demand_uniform(mu, sigma, size):
    """Uniform draws between mu - sigma and mu + sigma."""
    low, high = mu - sigma, mu + sigma
    return np.random.uniform(low, high, size)
 def demand_exponential(mu, sigma, size):
    """Exponential draws with scale mu; sigma is accepted for signature compatibility but not used."""
    return np.random.exponential(scale=mu, size=size)
 def demand_logistic(mu, sigma, size):
    """Logistic draws with location mu and scale sigma."""
    return np.random.logistic(loc=mu, scale=sigma, size=size)
 # lookup from the "demand_fn" factor level to the sampler implementing it
 DEMAND_FUNCTIONS = {
    "linear": demand_linear,
    "uniform": demand_uniform,
    "exponential": demand_exponential,
    "logistic": demand_logistic,
 }
 # Level counts: 4, 4, 4 for the primary factors and 3, 3, 3 for the secondary ones,
 # so a full cross of everything listed here has 4*4*4*3*3*3 = 1728 combinations.
 FACTORS = [
    Factor("demand_fn", list(DEMAND_FUNCTIONS.keys()), primary=True),
    Factor("alpha", [0.1, 0.3, 0.5, 0.7], primary=True),
    Factor("n_products", [5, 15, 30, 50], primary=True),
    Factor("demand_mu", [30.0, 50.0, 70.0], primary=False),
    Factor("demand_sigma", [5.0, 10.0, 20.0], primary=False),
    Factor("N", [100, 500, 1000], primary=False),
 ]
 # number of seeds (replicates) per factor combination
 SEEDS_PER_CONFIG = 5
--- a/engine/studies/full_factorial.py
+++ b/engine/studies/full_factorial.py
@@ -0,0 +1,89 @@
 """full factorial design - all factor combinations"""
 import sys
 sys.path.insert(0, "..")
 import logging
 from itertools import product
 import json
 import hashlib
 from pathlib import Path
 from concurrent.futures import ProcessPoolExecutor
 from .factors import FACTORS, DEMAND_FUNCTIONS, SEEDS_PER_CONFIG
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 log = logging.getLogger(__name__)
 def generate_configs():
    """Build one config dict per (factor combination, seed) pair.

    Every dict maps factor name -> level, plus "seed" and an 8-hex-char "id"
    derived from the md5 of the sorted-key JSON of the other entries.
    """
    names = [f.name for f in FACTORS]
    level_grid = product(*(f.levels for f in FACTORS))
    configs = []
    for combo in level_grid:
        base = dict(zip(names, combo))
        for seed in range(SEEDS_PER_CONFIG):
            cfg = dict(base, seed=seed)
            # the id is hashed before "id" itself is added to the dict
            digest = hashlib.md5(json.dumps(cfg, sort_keys=True).encode()).hexdigest()
            cfg["id"] = digest[:8]
            configs.append(cfg)
    return configs
 def run_single(cfg: dict) -> dict:
    """Execute one experiment config with a random policy and return its metrics.

    cfg must provide "seed", "demand_fn", "n_products", "alpha", "N", "demand_mu",
    "demand_sigma" and "id". The environment is stepped with randomly sampled
    actions for at most 100 steps, stopping early when the episode terminates or
    is truncated.
    Returns {"id", "config", "total_reward", "avg_reward", "steps"}.
    """
    from engine.wrapper import PHANTOM
    import numpy as np
    np.random.seed(cfg["seed"])
    demand_fn = DEMAND_FUNCTIONS[cfg["demand_fn"]]
    env = PHANTOM(
        n_products=cfg["n_products"],
        alpha=cfg["alpha"],
        N=cfg["N"],
    )
    total_reward, steps = 0.0, 0
    try:
        # swap in the demand sampler and parameters selected by this config
        env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"]))
        # action_space.sample() draws from the space's own generator, not np.random,
        # so it has to be seeded separately for the per-config seed to reproduce actions
        env.action_space.seed(cfg["seed"])
        obs, _ = env.reset()
        for _ in range(100):
            action = env.action_space.sample()
            obs, reward, term, trunc, _ = env.step(action)
            total_reward += reward
            steps += 1
            # an episode is over on termination OR truncation
            if term or trunc: break
    finally:
        # release the environment even when a step raises
        env.close()
    # coerce to a built-in float so JSON serialisation never sees a NumPy scalar type
    total_reward = float(total_reward)
    return {
        "id": cfg["id"],
        "config": cfg,
        "total_reward": total_reward,
        "avg_reward": total_reward / steps if steps > 0 else 0.0,
        "steps": steps,
    }
 def run_study(max_workers: int = None, output: str = "results_full.jsonl"):
    """Run every generated config in a process pool, write one JSON object per line to `output`, return the results."""
    configs = generate_configs()
    n_total = len(configs)
    n_unique = n_total//SEEDS_PER_CONFIG
    log.info(f"full factorial: {n_total} configs ({n_unique} unique × {SEEDS_PER_CONFIG} seeds)")
    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        # ex.map yields results in config order
        for done, result in enumerate(ex.map(run_single, configs), start=1):
            results.append(result)
            if done % 100 == 0:
                log.info(f"progress: {done}/{n_total}")
    lines = [json.dumps(r) for r in results]
    Path(output).write_text("\n".join(lines))
    log.info(f"wrote {len(results)} results to {output}")
    return results
 if __name__ == "__main__":
    import argparse
    # CLI: always report the design size; run the study unless --dry-run is given
    parser = argparse.ArgumentParser()
    parser.add_argument("--workers", type=int, default=None)
    parser.add_argument("--output", default="results_full.jsonl")
    parser.add_argument("--dry-run", action="store_true", help="only show design size")
    args = parser.parse_args()
    n_runs = len(generate_configs())
    factor_names = [f.name for f in FACTORS]
    level_counts = [len(f.levels) for f in FACTORS]
    log.info(f"design: {n_runs} runs | factors: {factor_names} | levels: {level_counts}")
    if not args.dry_run:
        run_study(args.workers, args.output)
--- a/engine/studies/mixed_lh.py
+++ b/engine/studies/mixed_lh.py
@@ -0,0 +1,106 @@
 """mixed design: full factorial on primary factors, latin hypercube on secondary"""
 import sys
 sys.path.insert(0, "..")
 import logging
 from itertools import product
 import json
 import hashlib
 from pathlib import Path
 from concurrent.futures import ProcessPoolExecutor
 import numpy as np
 from scipy.stats.qmc import LatinHypercube
 from factors import FACTORS, DEMAND_FUNCTIONS, SEEDS_PER_CONFIG
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 log = logging.getLogger(__name__)
 LH_SAMPLES = 10
 def generate_configs(lh_samples: int = LH_SAMPLES):
    """Cross all primary factor levels; per primary cell draw `lh_samples` Latin-hypercube points over the secondary factors.

    Each point is expanded once per seed. Every config dict maps factor name -> level,
    plus "seed" and an 8-hex-char "id" (md5 of the sorted-key JSON of the other entries).
    """
    primary, secondary = [], []
    for f in FACTORS:
        (primary if f.primary else secondary).append(f)
    primary_names = [f.name for f in primary]
    # one sampler for the whole design, so successive cells receive successive draws
    lhs = LatinHypercube(d=len(secondary), seed=42)
    configs = []
    for p_combo in product(*[f.levels for f in primary]):
        for s in lhs.random(n=lh_samples):
            base = dict(zip(primary_names, p_combo))
            for f, u in zip(secondary, s):
                # map the unit-interval coordinate onto an index into the factor's levels
                base[f.name] = f.levels[int(u * len(f.levels))]
            for seed in range(SEEDS_PER_CONFIG):
                cfg = dict(base, seed=seed)
                digest = hashlib.md5(json.dumps(cfg, sort_keys=True).encode()).hexdigest()
                cfg["id"] = digest[:8]
                configs.append(cfg)
    return configs
 def run_single(cfg: dict) -> dict:
    """Execute one experiment config with a random policy and return its metrics.

    cfg must provide "seed", "demand_fn", "n_products", "alpha", "N", "demand_mu",
    "demand_sigma" and "id". The environment is stepped with randomly sampled
    actions for at most 100 steps, stopping early when the episode terminates or
    is truncated.
    Returns {"id", "config", "total_reward", "avg_reward", "steps"}.
    """
    from engine.wrapper import PHANTOM
    import numpy as np
    np.random.seed(cfg["seed"])
    demand_fn = DEMAND_FUNCTIONS[cfg["demand_fn"]]
    env = PHANTOM(
        n_products=cfg["n_products"],
        alpha=cfg["alpha"],
        N=cfg["N"],
    )
    total_reward, steps = 0.0, 0
    try:
        # swap in the demand sampler and parameters selected by this config
        env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"]))
        # action_space.sample() draws from the space's own generator, not np.random,
        # so it has to be seeded separately for the per-config seed to reproduce actions
        env.action_space.seed(cfg["seed"])
        obs, _ = env.reset()
        for _ in range(100):
            action = env.action_space.sample()
            obs, reward, term, trunc, _ = env.step(action)
            total_reward += reward
            steps += 1
            # an episode is over on termination OR truncation
            if term or trunc: break
    finally:
        # release the environment even when a step raises
        env.close()
    # coerce to a built-in float so JSON serialisation never sees a NumPy scalar type
    total_reward = float(total_reward)
    return {
        "id": cfg["id"],
        "config": cfg,
        "total_reward": total_reward,
        # guarded like the full-factorial runner so a zero-step run cannot divide by zero
        "avg_reward": total_reward / steps if steps > 0 else 0.0,
        "steps": steps,
    }
 def run_study(max_workers: int = None, output: str = "results_mixed.jsonl", lh_samples: int = LH_SAMPLES):
    """Run every mixed-design config in a process pool, write one JSON object per line to `output`, return the results."""
    configs = generate_configs(lh_samples)
    n_total = len(configs)
    primary_sizes = [len(f.levels) for f in FACTORS if f.primary]
    n_primary_cells = int(np.prod(primary_sizes))
    log.info(f"mixed LH: {n_total} configs ({n_primary_cells} primary × {lh_samples} LH × {SEEDS_PER_CONFIG} seeds)")
    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        # ex.map yields results in config order
        for done, result in enumerate(ex.map(run_single, configs), start=1):
            results.append(result)
            if done % 100 == 0:
                log.info(f"progress: {done}/{n_total}")
    lines = [json.dumps(r) for r in results]
    Path(output).write_text("\n".join(lines))
    log.info(f"wrote {len(results)} results to {output}")
    return results
 if __name__ == "__main__":
    import argparse
    # CLI: always report the design size; run the study unless --dry-run is given
    parser = argparse.ArgumentParser()
    parser.add_argument("--workers", type=int, default=None)
    parser.add_argument("--output", default="results_mixed.jsonl")
    parser.add_argument("--lh-samples", type=int, default=10)
    parser.add_argument("--dry-run", action="store_true", help="only show design size")
    args = parser.parse_args()
    primary = [f for f in FACTORS if f.primary]
    secondary = [f for f in FACTORS if not f.primary]
    configs = generate_configs(args.lh_samples)
    primary_names = [f.name for f in primary]
    secondary_names = [f.name for f in secondary]
    log.info(f"design: {len(configs)} runs | primary: {primary_names} | secondary (LH): {secondary_names}")
    if not args.dry_run:
        run_study(args.workers, args.output, args.lh_samples)
--- a/engine/train.py
+++ b/engine/train.py
@@ -0,0 +1,45 @@
 from stable_baselines3 import SAC
 from stable_baselines3.common.callbacks import EvalCallback, BaseCallback
 from .wrapper import PHANTOM
 class RenderCallback(BaseCallback):
    """Renders environment on every step for live visualization.

    Calls render() on the environment object passed to the constructor and
    always returns True from _on_step.
    """
    def __init__(self, env: PHANTOM):
        super().__init__()
        # environment whose render() is invoked from _on_step
        self.env = env
    def _on_step(self) -> bool:
        self.env.render()
        # presumably True tells the trainer to keep going -- confirm against the BaseCallback contract
        return True
 # NOTE(review): everything below runs at import time (there is no __main__ guard),
 # so importing this module starts a full training run.
 # training env renders to screen; the evaluation env has rendering disabled
 env = PHANTOM(n_products=10, alpha=0.3, render_mode="human")
 eval_env = PHANTOM(n_products=10, alpha=0.3, render_mode=None)
 model = SAC(
    "MultiInputPolicy",
    env,
    verbose=1,
    learning_rate=3e-4,
    buffer_size=50000,
    batch_size=256,
    tau=0.005,
    gamma=0.99,
 )
 render_cb = RenderCallback(env)
 eval_cb = EvalCallback(eval_env, eval_freq=1000, n_eval_episodes=5, verbose=1)
 model.learn(total_timesteps=50000, callback=[render_cb, eval_cb])
 model.save("phantom_sac")
 # test trained policy
 # NOTE(review): `env` is rebound here without closing the training env, and
 # eval_env is never closed; only this last env reaches env.close() below.
 env = PHANTOM(n_products=10, alpha=0.3, render_mode="human")
 obs, _ = env.reset()
 # roll out the deterministic policy for at most 100 steps
 for _ in range(100):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, term, trunc, _ = env.step(action)
    env.render()
    if term or trunc: break
 env.close()
--- a/engine/wrapper.py
+++ b/engine/wrapper.py
@@ -0,0 +1,118 @@
 import gymnasium as gym
 from gymnasium import spaces
 import numpy as np
 from .engine import Limbo, MarketEngine, PricingEngine
 from .lib.render import DashboardRenderer
 class PHANTOM(gym.Env):
    """Gymnasium wrapper for the Limbo pricing-market simulation. Platform sets prices, market responds with demand.
    Action: a float32 vector of ``n_products`` prices inside ``price_bounds``.
    Observation: a dict with per-product ``"demand"`` and ``"prices"`` arrays.
    Reward: revenue, i.e. the sum over products of price * demand.
    """
    metadata = {"render_modes": ["human", "ansi"]}
    def __init__(self,
                 n_products: int = 10,
                 alpha: float = 0.3,
                 N: int = 100,
                 price_bounds: tuple = (10.0, 150.0),
                 lambda_coi: float = 0.1,
                 render_mode: str = None,
                 max_steps: int = 100):
        """Build the market engine, the spaces and the empty episode state.
        Args:
            n_products: number of products priced per step.
            alpha: forwarded to ``MarketEngine``.
            N: forwarded to ``MarketEngine``.
            price_bounds: ``(low, high)`` used for the action space, the price
                observation space and for clipping actions.
            lambda_coi: stored on the instance; not used by the reward yet.
            render_mode: ``"human"``, ``"ansi"`` or ``None``.
            max_steps: number of steps after which ``step`` reports the episode
                as finished (defaults to the previously hard-coded 100).
        """
        super().__init__()
        self.n_products = n_products
        self.price_bounds = price_bounds
        self.lambda_coi = lambda_coi
        self.render_mode = render_mode
        self.alpha = alpha
        self.N = N
        self.max_steps = max_steps
        self.market = MarketEngine(alpha=alpha, N=N)
        self._platform_stub = PricingEngine()
        self._limbo = Limbo(self._platform_stub, self.market)
        self.action_space = spaces.Box(
            low=price_bounds[0], high=price_bounds[1],
            shape=(n_products,), dtype=np.float32
        )
        self.observation_space = spaces.Dict({
            "demand": spaces.Box(low=0.0, high=100.0, shape=(n_products,), dtype=np.float32),
            "prices": spaces.Box(low=price_bounds[0], high=price_bounds[1], shape=(n_products,), dtype=np.float32),
        })
        self._prices = None
        self._demand = None
        self._step_count = 0
        self._demand_history = []
        self._price_history = []
        self._revenue_history = []
        self._renderer = None
    def _demand_array(self, demand) -> np.ndarray:
        """Turn the market's demand lookup into a dense array; missing product indices count as 0.0."""
        return np.array([demand.get(i, 0.0) for i in range(self.n_products)])
    def _get_obs(self) -> dict:
        """Return the current observation dict with float32 ``demand`` and ``prices`` arrays."""
        demand_arr = self._demand_array(self._demand).astype(np.float32)
        return {"demand": demand_arr, "prices": self._prices.astype(np.float32)}
    def _compute_reward(self, prices: np.ndarray, demand: dict) -> float:
        """Revenue for one step: sum of price * demand over all products."""
        revenue = np.sum(prices * self._demand_array(demand))
        # TODO: implement supra-competitive price punishment
        return float(revenue)
    def _record_history(self):
        """Append the current demand, prices and revenue to the per-episode histories."""
        demand_arr = self._demand_array(self._demand)
        self._demand_history.append(demand_arr)
        self._price_history.append(self._prices.copy())
        self._revenue_history.append(np.sum(self._prices * demand_arr))
    def reset(self, seed=None, options=None):
        """Start a new episode with uniformly random initial prices.
        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        # Draw from the generator seeded by super().reset(seed=...) so that a given
        # seed reproduces the initial prices; the global np.random state ignores `seed`.
        # NOTE(review): whether market.act() is itself deterministic is not visible here.
        self._prices = self.np_random.uniform(*self.price_bounds, size=self.n_products)
        self._demand = self.market.act(self._prices)
        self._step_count = 0
        self._demand_history, self._price_history, self._revenue_history = [], [], []
        self._record_history()
        return self._get_obs(), {}
    def step(self, action: np.ndarray):
        """Apply a price vector (clipped to ``price_bounds``) and query the market.
        Returns:
            ``(observation, reward, terminated, truncated, info)``; ``truncated`` is
            always False and ``info`` carries the current step index.
        """
        self._prices = np.clip(action, *self.price_bounds)
        self._demand = self.market.act(self._prices)
        self._step_count += 1
        self._record_history()
        reward = self._compute_reward(self._prices, self._demand)
        # NOTE(review): the step limit is reported via `terminated`, not `truncated`;
        # kept as-is because callers may rely on it.
        terminated = self._step_count >= self.max_steps
        return self._get_obs(), reward, terminated, False, {"step": self._step_count}
    def _compute_elasticity(self) -> np.ndarray:
        """point elasticity: e = (dQ/dP) * (P/Q) via finite differences, clipped to [-5, 5]"""
        if len(self._price_history) < 2:
            return np.zeros(self.n_products)
        p, q = np.array(self._price_history), np.array(self._demand_history)
        dp, dq = np.diff(p, axis=0), np.diff(q, axis=0)
        # Price moves of 0.5 or less are ignored (their elasticity is set to 0).
        valid = np.abs(dp) > 0.5
        with np.errstate(divide='ignore', invalid='ignore'):
            # Quantities below 1.0 are floored to 1.0 in the P/Q ratio.
            elasticity = np.where(valid, (dq / dp) * (p[:-1] / np.maximum(q[:-1], 1.0)), 0.0)
            elasticity = np.nan_to_num(np.clip(elasticity, -5.0, 5.0), nan=0.0)
        # Average over the recorded steps to get one value per product.
        return np.mean(elasticity, axis=0) if len(elasticity) > 0 else np.zeros(self.n_products)
    def render(self):
        """Render according to ``render_mode``: dashboard for "human", a summary string for "ansi"."""
        if self.render_mode == "human":
            # The dashboard renderer is created lazily on the first human-mode render.
            if self._renderer is None:
                self._renderer = DashboardRenderer()
            self._renderer.render(self)
        elif self.render_mode == "ansi":
            return f"step={self._step_count}, prices={self._prices}, demand={self._demand}"
        return None
    def close(self):
        """Close and drop the dashboard renderer if one was created."""
        if self._renderer:
            self._renderer.close()
            self._renderer = None
 if __name__ == "__main__":
    # Smoke test: drive the environment with random actions and render every step.
    env = PHANTOM(n_products=15, alpha=0.3, N=100, render_mode="human")
    try:
        obs, _ = env.reset()
        for step in range(100):
            action = env.action_space.sample()
            obs, reward, term, trunc, info = env.step(action)
            env.render()
            # Stop on either end-of-episode flag, matching the rollout loop in train.py.
            if term or trunc:
                break
    finally:
        # Release the renderer even if a step or render call raises.
        env.close()
--- a/experiments/agents/run.py
+++ b/experiments/agents/run.py
@@ -0,0 +1,117 @@
 from supabase import create_client, Client
 import os
 import random
 import asyncio
 import json
 from dotenv import load_dotenv
 from experiments.agents.agent import get_agent, AgentTypes
 from lib.kafka_client import get_interactions
 load_dotenv()
 # Directory that receives one sub-folder of exported data per experiment.
 # Set PHANTOM_AGENT_RESULTS_DIR to override; otherwise it is the collected_data/
 # folder next to this script instead of a path tied to one developer machine.
 # os.path.join(..., "") keeps the trailing separator, because the experiment
 # directory is built by concatenating RESULTS with the experiment id.
 RESULTS = os.path.join(
    os.getenv("PHANTOM_AGENT_RESULTS_DIR")
    or os.path.join(os.path.dirname(os.path.abspath(__file__)), "collected_data"),
    "",
 )
 # Supabase client shared by the helpers below; credentials come from the environment (.env).
 client = create_client(
    os.getenv("NEXT_PUBLIC_SUPABASE_URL"),
    os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY")
 )
 def pick_random_task(mode: str = 'hotel'):
    """Pick one random row from the ``tasks`` table.
    Args:
        mode: market mode to pick a task for. For ``'hotel'`` (the default, and the
            previously hard-coded value) every task whose description mentions
            'flight' is excluded; other modes apply no filter.
    Returns:
        A task row, or None when no task is left after filtering.
    """
    # `.data` is treated as an empty list when the query yields nothing.
    tasks = client.table("tasks").select("*").execute().data or []
    if mode == 'hotel':
        # drop all that have 'flight' in the description; a missing/null
        # description is treated as empty text instead of raising
        tasks = [task for task in tasks if 'flight' not in (task.get('task_description') or '').lower()]
    return random.choice(tasks) if tasks else None
 def clear_kafka_data():
    """Wipe the experiment topics by deleting them and creating them again.
    The broker address is read from KAFKA_HOST / KAFKA_PORT (default
    localhost:9092). Failures while deleting or creating are printed, not
    raised; the admin client is closed once the create attempt has finished.
    """
    import time
    from kafka.errors import UnknownTopicOrPartitionError
    from kafka.admin import KafkaAdminClient, NewTopic
    host = os.getenv('KAFKA_HOST', 'localhost')
    port = os.getenv('KAFKA_PORT', '9092')
    admin = KafkaAdminClient(bootstrap_servers=f'{host}:{port}')
    topics = ['user-interactions', 'price-logs']
    try:
        admin.delete_topics(topics, timeout_ms=5000)
        print(f"Deleted topics: {topics}")
        # pause after the delete request before the topics are recreated
        time.sleep(2)
    except UnknownTopicOrPartitionError:
        print("Topics don't exist, skipping delete")
    except Exception as e:
        print(f"Error deleting topics: {e}")
    # Same two topics, each recreated with 3 partitions and replication factor 1.
    replacements = [NewTopic(name=topic, num_partitions=3, replication_factor=1) for topic in topics]
    try:
        admin.create_topics(new_topics=replacements, validate_only=False)
        print(f"Recreated topics: {topics}")
    except Exception as e:
        print(f"Error creating topics: {e}")
    finally:
        admin.close()
 def create_new_experiment(task_id):
    """Insert an agent (non-human) hotel-mode experiment row for ``task_id``.
    The subject name is ``agent_`` plus the first 8 characters of a fresh UUID4.
    Returns the first inserted row, or None when the insert returns no data.
    """
    import uuid
    short_id = str(uuid.uuid4())[:8]
    row = {
        "subject_name": f"agent_{short_id}",
        "xp_human_only": False,
        "xp_market_mode": "hotel",
        "xp_task_id": task_id,
    }
    result = client.table("experiments").insert(row).execute()
    if not result.data:
        return None
    return result.data[0]
 if __name__ == "__main__":
    # End-to-end run: reset Kafka, pick a task, register an experiment,
    # let the browser agent work on it, then export the Kafka logs.
    clear_kafka_data()
    task = pick_random_task()
    if not task:
        print("No tasks available")
        # SystemExit instead of exit(): exit() is only injected by the site module
        raise SystemExit(1)
    experiment = create_new_experiment(task['id'])
    if not experiment:
        # create_new_experiment returns None when the insert yields no row;
        # stop here instead of failing on experiment['id'] below
        print("Failed to create experiment")
        raise SystemExit(1)
    exp_id = experiment['id']
    # join() gives the same path whether or not RESULTS ends with a separator
    exp_dir = os.path.join(RESULTS, str(exp_id))
    os.makedirs(exp_dir, exist_ok=True)
    # construct experiment URL with uuid param
    base_url = os.getenv('NEXT_PUBLIC_API_BASE', 'http://localhost:3000')
    agent_url = f"{base_url}/start-task?uuid={exp_id}"
    print(f"Created experiment {exp_id} for task {task['id']}")
    print(f"Agent will interact with: {agent_url}")
    # instantiate and run agent
    agent = get_agent(
        AgentTypes.GENERIC_BROWSER_USE_AGENT,
        goal=task['task_description'],
        url=agent_url,
        timeout=300,
        headless=True
    )
    result = asyncio.run(agent.act())
    print(f"Agent result: {result}")
    # export interaction and price data from kafka
    interactions = get_interactions(topic='user-interactions', timeout_ms=3000)
    prices = get_interactions(topic='price-logs', timeout_ms=3000)
    with open(f"{exp_dir}/int.json", 'w') as f:
        json.dump(interactions, f, indent=2)
    with open(f"{exp_dir}/price.json", 'w') as f:
        json.dump(prices, f, indent=2)
    print(f"Experiment {exp_id} completed.")
    print(f"Exported {len(interactions)} interactions and {len(prices)} price logs to {exp_dir}")
--- a/experiments/airflow/dags/surge_pricing_factory.py
+++ b/experiments/airflow/dags/surge_pricing_factory.py
@@ -1,3 +1,4 @@
 from pandas.core.algorithms import factorize_array
 from airflow import DAG
 from airflow.operators.python import PythonOperator
 from airflow.utils.dates import days_ago
@@ -208,3 +209,12 @@ def create_surge_pricing_dag(store_mode: str) -> DAG:
 # instantiate DAGs for Airflow to discover
 dag_airline = create_surge_pricing_dag('airline')
 dag_hotel = create_surge_pricing_dag('hotel')
 # TODO: Refactor this factory from a surge pricing factory to a general pricing factory
 # We will do this by passing a pricing strategy class to the factory, since the generic pipeline is:
 # take all interaction data, group by sessionId and assign a new price vector to each session
 # in the grouping we get a subset of the interactions per sessionId and we can map that to some Features
 # we define a custom _get_features(interactions .) method in the strategy class
 # we then run only the inference which is the .predict(trajectory) per-session which will give us a new price vector
 # this we then publish for each sessionId group
 # this might include no deleting most of the pricers we have defined and starting with a super simple surge-pricing algorithm that is no-fit only predict. This we can then test end-to-end and observe changes to prices according to a desired strategy - we have to define this one as a very short term strategy because we run sessions that take only a few minutes.
--- a/experiments/airflow/dags/surge_pricing_pipeline.py
+++ b/experiments/airflow/dags/surge_pricing_pipeline.py
@@ -120,15 +120,31 @@ def apply_surge_pricing(**kwargs):
    # rename demand_score to demand for pricer compatibility
    data = product_features.rename(columns={'demand_score': 'demand'})
    high_thresh = dag_conf.get('high_threshold', 10)
    low_thresh = dag_conf.get('low_threshold', 2)
    surge_mult = dag_conf.get('surge_multiplier', 1.2)
    discount_mult = dag_conf.get('discount_multiplier', 0.9)
    logging.info(f"Surge pricing config: high_thresh={high_thresh}, low_thresh={low_thresh}, surge_mult={surge_mult}, discount_mult={discount_mult}")
    logging.info(f"Demand stats: min={data['demand'].min():.2f}, max={data['demand'].max():.2f}, mean={data['demand'].mean():.2f}")
    logging.info(f"Products with high demand (>={high_thresh}): {(data['demand'] >= high_thresh).sum()}")
    logging.info(f"Products with low demand (<={low_thresh}): {(data['demand'] <= low_thresh).sum()}")
    surge_pricer = SimpleSurgePricer(
-        high_threshold=dag_conf.get('high_threshold', 10),
+        high_threshold=high_thresh,
-        low_threshold=dag_conf.get('low_threshold', 2),
+        low_threshold=low_thresh,
-        surge_multiplier=dag_conf.get('surge_multiplier', 1.2),
+        surge_multiplier=surge_mult,
-        discount_multiplier=dag_conf.get('discount_multiplier', 0.9)
+        discount_multiplier=discount_mult
    )
    surge_pricer.fit(data)
    data['optimal_price'] = surge_pricer.predict()
    base_avg = data['base_price'].mean()
    optimal_avg = data['optimal_price'].mean()
    price_change_pct = ((optimal_avg - base_avg) / base_avg) * 100
    logging.info(f"Price adjustment: base_avg={base_avg:.2f}, optimal_avg={optimal_avg:.2f}, change={price_change_pct:+.1f}%")
    prices_df = data[['productId', 'price', 'base_price', 'optimal_price', 'demand']].rename(columns={
        'price': 'current_price',
        'demand': 'demand_score'
--- a/experiments/ml/init.py
+++ b/experiments/ml/init.py
@@ -1,11 +1,21 @@
 from .evals import evaluate
 from .arch import (
    XGBoostAgentClassifier,
-    LightGBMAgentClassifier
+    LightGBMAgentClassifier,
    ContrastiveWeakClassifier,
    TrajectoryEncoder,
    WeakClassifier,
    contrastive_loss,
    featurize_trajectory,
 )
-__all__ =[
+__all__ = [
    'evaluate',
    'XGBoostAgentClassifier',
-    'LightGBMAgentClassifier'
+    'LightGBMAgentClassifier',
    'ContrastiveWeakClassifier',
    'TrajectoryEncoder',
    'WeakClassifier',
    'contrastive_loss',
    'featurize_trajectory',
 ]
--- a/experiments/ml/arch.py
+++ b/experiments/ml/arch.py
@@ -1,122 +1,212 @@
 # sklearn compatible models for agent detection
 from sklearn.base import BaseEstimator, ClassifierMixin
-from procesing.context import PipelineContext
+from typing import Any, Optional, Tuple, Dict, List
 from typing import Any, Optional, Tuple
 from abc import ABC, abstractmethod
-import xgboost as xgb
+from collections import defaultdict
 import lightgbm as lgb
 import numpy as np
 import pandas as pd
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import sys
 from pathlib import Path
 # add lib to path for imports
 sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'lib'))
 from lib.features import (
    transition_histogram as _lib_transition_histogram,
    temporal_signature as _lib_temporal_signature,
    state_coverage as _lib_state_coverage,
    transition_entropy as _lib_transition_entropy,
    featurize_trajectory as _lib_featurize_trajectory,
    parse_timestamp
 )
 from lib.state import event_to_state, get_event_name, get_timestamp
 TASK = 'classification'
 LABELS = ['human', 'agent']
-class BaseAgentClassifier(BaseEstimator, ClassifierMixin, ABC):
+class WeakClassifier(BaseEstimator, ClassifierMixin, ABC):
-    """Base class for tree-based agent detection classifiers with common logic"""
+    # a simple contrastive machine learning model learns to distinguish human/agent behavior
    # using weakly supervised contrastive learning + augmentation
    def __init__(self, **kwargs):
        super().__init__()
        self.model = None
        self.kwargs = kwargs
-    def __init__(self, context: Optional[PipelineContext] = None, n_estimators: int = 200,
+
-                 max_depth: int = 6, learning_rate: float = 0.05,
+class TrajectoryEncoder(nn.Module):
-                 early_stopping_rounds: int = 20):
+    """Encode variable-length event sequences to fixed-dim embedding via bidirectional LSTM"""
-        self.context = context
+    def __init__(self, input_dim: int, embed_dim: int = 32, hidden_dim: int = 64):
        super().__init__()
        self.event_embed = nn.Linear(input_dim, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.proj = nn.Linear(hidden_dim * 2, embed_dim)
    def forward(self, x: torch.Tensor) -> torch.Tensor:  # x: (batch, seq_len, input_dim)
        h = F.relu(self.event_embed(x))
        _, (hn, _) = self.lstm(h)
        hn = torch.cat([hn[-2], hn[-1]], dim=1)  # concat bidirectional hidden states
        return F.normalize(self.proj(hn), dim=1)  # L2 normalized
 class ContrastiveWeakClassifier(WeakClassifier):
    """Contrastive learning classifier for human/agent trajectory discrimination.
    Combines a ``TrajectoryEncoder`` with a 2-way linear head. ``margin`` is only
    stored here; nothing in this class reads it.
    """
    def __init__(self, input_dim: int = 64, embed_dim: int = 32, margin: float = 1.0, **kwargs):
        """Create the encoder and linear head and pick CUDA when available, else CPU.
        Extra keyword arguments are forwarded to ``WeakClassifier``.
        """
        super().__init__(**kwargs)
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.margin = margin
        self.encoder = TrajectoryEncoder(input_dim, embed_dim)
        self.classifier = nn.Linear(embed_dim, 2)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self._fitted = False
    def to_device(self):
        """Move encoder and head to ``self.device``; returns ``self`` for chaining."""
        self.encoder.to(self.device)
        self.classifier.to(self.device)
        return self
    def encode(self, x: torch.Tensor) -> torch.Tensor:
        """Return encoder embeddings for ``x`` after moving ``x`` to ``self.device``."""
        return self.encoder(x.to(self.device))
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the 2-class logits for ``x``."""
        emb = self.encode(x)
        return self.classifier(emb)
    def fit(self, X, y=None):  # sklearn interface - actual training in weak.train.py
        """Mark the estimator as fitted; no training happens here."""
        self._fitted = True
        return self
    def _inference_logits(self, X: np.ndarray) -> torch.Tensor:
        """Shared inference path for ``predict`` / ``predict_proba``.
        Puts both modules in eval mode, runs without gradients, and inserts a
        length-1 sequence axis at position 1 before the forward pass.
        """
        # Inputs are moved to self.device inside encode(); make sure the weights are
        # there too, otherwise inference fails on CUDA hosts when the caller never
        # called to_device(). This is a no-op once the modules are already placed.
        self.to_device()
        self.encoder.eval()
        self.classifier.eval()
        with torch.no_grad():
            x = torch.tensor(X, dtype=torch.float32).unsqueeze(1).to(self.device)
            return self.forward(x)
    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the arg-max class index per row of ``X`` as a NumPy array."""
        return torch.argmax(self._inference_logits(X), dim=1).cpu().numpy()
    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return softmax class probabilities per row of ``X`` as a NumPy array."""
        return F.softmax(self._inference_logits(X), dim=1).cpu().numpy()
 def contrastive_loss(anchor: torch.Tensor, positive: torch.Tensor, negative: torch.Tensor, margin: float = 0.3) -> torch.Tensor:
    """Cosine-similarity triplet loss, averaged over the batch.
    A triplet contributes ``max(0, sim(anchor, negative) - sim(anchor, positive) + margin)``,
    so the loss is zero once the positive is more similar than the negative by at least ``margin``.
    """
    sim_to_positive = F.cosine_similarity(anchor, positive)
    sim_to_negative = F.cosine_similarity(anchor, negative)
    # positive when the negative is not yet `margin` less similar than the positive
    violation = sim_to_negative - sim_to_positive + margin
    return F.relu(violation).mean()
 def nt_xent_loss(z_i: torch.Tensor, z_j: torch.Tensor, temperature: float = 0.5) -> torch.Tensor:
    """Normalized temperature-scaled cross entropy loss (SimCLR style).
    ``z_i[k]`` and ``z_j[k]`` form a positive pair; every other row of the
    stacked batch acts as a negative for it.
    """
    n_pairs = z_i.size(0)
    stacked = torch.cat([z_i, z_j], dim=0)  # (2N, embed_dim)
    logits = F.cosine_similarity(stacked.unsqueeze(1), stacked.unsqueeze(0), dim=2) / temperature
    # a row must never select itself, so the diagonal is pushed to -inf
    self_pairs = torch.eye(2 * n_pairs, dtype=torch.bool, device=stacked.device)
    logits = logits.masked_fill(self_pairs, -float('inf'))
    # row k targets row k + N, and row k + N targets row k
    first_half = torch.arange(n_pairs, device=stacked.device)
    targets = torch.cat([first_half + n_pairs, first_half])
    return F.cross_entropy(logits, targets)
 # feature extraction utilities - delegating to lib.features for unified implementation
 # these wrappers maintain backwards compatibility for existing imports
 def transition_histogram(events: List, state_fn, max_states: int = 50) -> np.ndarray:
    """Backwards-compatible alias: forwards all arguments unchanged to the lib.features
    transition-histogram implementation (normalized histogram of state transitions)."""
    histogram = _lib_transition_histogram(events, state_fn, max_states)
    return histogram
 def temporal_signature(events: List, ts_fn) -> np.ndarray:
    """Backwards-compatible alias: forwards to the lib.features temporal-signature
    implementation (mean/std/skew of inter-event times)."""
    signature = _lib_temporal_signature(events, ts_fn)
    return signature
 def state_coverage(events: List, state_fn, mdp_states: set) -> float:
    """Backwards-compatible alias: forwards to the lib.features state-coverage
    implementation (fraction of MDP states visited by the trajectory)."""
    coverage = _lib_state_coverage(events, state_fn, mdp_states)
    return coverage
 def transition_entropy(events: List, state_fn) -> float:
    """Backwards-compatible alias: forwards to the lib.features transition-entropy
    implementation (entropy of the transition distribution)."""
    entropy = _lib_transition_entropy(events, state_fn)
    return entropy
 def featurize_trajectory(events: List, mdp: Optional[Dict] = None, input_dim: int = 64) -> np.ndarray:
    """Turn a trajectory into a fixed-size feature vector via the lib.features implementation.
    The set of known states is read from ``mdp['states']``; it is empty when
    ``mdp`` is falsy or has no such key.
    """
    def _timestamp_of(event):
        return parse_timestamp(get_timestamp(event))
    def _name_of(event):
        return get_event_name(event)
    if mdp:
        known_states = set(mdp.get('states', []))
    else:
        known_states = set()
    return _lib_featurize_trajectory(events, event_to_state, _timestamp_of, _name_of, known_states, input_dim)
 # gradient boosting classifiers for comparison baselines
 class XGBoostAgentClassifier(BaseEstimator, ClassifierMixin):
    """XGBoost classifier for human/agent detection from session features"""
    def __init__(self, n_estimators: int = 100, max_depth: int = 6, learning_rate: float = 0.1, **kwargs):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.learning_rate = learning_rate
-        self.early_stopping_rounds = early_stopping_rounds
+        self.model = None
-        self.model_ = None
+        self.kwargs = kwargs
        self.feature_names_ = None
    def _to_array(self, X):
        """Convert pandas structures to numpy arrays"""
        return X.values if isinstance(X, (pd.DataFrame, pd.Series)) else X
    def _compute_pos_weight(self, y_arr):
        """Calculate scale_pos_weight for class imbalance handling"""
        n_neg, n_pos = (y_arr == 0).sum(), (y_arr == 1).sum()
        return n_neg / n_pos if n_pos > 0 else 1.0
    def _prepare_eval_set(self, eval_set):
        """Convert eval_set to numpy arrays if needed"""
        if not eval_set:
            return None
        X_val, y_val = eval_set[0]
        return [(self._to_array(X_val), self._to_array(y_val))]
    @abstractmethod
    def _build_model(self, scale_pos: float):
        """Build the underlying model instance (must be implemented by subclasses)"""
        pass
    @abstractmethod
    def _fit_with_eval(self, X_arr, y_arr, eval_arr):
        """Fit model with evaluation set (must be implemented by subclasses)"""
        pass
    def fit(self, X, y, eval_set=None):
        X_arr, y_arr = self._to_array(X), self._to_array(y)
        if isinstance(X, pd.DataFrame):
            self.feature_names_ = X.columns.tolist()
        scale_pos = self._compute_pos_weight(y_arr)
        self.model_ = self._build_model(scale_pos)
        eval_arr = self._prepare_eval_set(eval_set)
        if eval_arr:
            self._fit_with_eval(X_arr, y_arr, eval_arr)
        else:
            self.model_.fit(X_arr, y_arr)
    def fit(self, X: np.ndarray, y: np.ndarray):
        try:
            import xgboost as xgb
            self.model = xgb.XGBClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth,
                                           learning_rate=self.learning_rate, **self.kwargs)
            self.model.fit(X, y)
        except ImportError:
            raise ImportError("xgboost required for XGBoostAgentClassifier")
        return self
-    def predict(self, X):
+    def predict(self, X: np.ndarray) -> np.ndarray:
-        return self.model_.predict(self._to_array(X))
+        if self.model is None:
            raise ValueError("fit the model first")
        return self.model.predict(X)
-    def predict_proba(self, X):
+    def predict_proba(self, X: np.ndarray) -> np.ndarray:
-        return self.model_.predict_proba(self._to_array(X))
+        if self.model is None:
-
+            raise ValueError("fit the model first")
-    @property
+        return self.model.predict_proba(X)
    def feature_importances_(self):
        return self.model_.feature_importances_ if self.model_ else None
-class XGBoostAgentClassifier(BaseAgentClassifier):
+class LightGBMAgentClassifier(BaseEstimator, ClassifierMixin):
-    """XGBoost binary classifier for agent detection with class imbalance handling"""
+    """LightGBM classifier for human/agent detection from session features"""
    def __init__(self, n_estimators: int = 100, max_depth: int = -1, learning_rate: float = 0.1, **kwargs):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.model = None
        self.kwargs = kwargs
-    def _build_model(self, scale_pos: float):
+    def fit(self, X: np.ndarray, y: np.ndarray):
-        return xgb.XGBClassifier(
+        try:
-            n_estimators=self.n_estimators,
+            import lightgbm as lgb
-            max_depth=self.max_depth,
+            self.model = lgb.LGBMClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth,
-            learning_rate=self.learning_rate,
+                                            learning_rate=self.learning_rate, verbose=-1, **self.kwargs)
-            scale_pos_weight=scale_pos,
+            self.model.fit(X, y)
-            eval_metric='auc',
+        except ImportError:
-            early_stopping_rounds=self.early_stopping_rounds,
+            raise ImportError("lightgbm required for LightGBMAgentClassifier")
-            random_state=42,
+        return self
            tree_method='hist',
            enable_categorical=False
        )
-    def _fit_with_eval(self, X_arr, y_arr, eval_arr):
+    def predict(self, X: np.ndarray) -> np.ndarray:
-        self.model_.fit(X_arr, y_arr, eval_set=eval_arr, verbose=False)
+        if self.model is None:
            raise ValueError("fit the model first")
        return self.model.predict(X)
-
+    def predict_proba(self, X: np.ndarray) -> np.ndarray:
-class LightGBMAgentClassifier(BaseAgentClassifier):
+        if self.model is None:
-    """LightGBM binary classifier for agent detection with class imbalance handling"""
+            raise ValueError("fit the model first")
-
+        return self.model.predict_proba(X)
    def _build_model(self, scale_pos: float):
        return lgb.LGBMClassifier(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            learning_rate=self.learning_rate,
            scale_pos_weight=scale_pos,
            metric='auc',
            random_state=42,
            verbosity=-1
        )
    def _fit_with_eval(self, X_arr, y_arr, eval_arr):
        self.model_.fit(
            X_arr, y_arr,
            eval_set=eval_arr,
            callbacks=[lgb.early_stopping(self.early_stopping_rounds, verbose=False)]
        )
--- a/experiments/ml/weak_train.py
+++ b/experiments/ml/weak_train.py
@@ -0,0 +1,246 @@
 import sys
 sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/sim/rl/behavior_loader")
 sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml")
 from sim.rl.behavior_loader.loader import AgentLoader, Loader, JointLoader, PayloadModel
 from sim.rl.behavior_loader.models import JointBehaviorModel
 from arch import ContrastiveWeakClassifier, contrastive_loss, featurize_trajectory
 from typing import List, Optional, Dict
 from datetime import datetime, timedelta
 from copy import deepcopy
 import numpy as np
 import random
 import torch
 from torch.utils.data import Dataset, DataLoader
 from torch.optim import Adam
 from torch.utils.tensorboard import SummaryWriter
 # Root for TensorBoard event files: train() writes to <RUNS_DIR>/train/<run_name>
 # and evaluate_loocv() to <RUNS_DIR>/eval/<run_name>.
 # NOTE(review): these three values are absolute paths on one developer machine;
 # they need to be adjusted (or made configurable) to run anywhere else.
 RUNS_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml/runs"
 # Session directories handed to JointLoader / JointBehaviorModel as (human_dir, agent_dir).
 agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
 human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
 def _perturb_ts(evt: PayloadModel, jitter_ms: int = 500) -> PayloadModel:
    """Add random jitter to event timestamp"""
    new_evt = deepcopy(evt)
    try:
        ts = datetime.fromisoformat(evt.ts.replace('Z', '+00:00'))
        delta = timedelta(milliseconds=random.randint(-jitter_ms, jitter_ms))
        new_evt.ts = (ts + delta).isoformat()
    except:
        pass
    return new_evt
 def augment_trajectory(trajectory: List[PayloadModel], rate: float = 0.1) -> List[PayloadModel]:
    """Produce one randomly augmented view of a trajectory for contrastive learning.
    One of four strategies is picked at random: a contiguous window covering
    70-100% of the events, swapping adjacent events with probability ``rate``,
    timestamp jitter, or dropping events with probability ``rate``.
    Trajectories with fewer than two events are returned unchanged.
    """
    n_events = len(trajectory)
    if n_events < 2:
        return trajectory
    strategy = random.choice(['window', 'shuffle', 'noise', 'drop'])
    if strategy == 'noise':
        # jitter every timestamp by up to 500 ms
        return [_perturb_ts(event, jitter_ms=500) for event in trajectory]
    if strategy == 'drop':
        kept = [event for event in trajectory if random.random() > rate]
        # never return fewer than two events; fall back to the first two
        if len(kept) < 2:
            return trajectory[:2]
        return kept
    if strategy == 'shuffle':
        swapped = list(trajectory)
        for left in range(n_events - 1):
            if random.random() < rate:
                swapped[left], swapped[left + 1] = swapped[left + 1], swapped[left]
        return swapped
    if strategy == 'window':
        shortest = max(2, int(n_events * 0.7))
        length = random.randint(shortest, n_events)
        offset = random.randint(0, n_events - length)
        return trajectory[offset:offset + length]
    return trajectory
 class TripletDataset(Dataset):
    """On-the-fly (anchor, positive, negative, label) triplets built from labelled sessions.
    Session ids starting with ``human_`` / ``agent_`` decide the class. The
    positive is an augmented copy of the anchor session, the negative is a
    random session of the other class, and the label is 0 for human, 1 for agent.
    """
    def __init__(self, data: Dict[str, List[PayloadModel]], mdp: Optional[Dict], augment_fn, input_dim: int = 64, multiplier: int = 10):
        self.sessions = list(data.items())
        self.human_ids = []
        self.agent_ids = []
        # remember, per class, the positions of its sessions inside self.sessions
        for position, (session_id, _events) in enumerate(self.sessions):
            if session_id.startswith('human_'):
                self.human_ids.append(position)
            if session_id.startswith('agent_'):
                self.agent_ids.append(position)
        self.mdp = mdp
        self.augment = augment_fn
        self.input_dim = input_dim
        self.multiplier = multiplier
        if not (self.human_ids and self.agent_ids):
            raise ValueError(f"Need both human ({len(self.human_ids)}) and agent ({len(self.agent_ids)}) sessions")
    def __len__(self) -> int:
        # every session is visited `multiplier` times per epoch
        return self.multiplier * len(self.sessions)
    def __getitem__(self, idx: int):
        session_id, events = self.sessions[idx % len(self.sessions)]
        anchor_is_human = session_id.startswith('human_')
        def _features(trajectory):
            vector = featurize_trajectory(trajectory, self.mdp, self.input_dim)
            return torch.tensor(vector, dtype=torch.float32)
        anchor = _features(events)
        positive = _features(self.augment(events))
        # the negative always comes from the opposite class
        if anchor_is_human:
            opposite_pool = self.agent_ids
        else:
            opposite_pool = self.human_ids
        negative = _features(self.sessions[random.choice(opposite_pool)][1])
        label = torch.tensor(0 if anchor_is_human else 1, dtype=torch.long)  # 0=human, 1=agent
        return (anchor, positive, negative, label)
 def train(epochs: int = 100, lr: float = 1e-3, batch_size: int = 4, input_dim: int = 64,
           embed_dim: int = 32, margin: float = 0.3, verbose: bool = True, run_name: str = None):
    """Train contrastive weak classifier on human/agent trajectories.
    Args:
        epochs: number of passes over the triplet dataset.
        lr: Adam learning rate for encoder and classifier head.
        batch_size: triplets per batch (incomplete last batches are dropped).
        input_dim: size of the per-trajectory feature vector.
        embed_dim: size of the encoder embedding.
        margin: triplet-loss margin.
        verbose: print progress every 10 epochs plus a final summary.
        run_name: TensorBoard run name; generated from the hyper-parameters and
            the current time when omitted.
    Returns:
        ``(model, ref_mdp)``: the trained classifier and the MDP used for featurization.
    """
    joint = JointLoader(human_dir, agent_dir)
    data = joint.get_data()
    if verbose:
        print(f"Loaded {len(data)} sessions")
    joint_model = JointBehaviorModel(human_dir, agent_dir)
    ref_mdp = joint_model.build_MDP()
    dataset = TripletDataset(data, ref_mdp, augment_trajectory, input_dim=input_dim)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin)
    model.to_device()
    run_name = run_name or f"d{input_dim}_e{embed_dim}_lr{lr}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}"
    writer = SummaryWriter(f"{RUNS_DIR}/train/{run_name}")
    # try/finally so the writer is closed even when training raises midway
    try:
        optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr)
        ce_loss_fn = torch.nn.CrossEntropyLoss()
        best_loss = float('inf')
        for epoch in range(epochs):
            model.encoder.train()
            model.classifier.train()
            total_loss, n_batches = 0.0, 0
            for anchor, positive, negative, labels in loader:
                anchor, positive, negative, labels = [t.to(model.device) for t in [anchor, positive, negative, labels]]
                # each feature vector is fed as a length-1 sequence (axis inserted at dim 1)
                z_a, z_p, z_n = [model.encoder(t.unsqueeze(1)) for t in [anchor, positive, negative]]
                trip_loss = contrastive_loss(z_a, z_p, z_n, margin=model.margin)
                ce = ce_loss_fn(model.classifier(z_a), labels)
                # joint objective: triplet loss plus half-weighted classification loss on the anchors
                loss = trip_loss + 0.5 * ce
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
                n_batches += 1
            # max(..., 1) avoids dividing by zero when the loader yields no batch
            avg_loss = total_loss / max(n_batches, 1)
            writer.add_scalar('loss', avg_loss, epoch)
            if verbose and (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}")
            if avg_loss < best_loss:
                best_loss = avg_loss
    finally:
        writer.close()
    if verbose:
        print(f"Done. Best={best_loss:.4f} TB:{RUNS_DIR}/train/{run_name}")
    return model, ref_mdp
 def evaluate_loocv(input_dim: int = 64, embed_dim: int = 32, epochs_per_fold: int = 50,
                    lr: float = 1e-3, margin: float = 0.3, run_name: str = None):
    """Leave-one-session-out cross-validation of the contrastive weak classifier.

    Every session is held out once. A fresh ContrastiveWeakClassifier is trained on the
    remaining sessions for `epochs_per_fold` epochs using the triplet loss only, then the
    held-out session is classified. Session ids starting with 'human_' are labelled 0,
    every other id 1. Folds whose training split lacks a 'human_' or an 'agent_' session
    are skipped; a fold that raises is printed and excluded from the metrics.

    Returns:
        (accuracy, predictions, actuals), or (0.0, [], []) when no fold produced a prediction.
        Accuracy, F1, precision and recall are written to TensorBoard under
        {RUNS_DIR}/eval/{run_name}.
    """
    sessions = JointLoader(human_dir, agent_dir).get_data()
    held_out_ids = list(sessions.keys())
    ref_mdp = JointBehaviorModel(human_dir, agent_dir).build_MDP()
    run_name = run_name or f"loocv_d{input_dim}_e{embed_dim}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}"
    writer = SummaryWriter(f"{RUNS_DIR}/eval/{run_name}")
    predictions, actuals = [], []
    for held_out in held_out_ids:
        fold_train = {sid: events for sid, events in sessions.items() if sid != held_out}
        # skip folds whose training split is missing one of the two classes
        has_human = any(sid.startswith('human_') for sid in fold_train)
        has_agent = any(sid.startswith('agent_') for sid in fold_train)
        if not (has_human and has_agent):
            continue
        held_out_events = sessions[held_out]
        expected = 0 if held_out.startswith('human_') else 1
        try:
            triplets = TripletDataset(fold_train, ref_mdp, augment_trajectory, input_dim=input_dim, multiplier=5)
            batches = DataLoader(triplets, batch_size=2, shuffle=True, drop_last=True)
            model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin)
            model.to_device()
            optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr)
            model.encoder.train()
            model.classifier.train()
            for _ in range(epochs_per_fold):
                # the batch labels are not used here: only the triplet loss is optimised
                for anchor, positive, negative, _labels in batches:
                    embeddings = [model.encoder(t.unsqueeze(1).to(model.device)) for t in (anchor, positive, negative)]
                    loss = contrastive_loss(*embeddings, margin=margin)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
            held_out_features = featurize_trajectory(held_out_events, ref_mdp, input_dim)
            guess = model.predict(held_out_features.reshape(1, -1))[0]
            predictions.append(guess)
            actuals.append(expected)
            print(f"  {held_out[:12]}...: pred={guess}, actual={expected}, {'OK' if guess == expected else 'MISS'}")
        except Exception as e:
            print(f"Error: {e}")
    if not predictions:
        writer.close()
        return 0.0, [], []
    outcomes = list(zip(predictions, actuals))
    acc = sum(p == a for p, a in outcomes) / len(predictions)
    # class 1 (agent) is the positive class for precision/recall
    tp = sum(1 for p, a in outcomes if p == 1 and a == 1)
    fp = sum(1 for p, a in outcomes if p == 1 and a == 0)
    fn = sum(1 for p, a in outcomes if p == 0 and a == 1)
    prec = tp / max(tp + fp, 1)
    rec = tp / max(tp + fn, 1)
    f1 = 2 * prec * rec / max(prec + rec, 1e-10)
    for tag, value in (('accuracy', acc), ('f1', f1), ('precision', prec), ('recall', rec)):
        writer.add_scalar(tag, value, 0)
    writer.close()
    print(f"\nAccuracy: {acc:.2%} F1: {f1:.3f} TB:{RUNS_DIR}/eval/{run_name}")
    return acc, predictions, actuals
 if __name__ == "__main__":
    import argparse
    # command-line entry point: --mode train fits a model, --mode eval runs LOOCV
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['train', 'eval'], default='train')
    for flag, kind, default in (
        ('--epochs', int, 100),
        ('--lr', float, 1e-3),
        ('--margin', float, 0.3),
        ('--input-dim', int, 64),
        ('--embed-dim', int, 32),
        ('--run-name', str, None),
    ):
        parser.add_argument(flag, type=kind, default=default)
    args = parser.parse_args()
    # hyper-parameters common to both modes; --epochs maps to a different keyword in each
    shared = dict(lr=args.lr, input_dim=args.input_dim, embed_dim=args.embed_dim,
                  margin=args.margin, run_name=args.run_name)
    if args.mode == 'train':
        model, mdp = train(epochs=args.epochs, **shared)
    else:
        evaluate_loocv(epochs_per_fold=args.epochs, **shared)
--- a/experiments/procesing/contaminator.py
+++ b/experiments/procesing/contaminator.py
@@ -0,0 +1,114 @@
 from __future__ import annotations
 import os
 import random
 from pathlib import Path
 from types import SimpleNamespace
 import pandas as pd
 from lib.separability import estimate_alpha, load_artifacts, score_session
 # prefer the package-qualified import; when `sim` is not importable (module run standalone),
 # put <repo>/sim/rl/behavior_loader on sys.path and import `models` directly
 try:
    from sim.rl.behavior_loader.models import AgentBehaviorModel
 except ImportError:
    import sys
    sys.path.insert(0, str(Path(__file__).parent.parent.parent / "sim" / "rl" / "behavior_loader"))
    from models import AgentBehaviorModel
 # paths are resolved relative to the project root (three levels above this file);
 # the agent data directory can be overridden with the PHANTOM_AGENT_DATA_DIR environment variable
 PROJECT_ROOT = Path(__file__).parent.parent.parent
 AGENT_DATA_DIR = Path(os.getenv('PHANTOM_AGENT_DATA_DIR', PROJECT_ROOT / "experiments" / "agents" / "collected_data"))
 # separability artifacts are optional: if load_artifacts() raises FileNotFoundError the value is
 # None, and contaminate_dataset then skips scoring of the synthetic trajectories
 try:
    SEPARABILITY_ARTIFACTS = load_artifacts()
 except FileNotFoundError:
    SEPARABILITY_ARTIFACTS = None
 def remap_schema(df: pd.DataFrame, mapping: dict, on: str = "event_type") -> pd.DataFrame:
    """return a copy of df whose `on` column is translated through `mapping`

    values that have no entry in `mapping` (or map to a missing value) keep their
    original value; the input frame itself is left untouched
    """
    remapped = df.copy()
    original_values = remapped[on]
    remapped[on] = original_values.map(mapping).fillna(original_values)
    return remapped
 def _states_to_events(states: list[str]) -> list[SimpleNamespace]:
    """turn MDP state strings into lightweight event objects

    a string state is split on '|' into page / product / event name; missing pieces
    fall back to "unknown" (product) and the last piece (event name). a non-string
    state becomes page "/page", product "product" and its str() as event name.
    the position in the input list is used as the timestamp `ts`.
    """
    def _convert(position, raw_state):
        if isinstance(raw_state, str):
            fields = raw_state.split("|")
        else:
            fields = ["page", "product", str(raw_state)]
        return SimpleNamespace(
            eventName=fields[2] if len(fields) > 2 else fields[-1],
            page=f"/{fields[0]}" if fields else "/",
            productId=fields[1] if len(fields) > 1 else "unknown",
            ts=float(position),
        )

    return [_convert(position, raw_state) for position, raw_state in enumerate(states)]
 def contaminate_dataset(df: pd.DataFrame, on: str = "event_type",
                         contamination_rate: float = 0.1,
                         agent_data_dir: Path = None) -> pd.DataFrame:
    """inject synthetic agent trajectories into a dataset

    df: event-level frame; column `on` holds the event type of each row
    on: name of the event-type column
    contamination_rate: fraction of final dataset that should be agent data (0.1 = 10% agents);
        must satisfy 0 <= contamination_rate < 1
    agent_data_dir: directory handed to AgentBehaviorModel (defaults to AGENT_DATA_DIR)

    returns: `df` itself when nothing is injected, otherwise a new frame with the synthetic
        rows appended. synthetic rows carry source='synthetic_agent' and, when separability
        artifacts are loaded, prob_agent / delta_h / delta_a of their trajectory; when at
        least one trajectory was scored an 'estimated_alpha' column holds the mean estimate.
        at most int(N / (1 - rate) - N) rows are added, fewer if start states cannot be matched
    raises: ValueError if contamination_rate is outside [0, 1)
    """
    if not 0.0 <= contamination_rate < 1.0:
        raise ValueError(f"contamination_rate must be in [0, 1), got {contamination_rate!r}")
    # calculate how many synthetic events (rows) to add
    N = len(df)
    N_final = N / (1 - contamination_rate)
    N_contaminate = int(N_final - N)
    if N_contaminate <= 0:
        return df  # empty input or rate too small to add a single row
    # event distribution of the original data; used as sampling weights for start events
    event_dist = df[on].value_counts(normalize=True).to_dict()
    if not event_dist:
        return df  # column holds only missing values, nothing to sample from
    data_dir = agent_data_dir or AGENT_DATA_DIR
    model = AgentBehaviorModel(str(data_dir))
    model.build_MDP()  # ensure MDP is built before sampling
    mdp_states = model.mdp.get('states', []) if model.mdp else []
    # one draw per requested row is an upper bound: the loop stops once enough rows exist
    start_events = random.choices(list(event_dist.keys()), weights=list(event_dist.values()), k=N_contaminate)
    starts_by_event = {}  # start event -> MDP states containing it (computed once per event)
    new_rows = []
    alpha_estimates = []
    for start_event in start_events:
        remaining = N_contaminate - len(new_rows)
        if remaining <= 0:
            break
        if start_event not in starts_by_event:
            starts_by_event[start_event] = [s for s in mdp_states if start_event in s]
        matching_starts = starts_by_event[start_event]
        if not matching_starts:
            continue  # skip if no matching start state
        start_state = random.choice(matching_starts)
        # truncate so the number of injected rows never exceeds the requested amount
        trajectory = list(model.sample_traj(start_state, max_len=20))[:remaining]
        score = {}
        scored = False
        if SEPARABILITY_ARTIFACTS:
            score_payload = _states_to_events(trajectory)
            score = score_session(score_payload, SEPARABILITY_ARTIFACTS)
            alpha_estimates.append(
                estimate_alpha(score["prob_agent"], score["delta_h"], score["delta_a"], temperature=2.0)
            )
            scored = bool(score_payload)
        for state in trajectory:
            parts = state.split('|') if isinstance(state, str) else [start_event]
            new_rows.append({
                on: parts[-1] if parts else start_event,
                'source': 'synthetic_agent',
                'prob_agent': score.get('prob_agent') if scored else None,
                'delta_h': score.get('delta_h') if scored else None,
                'delta_a': score.get('delta_a') if scored else None,
            })
    if not new_rows:
        return df
    contaminated = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
    if alpha_estimates:
        contaminated['estimated_alpha'] = sum(alpha_estimates) / len(alpha_estimates)
    return contaminated
--- a/experiments/procesing/pricers/base.py
+++ b/experiments/procesing/pricers/base.py
@@ -7,15 +7,6 @@ import pandas as pd
 class PricingFunction(ABC):
    """
    Abstract base for pricing functions.
    Defines mapping: f(Q_t, P_t, S_t, H_t) -> P_{t+1}
    Where:
        Q_t ∈ R^n: demand vector at time t
        P_t ∈ R^n: price vector at time t
        S_t: session features (behavioral signals, interactions)
        H_t = {Q_{t-k}, P_{t-k}, S_{t-k}}: historical state trajectory
    Objective:
        maximize E[R_T] = E[Σ P_t^T · Q_t]
        subject to:
@@ -28,10 +19,10 @@ class PricingFunction(ABC):
    def fit(self, *args, **kwargs):
        """
        Offline training on historical data.
        This is where we can think about some maximization of expected revenue
        over historical trajectories to learn parameters of the pricing function.
        (This, however, is covered in more depth on the RL side of things.)
        Args:
            *args: positional training inputs, e.g. historical_data - a DataFrame
                with elasticity, prices, demand signals
            **kwargs: additional training parameters
        """
        pass
@@ -39,12 +30,18 @@ class PricingFunction(ABC):
    def predict(self, *args, **kwargs) -> np.ndarray:
        """
        Generate optimal prices given current state.
        This is an abstract method that transitions from τ -> P*
        which is the mapping from the trajectory to optimal prices under
        some subset of session grouping (so, per sessionId)
        Args:
            *args: positional state inputs defined by the concrete pricer
            **kwargs: keyword state inputs defined by the concrete pricer
        """
        pass
-        Args:
+    @abstractmethod
-            state_space: StateSpace object containing Q_t, P_t, S_t, H_t
+    def _get_features(self, *kwargs) -> np.ndarray:
-
+        """
        Extract features from trajectory for pricing decision.
        Returns:
-            P_{t+1}: price vector in R^n
+            np.ndarray of shape (n_products, n_features)
        """
        pass
--- a/experiments/procesing/pricers/elasticity.py
+++ b/experiments/procesing/pricers/elasticity.py
@@ -57,3 +57,13 @@ class ElasticityBasedPricer(PricingFunction):
        # enforce bounds
        prices = np.clip(prices, self.price_floor, self.price_ceil)
        return prices
    def _get_features(self, state_space=None) -> np.ndarray:
        """Per-product feature matrix: elasticity, demand, relative demand deviation.

        Returns an all-zero (n, 3) array when no state is given or the pricer has no
        elasticity yet (n is the number of elasticities, 0 if there are none).
        """
        elasticity = self.elasticity
        if elasticity is None:
            return np.zeros((0, 3))
        if state_space is None:
            return np.zeros((len(elasticity), 3))
        demand = np.asarray(state_space.demand)
        baseline = self.mean_demand
        # deviation from the mean demand; the epsilon guards against a zero baseline
        relative_dev = (demand - baseline) / (baseline + 1e-6)
        return np.column_stack([elasticity, demand, relative_dev])
--- a/experiments/procesing/pricers/session_aware.py
+++ b/experiments/procesing/pricers/session_aware.py
@@ -107,6 +107,36 @@ class SessionAwarePricer(PricingFunction):
        return prices
    def _get_features(self, state_space=None) -> np.ndarray:
        """Per-product feature matrix: elasticity, demand and three session signals.

        Columns are elasticity, demand, interaction_velocity, product_view_depth and
        cart_to_view_ratio. The session signals come from the first row of
        state_space.session_features (0.0 when absent) and are repeated for every
        product. Returns zeros of shape (n, 5) when no state or no elasticity is set.
        """
        if self.elasticity is None:
            return np.zeros((0, 5))
        if state_space is None:
            return np.zeros((len(self.elasticity), 5))
        demand = np.asarray(state_space.demand)
        n_products = len(demand)
        velocity = view_depth = cart_to_view = 0.0
        session_df = state_space.session_features
        if not session_df.empty:
            first_row = session_df.iloc[0]
            velocity, view_depth, cart_to_view = (
                first_row.get(name, 0.0)
                for name in ('interaction_velocity', 'product_view_depth', 'cart_to_view_ratio')
            )
        # session-level values are broadcast so that each product row carries them
        session_columns = [np.full(n_products, value) for value in (velocity, view_depth, cart_to_view)]
        return np.column_stack([self.elasticity, demand, *session_columns])
 class ProductSpecificSessionPricer(PricingFunction):
    """
@@ -170,3 +200,12 @@ class ProductSpecificSessionPricer(PricingFunction):
        prices = np.clip(base_prices, self.price_floor, self.price_ceil)
        return prices
    def _get_features(self, state_space=None) -> np.ndarray:
        """Per-product feature matrix with two columns: elasticity and demand.

        Returns zeros of shape (n, 2) when no state is given or no elasticity is set
        (n is the number of elasticities, 0 if there are none).
        """
        elasticity = self.elasticity
        if elasticity is None:
            return np.zeros((0, 2))
        if state_space is None:
            return np.zeros((len(elasticity), 2))
        return np.column_stack([elasticity, np.asarray(state_space.demand)])
--- a/experiments/procesing/pricers/simple.py
+++ b/experiments/procesing/pricers/simple.py
@@ -3,6 +3,46 @@ import pandas as pd
 from procesing.pricers.base import PricingFunction
 def session_features_to_demand(session_features: pd.DataFrame) -> float:
    """
    Turn the first row of a session feature frame into a demand proxy score.
    This is the rule-based θ̂ → D transformation used for pricing.
    Rules (additive on top of a baseline of 1.0):
      - interaction_velocity > 2.0 → +10 (treated as an agent signal, price up)
      - cart_to_view_ratio > 0.1 or cart_adds > 0 → +5 (purchase intent)
      - item_views > 3 → + min(item_views, 5) (browsing depth)
    Returns: score between 1.0 (baseline, also used for an empty frame) and 20.0 (cap)
    """
    if session_features.empty:
        return 1.0
    row = session_features.iloc[0]
    score = 1.0  # baseline demand
    # agent detection: high velocity is priced like high demand
    if row.get('interaction_velocity', 0) > 2.0:
        score += 10.0
    # conversion intent: any cart interaction
    if row.get('cart_to_view_ratio', 0) > 0.1 or row.get('cart_adds', 0) > 0:
        score += 5.0
    # browsing depth: contribution is limited to 5
    views = row.get('item_views', 0)
    if views > 3:
        score += min(views, 5.0)
    return min(score, 20.0)
 class StaticPricer(PricingFunction):
    """Static pricing: always return fixed base prices"""
@@ -25,6 +65,11 @@ class StaticPricer(PricingFunction):
            raise ValueError("Must call fit() or provide base_prices in constructor")
        return self.base_prices.copy()
    def _get_features(self, state_space=None) -> np.ndarray:
        """Static pricing ignores the state: one row per base price, zero feature columns"""
        prices = self.base_prices
        row_count = 0 if prices is None else len(prices)
        return np.zeros((row_count, 0))
 class RandomPricer(PricingFunction):
    """Random pricing within bounds (for baseline comparison)"""
@@ -47,6 +92,11 @@ class RandomPricer(PricingFunction):
            self.n_products = len(state_space.demand)
        return self.rng.uniform(self.price_min, self.price_max, size=self.n_products)
    def _get_features(self, state_space=None) -> np.ndarray:
        """Random pricing ignores the state: one row per product, zero feature columns"""
        row_count = self.n_products or 0  # unset / zero product count gives an empty matrix
        return np.zeros((row_count, 0))
 class SimpleSurgePricer(PricingFunction):
    """
@@ -67,21 +117,25 @@ class SimpleSurgePricer(PricingFunction):
        self.surge_multiplier = surge_multiplier
        self.discount_multiplier = discount_multiplier
-    def fit(self, market_data : pd.DataFrame):
+    def fit(self, market_data: pd.DataFrame):
        """Extract base prices from product catalog or historical averages"""
        self.base_prices = market_data['base_price'].to_numpy() if 'base_price' in market_data.columns else market_data['price'].values
-        self.demand_history = market_data['demand'].to_numpy() if 'demand' in market_data.columns else np.zeros_like(self.base_prices)
+        return self
-    def predict(self) -> np.ndarray:
+    def predict(self, state_space) -> np.ndarray:
        """
        Adjust prices based on current demand using surge rules.
-        state_space.demand: demand counts per product
+        state_space.demand: demand proxy per product (from session features)
-        state_space.prices: current prices (fallback if base_prices not set)
+        state_space.prices: base prices
        """
-        current_prices = self.base_prices if self.base_prices is not None else np.ones_like(demand_vector) * 99.99
+        demand = np.asarray(state_space.demand) if state_space and hasattr(state_space, 'demand') else np.array([0])
-        demand = self.demand_history if self.demand_history is not None else np.zeros_like(current_prices)
+        base = np.asarray(state_space.prices) if state_space and hasattr(state_space, 'prices') else self.base_prices
        new_prices = current_prices.copy()
        if base is None:
            base = np.ones(len(demand)) * 99.99
        # ensure float dtype to allow multiplication by float multipliers
        new_prices = base.astype(np.float64).copy()
        high_mask = demand >= self.high_threshold
        new_prices[high_mask] *= self.surge_multiplier
@@ -89,3 +143,16 @@ class SimpleSurgePricer(PricingFunction):
        new_prices[low_mask] *= self.discount_multiplier
        return new_prices
    def _get_features(self, state_space=None) -> np.ndarray:
        """Per-product feature matrix with two columns: demand and base price.

        Without a state the result is all zeros with one row per stored base price.
        Demand defaults to [0] when the state has no `demand`; prices come from
        state_space.prices, else the fitted base prices, else 99.99 per product.
        """
        fitted_prices = self.base_prices
        if state_space is None:
            row_count = 0 if fitted_prices is None else len(fitted_prices)
            return np.zeros((row_count, 2))
        if hasattr(state_space, 'demand'):
            demand = np.asarray(state_space.demand)
        else:
            demand = np.array([0])
        if hasattr(state_space, 'prices'):
            base = np.asarray(state_space.prices)
        else:
            base = fitted_prices
        if base is None:
            # no price information anywhere: use the 99.99 placeholder for every product
            base = np.ones(len(demand)) * 99.99
        return np.column_stack([demand, base])
--- a/experiments/procesing/steps/session.py
+++ b/experiments/procesing/steps/session.py
@@ -135,6 +135,7 @@ class ExtractSessionFeaturesStep(BaseContextStep):
    Vectorized session feature extraction - replaces O(n^2) per-row loop.
    Input: interactions_df
    Output: session-level feature matrix
    THIS is our main mapping from tau (trajectory) to a feature vector theta - we need to do this very well. This is what will go into demand estimation.
    """
    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
--- a/experiments/procesing/tests/test_demand.py
+++ b/experiments/procesing/tests/test_demand.py
@@ -6,6 +6,7 @@ from procesing.steps import (
 )
 def test_compute_demand(pipeline_context):
    random.seed(42)  # deterministic test
    step = ComputeDemandStep(context=pipeline_context)
    # Test with normal interaction data
@@ -26,6 +27,7 @@ def test_compute_demand(pipeline_context):
 def test_compute_demand_skewed(pipeline_context):
    random.seed(42)  # deterministic test
    step = ComputeDemandStep(context=pipeline_context)
    # Test with normal interaction data
--- a/lib/init.py
+++ b/lib/init.py
@@ -0,0 +1,41 @@
 """PHANTOM shared library
 Exports unified utilities for features, state, config, kafka, and model registry
 """
 from .config import (
    PROJECT_ROOT, DATA_DIR, EXPERIMENTS_DIR,
    AGENT_DATA_DIR, HUMAN_DATA_DIR, SIM_RUNS_DIR, MODEL_REGISTRY_DIR,
    COLLECTED_DATA_DIR, NOTEBOOK_OUTPUT_DIR,
    ensure_dir, get_data_path, get_experiments_path, get_sim_path,
    KAFKA_HOST, KAFKA_PORT, KAFKA_BROKER,
    REDIS_HOST, REDIS_PORT,
    SUPABASE_URL, SUPABASE_ANON_KEY,
    BACKEND_PORT, PROVIDER_PORT
 )
 from .state import (
    make_state_repr, event_to_state, parse_state,
    get_event_name, get_timestamp,
    create_state_fn, create_event_name_fn, create_timestamp_fn
 )
 from .features import (
    transition_histogram, temporal_signature, state_coverage, transition_entropy,
    event_type_distribution, featurize_trajectory, parse_timestamp
 )
 __all__ = [
    # config
    'PROJECT_ROOT', 'DATA_DIR', 'EXPERIMENTS_DIR',
    'AGENT_DATA_DIR', 'HUMAN_DATA_DIR', 'SIM_RUNS_DIR', 'MODEL_REGISTRY_DIR',
    'COLLECTED_DATA_DIR', 'NOTEBOOK_OUTPUT_DIR',
    'ensure_dir', 'get_data_path', 'get_experiments_path', 'get_sim_path',
    'KAFKA_HOST', 'KAFKA_PORT', 'KAFKA_BROKER',
    'REDIS_HOST', 'REDIS_PORT',
    'SUPABASE_URL', 'SUPABASE_ANON_KEY',
    'BACKEND_PORT', 'PROVIDER_PORT',
    # state
    'make_state_repr', 'event_to_state', 'parse_state',
    'get_event_name', 'get_timestamp',
    'create_state_fn', 'create_event_name_fn', 'create_timestamp_fn',
    # features
    'transition_histogram', 'temporal_signature', 'state_coverage', 'transition_entropy',
    'event_type_distribution', 'featurize_trajectory', 'parse_timestamp',
 ]
--- a/lib/config.py
+++ b/lib/config.py
@@ -0,0 +1,65 @@
 """Unified path configuration for PHANTOM project
 All hardcoded paths should reference this module
 Paths can be overridden via environment variables
 """
 import os
 from pathlib import Path
 # project root (directory containing lib/, experiments/, sim/, web/, backend/),
 # derived from this file's location: two levels up, resolved to an absolute path
 PROJECT_ROOT = Path(__file__).parent.parent.resolve()
 # data directories (override with PHANTOM_DATA_DIR / PHANTOM_EXPERIMENTS_DIR)
 DATA_DIR = Path(os.getenv('PHANTOM_DATA_DIR', PROJECT_ROOT / 'data'))
 EXPERIMENTS_DIR = Path(os.getenv('PHANTOM_EXPERIMENTS_DIR', PROJECT_ROOT / 'experiments'))
 # agent/human interaction data; defaults live under DATA_DIR
 # NOTE(review): experiments/procesing/contaminator.py reads the same PHANTOM_AGENT_DATA_DIR
 # variable but defaults to experiments/agents/collected_data - confirm which default is intended
 AGENT_DATA_DIR = Path(os.getenv('PHANTOM_AGENT_DATA_DIR', DATA_DIR / 'agents'))
 HUMAN_DATA_DIR = Path(os.getenv('PHANTOM_HUMAN_DATA_DIR', DATA_DIR / 'humans'))
 # RL simulation runs (override with PHANTOM_SIM_RUNS_DIR)
 SIM_RUNS_DIR = Path(os.getenv('PHANTOM_SIM_RUNS_DIR', PROJECT_ROOT / 'sim' / 'rl' / 'runs'))
 # model artifacts (override with PHANTOM_MODEL_REGISTRY_DIR)
 MODEL_REGISTRY_DIR = Path(os.getenv('PHANTOM_MODEL_REGISTRY_DIR', DATA_DIR / 'models'))
 # collected experiment data (override with PHANTOM_COLLECTED_DATA_DIR)
 COLLECTED_DATA_DIR = Path(os.getenv('PHANTOM_COLLECTED_DATA_DIR', EXPERIMENTS_DIR / 'agents' / 'collected_data'))
 # notebook outputs (override with PHANTOM_NOTEBOOK_OUTPUT_DIR)
 NOTEBOOK_OUTPUT_DIR = Path(os.getenv('PHANTOM_NOTEBOOK_OUTPUT_DIR', EXPERIMENTS_DIR / 'notebooks' / 'outputs'))
 def ensure_dir(path: Path) -> Path:
    """create `path` (and any missing parents) if it does not exist yet, then hand it back

    an already existing directory is left as is, so the call can be repeated safely
    """
    path.mkdir(exist_ok=True, parents=True)
    return path
 def get_data_path(*parts: str) -> Path:
    """join `parts` onto DATA_DIR and return the resulting path (nothing is created on disk)"""
    return Path(DATA_DIR, *parts)
 def get_experiments_path(*parts: str) -> Path:
    """join `parts` onto EXPERIMENTS_DIR and return the resulting path (nothing is created on disk)"""
    return Path(EXPERIMENTS_DIR, *parts)
 def get_sim_path(*parts: str) -> Path:
    """join `parts` onto SIM_RUNS_DIR and return the resulting path (nothing is created on disk)"""
    return Path(SIM_RUNS_DIR, *parts)
 # service configuration, read from environment variables with local-development defaults
 # NOTE(review): this module only calls os.getenv; a .env file must be loaded by the caller
 # (or exported into the environment) before this module is imported - confirm
 # Kafka: host and port are kept as strings and combined into "host:port"
 KAFKA_HOST = os.getenv('KAFKA_HOST', 'localhost')
 KAFKA_PORT = os.getenv('KAFKA_PORT', '9092')
 KAFKA_BROKER = f"{KAFKA_HOST}:{KAFKA_PORT}"
 # Redis: the port is converted to int (a non-numeric value raises ValueError at import time)
 REDIS_HOST = os.getenv('REDIS_HOST', 'localhost')
 REDIS_PORT = int(os.getenv('REDIS_PORT', '6379'))
 # Supabase: taken from the NEXT_PUBLIC_* variables, empty string when unset
 SUPABASE_URL = os.getenv('NEXT_PUBLIC_SUPABASE_URL', '')
 SUPABASE_ANON_KEY = os.getenv('NEXT_PUBLIC_SUPABASE_ANON_KEY', '')
 # HTTP ports of the backend and provider services, converted to int
 BACKEND_PORT = int(os.getenv('BACKEND_PORT', '5000'))
 PROVIDER_PORT = int(os.getenv('PROVIDER_PORT', '5001'))
--- a/lib/features.py
+++ b/lib/features.py
@@ -0,0 +1,125 @@
 """Unified featurization utilities for trajectory -> feature vector conversion
 Used by both experiments/ml/ and sim/rl/ components
 """
 import numpy as np
 from collections import defaultdict
 from typing import List, Dict, Callable, Optional, Any, Set
 from datetime import datetime
 def transition_histogram(events: List, state_fn: Callable, max_states: int = 50) -> np.ndarray:
    """relative frequencies of the state transitions seen in a trajectory

    events: list of event objects/dicts
    state_fn: function mapping event -> state string
    max_states: length of the returned vector
    returns: float32 vector of length max_states. bins are filled in the order in which
        distinct (state, next_state) pairs first occur in this trajectory; pairs beyond
        max_states are dropped but still count towards the normalising total.
        all zeros for fewer than two events
    """
    hist = np.zeros(max_states, dtype=np.float32)
    if len(events) < 2:
        return hist
    visited = [state_fn(e) for e in events]
    pair_counts = {}
    for pair in zip(visited, visited[1:]):
        pair_counts[pair] = pair_counts.get(pair, 0) + 1
    kept = list(pair_counts.values())[:max_states]
    hist[:len(kept)] = kept
    return hist / (sum(pair_counts.values()) + 1e-10)
 def temporal_signature(events: List, ts_fn: Callable) -> np.ndarray:
    """timing statistics of a trajectory, computed on the sorted timestamps

    events: list of event objects/dicts
    ts_fn: function mapping event -> timestamp (float seconds)
    returns: float32 array [mean_dt, std_dt, skew, n_intervals] over the gaps between
        consecutive timestamps; zeros when there are fewer than two events.
        skew is 0.0 when the gaps are (numerically) constant
    """
    empty = np.zeros(4, dtype=np.float32)
    if len(events) < 2:
        return empty
    ordered = sorted(ts_fn(e) for e in events)
    gaps = np.diff(ordered).astype(np.float32)
    if len(gaps) == 0:
        return empty
    mean_dt = np.mean(gaps)
    std_dt = np.std(gaps) + 1e-10  # epsilon keeps the standardisation below finite
    if std_dt > 1e-8:
        skew = np.mean(((gaps - mean_dt) / std_dt) ** 3)
    else:
        skew = 0.0
    return np.array([mean_dt, std_dt, skew, len(gaps)], dtype=np.float32)
 def state_coverage(events: List, state_fn: Callable, mdp_states: Set[str]) -> float:
    """share of the MDP's states that the trajectory touches at least once

    events: list of event objects/dicts
    state_fn: function mapping event -> state string
    mdp_states: set of all possible MDP states
    returns: value in [0, 1]; 0.0 when mdp_states is empty
    """
    if not mdp_states:
        return 0.0
    seen = {state_fn(e) for e in events}
    known_and_seen = seen & mdp_states  # states outside the MDP do not count
    return len(known_and_seen) / len(mdp_states)
 def transition_entropy(events: List, state_fn: Callable) -> float:
    """Shannon entropy (natural log) of the trajectory's transition frequencies

    a trajectory that repeats one transition scores ~0, one that spreads over many
    different transitions scores higher. returns 0.0 for fewer than two events
    """
    if len(events) < 2:
        return 0.0
    visited = [state_fn(e) for e in events]
    pair_counts = {}
    for pair in zip(visited, visited[1:]):
        pair_counts[pair] = pair_counts.get(pair, 0) + 1
    total = sum(pair_counts.values())
    weighted_logs = 0
    for count in pair_counts.values():
        p = count / total
        weighted_logs += p * np.log(p + 1e-10)  # epsilon avoids log(0)
    return -weighted_logs
 def event_type_distribution(events: List, event_name_fn: Callable) -> np.ndarray:
    """share of events falling into four keyword-based categories

    an event counts for a category when its lower-cased name contains one of the
    category's keywords, so one event may count for several categories.
    returns: float32 [page_view_ratio, hover_ratio, cart_ratio, purchase_ratio];
        zeros for an empty trajectory
    """
    if not events:
        return np.zeros(4, dtype=np.float32)
    lowered = [event_name_fn(e).lower() for e in events]
    keyword_groups = (
        ('page', 'view'),
        ('hover',),
        ('cart',),
        ('purchase', 'checkout'),
    )
    ratios = [
        sum(1 for name in lowered if any(keyword in name for keyword in keywords)) / len(events)
        for keywords in keyword_groups
    ]
    return np.array(ratios, dtype=np.float32)
 def featurize_trajectory(events: List, state_fn: Callable, ts_fn: Callable,
                         event_name_fn: Callable, mdp_states: Optional[Set[str]] = None,
                         output_dim: int = 64) -> np.ndarray:
    """build a fixed-length float32 feature vector for one trajectory

    events: list of event objects/dicts
    state_fn: function mapping event -> state string
    ts_fn: function mapping event -> timestamp (float)
    event_name_fn: function mapping event -> event name string
    mdp_states: optional set of all MDP states for coverage calculation
    output_dim: length of the result; the 52 raw features are zero-padded or truncated to it

    layout: 40 transition-histogram bins, 4 timing statistics, state coverage,
    transition entropy, trajectory length, number of distinct states, 4 event-type ratios
    """
    histogram = transition_histogram(events, state_fn, max_states=40)
    timing = temporal_signature(events, ts_fn)
    coverage = state_coverage(events, state_fn, mdp_states or set())
    entropy = transition_entropy(events, state_fn)
    n_events = float(len(events))
    n_distinct = float(len({state_fn(e) for e in events}))
    type_mix = event_type_distribution(events, event_name_fn)
    vector = np.concatenate(
        [histogram, timing, [coverage, entropy, n_events, n_distinct], type_mix]
    ).astype(np.float32)[:output_dim]
    shortfall = output_dim - len(vector)
    if shortfall > 0:
        vector = np.pad(vector, (0, shortfall))
    return vector
 def parse_timestamp(ts: Any) -> float:
    """parse various timestamp formats to float seconds

    accepted inputs:
      - None -> 0.0
      - int / float -> returned as float
      - datetime -> its POSIX timestamp
      - str -> ISO-8601 text (a 'Z' suffix is read as UTC); if that fails, a plain
        numeric string such as "1700000000.5" is read as seconds
    any other type, an unparseable string, or a non-finite numeric string yields 0.0
    """
    if ts is None:
        return 0.0
    if isinstance(ts, (int, float)):
        return float(ts)
    if isinstance(ts, datetime):
        return ts.timestamp()
    if isinstance(ts, str):
        text = ts.strip()
        # ISO-8601 is tried first so strings that parsed before keep their meaning
        try:
            return datetime.fromisoformat(text.replace('Z', '+00:00')).timestamp()
        except ValueError:
            pass
        try:
            seconds = float(text)
        except ValueError:
            return 0.0
        # "nan" / "inf" are valid floats but useless as timestamps
        if seconds != seconds or seconds in (float('inf'), float('-inf')):
            return 0.0
        return seconds
    return 0.0
--- a/lib/kafka_client.py
+++ b/lib/kafka_client.py
@@ -0,0 +1,54 @@
 from kafka import KafkaConsumer
 import json
 import os
 from dotenv import load_dotenv
 load_dotenv()
 def get_interactions(
    topic='user-interactions',
    bootstrap_servers=None,
    from_beginning=True,
    max_records=None,
    timeout_ms=5000
 ):
    """Read interaction events from a Kafka topic and return them as a list.

    Args:
        topic: Kafka topic name
        bootstrap_servers: broker address; when empty it is built from the
            KAFKA_HOST / KAFKA_PORT environment variables (localhost:9092 by default)
        from_beginning: start at the earliest offset if True, otherwise at the latest
        max_records: stop after this many records (None or 0 = read until the timeout)
        timeout_ms: how long the consumer waits for a new message before iteration ends
    Returns:
        List of JSON-decoded message values, in the order they were consumed.
        Offsets are not committed and the consumer is always closed.
    """
    if not bootstrap_servers:
        bootstrap_servers = "{}:{}".format(
            os.getenv('KAFKA_HOST', 'localhost'),
            os.getenv('KAFKA_PORT', '9092'),
        )
    offset_policy = 'earliest' if from_beginning else 'latest'
    consumer = KafkaConsumer(
        topic,
        bootstrap_servers=bootstrap_servers,
        auto_offset_reset=offset_policy,
        enable_auto_commit=False,
        value_deserializer=lambda m: json.loads(m.decode('utf-8')),
        consumer_timeout_ms=timeout_ms
    )
    collected = []
    try:
        for record in consumer:
            collected.append(record.value)
            if max_records and len(collected) >= max_records:
                break
    finally:
        consumer.close()
    return collected
 if __name__ == '__main__':
    # manual smoke test: print up to ten events from the default topic
    interactions = get_interactions(max_records=10)
    for record in interactions:
        print(record)
--- a/lib/model_registry.py
+++ b/lib/model_registry.py
@@ -178,3 +178,49 @@ class ModelRegistry:
            return True
        except:
            return False
    def set_session_prices(self, session_id: str, prices: Dict[str, float], ttl: int = 1800):
        """
        Write the prices of one session to Redis (write path of session-aware pricing).
        Prices are stored as strings in the hash "session:<session_id>:prices", one field
        per product, and the hash's expiry is (re)set to `ttl`. An empty `prices` is a no-op.
        Args:
            session_id: session identifier
            prices: dict of {productId: price}
            ttl: time-to-live in seconds (default 30min)
        """
        if prices:
            redis_key = f"session:{session_id}:prices"
            encoded = {product: str(price) for product, price in prices.items()}
            # a hash gives O(1) lookup per product on the read path
            self.redis_client.hset(redis_key, mapping=encoded)
            self.redis_client.expire(redis_key, ttl)
    def get_session_price(self, session_id: str, product_id: str) -> Optional[float]:
        """
        Read one product's price for a session (read path for fast provider lookup).
        Looks up field `product_id` in the hash "session:<session_id>:prices";
        both bytes and str replies from Redis are accepted.
        Returns: price as float, or None if nothing is stored
        """
        stored = self.redis_client.hget(f"session:{session_id}:prices", product_id)
        if stored is None:
            return None
        if isinstance(stored, bytes):
            stored = stored.decode('utf-8')
        return float(stored)
    def get_session_all_prices(self, session_id: str) -> Dict[str, float]:
        """Return every stored {productId: price} of a session, or {} when none exist.

        Field names and values may arrive from Redis as bytes or str; both are handled.
        """
        def _as_text(raw):
            return raw.decode('utf-8') if isinstance(raw, bytes) else raw

        stored = self.redis_client.hgetall(f"session:{session_id}:prices")
        if not stored:
            return {}
        return {_as_text(field): float(_as_text(value)) for field, value in stored.items()}
--- a/lib/state.py
+++ b/lib/state.py
@@ -0,0 +1,72 @@
 """Unified state representation utilities for MDP state encoding
 Used by both experiments/ and sim/ components for consistent state handling
 """
 from typing import Any, Callable
 def make_state_repr(page: str = None, product_id: str = None, event_name: str = None) -> str:
    """build the canonical state string from its three components
    format: page|productId|eventName
    a falsy component is replaced by its placeholder: 'unk', 'none', 'unknown'
    """
    parts = (page or 'unk', product_id or 'none', event_name or 'unknown')
    return "{}|{}|{}".format(*parts)
 def event_to_state(evt: Any) -> str:
    """map an event (dict or attribute-style object) to its state string
    fields read: 'page', 'productId', and 'eventName' with 'event_type' as fallback
    """
    if isinstance(evt, dict):
        def read(field_name):
            return evt.get(field_name)
    else:
        def read(field_name):
            return getattr(evt, field_name, None)
    return make_state_repr(
        page=read('page'),
        product_id=read('productId'),
        event_name=read('eventName') or read('event_type'),
    )
 def parse_state(state_str: str) -> dict:
    """split a state string back into its components
    returns: {'page': str, 'productId': str, 'eventName': str}
    a component that is absent or equal to its placeholder ('unk', 'none', 'unknown') maps to None
    """
    pieces = state_str.split('|')
    layout = (('page', 'unk'), ('productId', 'none'), ('eventName', 'unknown'))
    parsed = {}
    for pos, (name, placeholder) in enumerate(layout):
        value = pieces[pos] if pos < len(pieces) else None
        parsed[name] = None if value == placeholder else value
    return parsed
 def get_event_name(evt: Any) -> str:
    """return the event's name from 'eventName', else 'event_type', else ''"""
    from_dict = isinstance(evt, dict)
    for field_name in ('eventName', 'event_type'):
        value = evt.get(field_name) if from_dict else getattr(evt, field_name, None)
        if value:
            return value
    return ''
 def get_timestamp(evt: Any) -> Any:
    """extract the raw timestamp from an event object/dict

    'ts' is consulted first, then 'timestamp'. a field counts as missing only when
    it is None or an empty string, so a legitimate zero timestamp (e.g. a session's
    first event at t=0.0) is returned instead of being skipped as falsy
    returns: the raw, unconverted value, or None when neither field is set
    """
    from_dict = isinstance(evt, dict)
    for field_name in ('ts', 'timestamp'):
        value = evt.get(field_name) if from_dict else getattr(evt, field_name, None)
        if value is None or (isinstance(value, str) and not value):
            continue
        return value
    return None
 def create_state_fn() -> Callable:
    """factory for state representation function
    returns event_to_state itself (no wrapping): a callable mapping an event to its state string
    """
    return event_to_state
 def create_event_name_fn() -> Callable:
    """factory for event name extraction function
    returns get_event_name itself (no wrapping): a callable mapping an event to its name, '' if unset
    """
    return get_event_name
 def create_timestamp_fn() -> Callable:
    """factory for timestamp extraction function (returns raw value, use features.parse_timestamp to convert)
    returns get_timestamp itself (no wrapping)
    """
    return get_timestamp
--- a/sim/case/init.py
+++ b/sim/case/init.py
@@ -0,0 +1,2 @@
 """Case-specific simulations and experiments."""
--- a/sim/case/thesis_simplified/init.py
+++ b/sim/case/thesis_simplified/init.py
@@ -0,0 +1,2 @@
 """Minimal thesis-aligned pricing simulation (self-contained)."""
--- a/sim/case/thesis_simplified/coi.py
+++ b/sim/case/thesis_simplified/coi.py
@@ -0,0 +1,125 @@
 """Cost of Information (COI) computation for thesis pricing system.
 Core KPI: COI = E[p_shown] - p_min measures pricing power from information asymmetry.
 Theorem 1 shows COI erodes as agent queries increase: as N->inf, p^(1)->p_min.
 """
 from __future__ import annotations
 from dataclasses import dataclass
 from typing import Dict, List, TYPE_CHECKING
 import numpy as np
 if TYPE_CHECKING:
    from .simplified import Session
@dataclass(frozen=True)
 class COIWindow:
    """Windowed COI metrics computed from realized price exposures.
    policy: E[p_shown] - cost, the definition-level KPI
    agent: E[p^(1)] - cost where p^(1) is min price under agent querying
    leak: max(policy - agent, 0), observable gap from reconnaissance
    survival_ratio: agent/policy, fraction of pricing power retained
                    (compute_coi_window clips it to [0, 1] and uses 0.0 when policy <= 0)
    The three arrays are per-product, indexed by product index.
    """
    # window-level scalars
    policy: float
    agent: float
    leak: float
    survival_ratio: float
    # per-product policy COI (mean shown price minus cost); 0.0 for products never shown
    policy_by_product: np.ndarray
    # per-product agent COI (lowest price any agent session saw, minus cost)
    agent_by_product: np.ndarray
    # per-product demand weights used for the window-level aggregation
    demand_weights: np.ndarray
 def aggregate_prices(sessions: List["Session"], mode: str = "all") -> Dict[int, List[float] | float]:
    """Unified price aggregation across sessions.

    mode: "all" returns all prices per product, "min_per_session" returns min price per session per product,
          "min_across" returns single min price per product.
          Any other value is treated like "all".
    Keys are plain Python ints in every mode, whatever integer type Event.product_idx carries.
    Products that never appear in an event are absent from the result.
    """
    if mode == "min_across":
        lowest: Dict[int, float] = {}
        for s in sessions:
            for e in s.events:
                pidx, price = int(e.product_idx), float(e.price_seen)
                lowest[pidx] = min(lowest.get(pidx, price), price)
        return lowest
    if mode == "min_per_session":
        per_session: Dict[int, List[float]] = {}
        for s in sessions:
            session_min: Dict[int, float] = {}
            for e in s.events:
                pidx, price = int(e.product_idx), float(e.price_seen)
                session_min[pidx] = min(session_min.get(pidx, price), price)
            # one entry per (session, product) pair
            for pidx, pmin in session_min.items():
                per_session.setdefault(pidx, []).append(pmin)
        return per_session
    # "all" (also the fallback for unrecognised modes)
    every: Dict[int, List[float]] = {}
    for s in sessions:
        for e in s.events:
            # cast like the other modes so the key type does not depend on the mode
            every.setdefault(int(e.product_idx), []).append(float(e.price_seen))
    return every
 def demand_weights_by_product(sessions: List["Session"], demand_mapping: Dict[str, float], n_products: int) -> np.ndarray:
    """Compute demand-weighted importance per product.

    Each session's demand q is credited to the product of that session's first event.
    Entries of demand_mapping without a matching session, sessions without events, and
    product indices outside [0, n_products) contribute nothing.
    Returns a length-n_products array normalised to sum to 1, or all zeros when no
    positive demand was credited.
    """
    w = np.zeros(n_products, dtype=float)
    sessions_by_id = {s.sid: s for s in sessions}
    for sid, q in demand_mapping.items():
        sess = sessions_by_id.get(sid)
        if not (sess and sess.events):
            continue
        pidx = int(sess.events[0].product_idx)
        # same bounds rule compute_coi_window applies: an unchecked index would raise
        # when too large and silently credit the wrong product when negative
        if 0 <= pidx < n_products:
            w[pidx] += float(q)
    total = float(np.sum(w))
    return (w / total) if total > 0 else w
 def compute_coi_window(sessions: List["Session"], costs: np.ndarray, demand_mapping: Dict[str, float] | None = None) -> COIWindow:
    """Compute COI metrics over session window.
    Aggregates price exposures and computes policy-level vs agent-realized COI.
    sessions: sessions of the window; those with actor == "A" are treated as agent sessions
    costs: per-product costs; its length defines the number of products
    demand_mapping: optional sid -> demand; when it yields positive weights they are used
                    for aggregation, otherwise seen products are averaged unweighted
    returns: COIWindow with window-level scalars and per-product arrays
    """
    n = int(len(costs))
    # every price shown per product, across all sessions
    prices = aggregate_prices(sessions, mode="all")
    agent_sessions = [s for s in sessions if s.actor == "A"]
    # lowest price any agent session saw, per product
    agent_min = aggregate_prices(agent_sessions, mode="min_across") if agent_sessions else {}
    policy_by = np.zeros(n, dtype=float)
    agent_by = np.zeros(n, dtype=float)
    # masks: product was shown at all / was shown to at least one agent session
    seen = np.array([(i in prices) for i in range(n)], dtype=bool)
    agent_seen = np.array([(i in agent_min) for i in range(n)], dtype=bool)
    for pidx, ps in prices.items():
        if 0 <= pidx < n and ps:
            policy_by[pidx] = float(np.mean(ps) - float(costs[pidx]))
    for pidx, pmin in agent_min.items():
        if 0 <= pidx < n:
            agent_by[pidx] = float(pmin - float(costs[pidx]))
    agent_by[seen & ~agent_seen] = policy_by[seen & ~agent_seen]  # no erosion if no agent exposure
    demand_w = demand_weights_by_product(sessions, demand_mapping, n) if demand_mapping else np.zeros(n, dtype=float)
    has_weights = float(np.sum(demand_w)) > 0
    if has_weights:
        # demand-weighted aggregation over all products
        policy, agent = float(np.dot(demand_w, policy_by)), float(np.dot(demand_w, agent_by))
    elif np.any(seen):
        # no usable weights: plain mean over the products that were shown
        policy, agent = float(np.mean(policy_by[seen])), float(np.mean(agent_by[seen]))
    else:
        policy, agent = 0.0, 0.0
    leak = float(max(policy - agent, 0.0))
    # ratio is only defined for positive policy COI; reported as 0.0 otherwise
    survival = float(np.clip(agent / policy, 0.0, 1.0)) if policy > 0 else 0.0
    return COIWindow(policy=policy, agent=agent, leak=leak, survival_ratio=survival,
                     policy_by_product=policy_by, agent_by_product=agent_by, demand_weights=demand_w)
 def coi_erosion(coi_policy: float, coi_agent: float, eps: float = 1e-9) -> float:
    """Thesis-consistent COI erosion: share of pricing power destroyed by agent queries.
    erosion = 1 - (COI_agent / COI_policy), clipped to [0, 1]
    As agents find low prices COI_agent -> 0 and erosion -> 1.
    Returns 0.0 when coi_policy <= eps.
    """
    if coi_policy <= eps:
        # no measurable pricing power to erode
        return 0.0
    retained = coi_agent / (coi_policy + eps)
    eroded = 1.0 - retained
    return float(np.clip(eroded, 0.0, 1.0))
--- a/sim/case/thesis_simplified/experiments.py
+++ b/sim/case/thesis_simplified/experiments.py
@@ -0,0 +1,325 @@
 """COI leakage experiments and policy comparisons.
 Demonstrates the core thesis contribution: COI erosion under agent contamination
 and recovery via robust pricing policies.
 Generates TensorBoard logs for:
 - COI erosion curves across contamination levels
 - Policy comparison (fixed vs adaptive vs RL)
 - Revenue/margin trade-offs
 """
 from __future__ import annotations
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, List, Tuple
 import json
 import numpy as np
 try:
    from torch.utils.tensorboard import SummaryWriter
    HAS_TB = True
 except ImportError:
    HAS_TB = False
 from .simplified_env import PricingEnv, EnvConfig, make_env
 from .simplified import System
@dataclass
 class ExperimentResult:
    """Container for experiment metrics.
    Field notes below describe the values as filled in by run_contamination_sweep.
    """
    name: str  # policy name
    alpha: float  # contamination level the environment was configured with
    reward_mean: float  # mean of the collected per-step rewards
    reward_std: float  # standard deviation of the collected per-step rewards
    coi_erosion: float  # mean of info['coi_erosion'] over steps, 0.0 if never reported
    alpha_error: float  # mean |alpha_true - alpha_est| over steps, 0.0 if never reported
    revenue: float  # mean of info['revenue'] over steps, 0.0 if never reported
    margin: float  # mean policy output on an all-zero observation, minus 1
    def to_dict(self) -> dict:
        """Return all dataclass fields as a plain {field name: value} dict."""
        return {k: getattr(self, k) for k in self.__dataclass_fields__}
 def theoretical_coi_erosion_curve(alphas: np.ndarray, n_sessions: int = 1000) -> np.ndarray:
    """Theoretical COI erosion from Theorem 1 using order statistic model.
    For N i.i.d. uniform queries on [p_min, p_max]:
    E[p^(1)] = p_min + (p_max - p_min)/(N+1), so erosion = 1 - 2/(N+1)

    alphas: contamination fractions
    n_sessions: sessions per window; N is alpha * n_sessions rounded to the nearest
                integer, with a floor of 1 (so alpha = 0 yields erosion 0)
    returns: float array with one erosion value per alpha
    """
    erosions = []
    for a in alphas:
        # round instead of truncating: 0.29 * 100 evaluates to 28.999999999999996,
        # which int() alone would turn into 28 agents
        n_agents = max(1, int(round(float(a) * n_sessions)))
        erosions.append(1.0 - 2.0 / (n_agents + 1))
    return np.array(erosions, dtype=float)
 def run_policy_episode(
    env: PricingEnv,
    policy_fn,
    n_episodes: int = 10
 ) -> Tuple[List[float], List[float], List[float], List[float]]:
    """Roll a policy out for n_episodes episodes and gather per-step metrics.
    policy_fn is called as policy_fn(obs, env.n) to produce each action.
    Returns (rewards, coi_erosions, alpha_errors, revenues); a metric only gets an
    entry for steps whose info dict carries the corresponding key(s).
    """
    rewards: List[float] = []
    coi_erosions: List[float] = []
    alpha_errors: List[float] = []
    revenues: List[float] = []
    for _episode in range(n_episodes):
        obs, info = env.reset()
        while True:
            obs, reward, terminated, truncated, info = env.step(policy_fn(obs, env.n))
            rewards.append(reward)
            if 'coi_erosion' in info:
                coi_erosions.append(info['coi_erosion'])
            if 'alpha_true' in info and 'alpha_est' in info:
                alpha_errors.append(abs(info['alpha_true'] - info['alpha_est']))
            if 'revenue' in info:
                revenues.append(info['revenue'])
            if terminated or truncated:
                break
    return rewards, coi_erosions, alpha_errors, revenues
 class PolicyRegistry:
    """Registry of baseline policies.
    Every policy takes (obs, n) and returns n price multipliers; all but `random`
    return the same multiplier for every product.
    """
    @staticmethod
    def fixed(obs: np.ndarray, n: int, margin: float = 0.15) -> np.ndarray:
        """Constant multiplier 1 + margin; obs is ignored."""
        return np.ones(n, dtype=np.float32) * (1.0 + margin)
    @staticmethod
    def random(obs: np.ndarray, n: int, rng: np.random.Generator = None) -> np.ndarray:
        """Independent uniform multipliers in [0.7, 1.3); obs is ignored."""
        # without an explicit rng a fresh, unseeded generator is created on every call
        rng = rng or np.random.default_rng()
        return rng.uniform(0.7, 1.3, n).astype(np.float32)
    @staticmethod
    def adaptive(obs: np.ndarray, n: int, base_margin: float = 0.15) -> np.ndarray:
        """Reduce margins when alpha estimate is high."""
        # alpha estimate is read from obs[2 * n]; 0.2 is used when obs is too short
        alpha_est = obs[2 * n] if len(obs) > 2 * n else 0.2
        # margin shrinks linearly with alpha: full at alpha=0, 60% of base at alpha=1
        margin_scale = 1.0 - 0.4 * alpha_est
        return np.ones(n, dtype=np.float32) * (1.0 + base_margin * margin_scale)
    @staticmethod
    def aggressive(obs: np.ndarray, n: int) -> np.ndarray:
        """High margins, ignores contamination."""
        return np.ones(n, dtype=np.float32) * 1.4
    @staticmethod
    def defensive(obs: np.ndarray, n: int) -> np.ndarray:
        """Low margins, always cautious."""
        return np.ones(n, dtype=np.float32) * 1.05
    @staticmethod
    def alpha_proportional(obs: np.ndarray, n: int, max_margin: float = 0.3) -> np.ndarray:
        """Margin inversely proportional to estimated alpha."""
        # same obs[2 * n] lookup and 0.2 fallback as `adaptive`
        alpha_est = obs[2 * n] if len(obs) > 2 * n else 0.2
        # max_margin at alpha=0, falling linearly to zero margin at alpha=1
        margin = max_margin * (1.0 - alpha_est)
        return np.ones(n, dtype=np.float32) * (1.0 + margin)
 def run_contamination_sweep(
    alphas: List[float],
    policies: Dict[str, callable],
    n_products: int = 10,
    max_steps: int = 200,
    n_episodes: int = 10,
    seed: int = 42,
    log_dir: str = None
 ) -> Dict[str, List[ExperimentResult]]:
    """Run policies across contamination levels.

    alphas: contamination levels to evaluate
    policies: policy name -> callable(obs, n) returning price multipliers
    n_products, max_steps, seed: forwarded to EnvConfig (reward_mode is "robust")
    n_episodes: episodes per (alpha, policy) pair
    log_dir: when set and TensorBoard is importable, scalars are written to <log_dir>/sweep
             using alpha in percent as the step
    returns: policy name -> one ExperimentResult per alpha, in the order of `alphas`
    """
    results = {name: [] for name in policies}
    writer = SummaryWriter(Path(log_dir) / "sweep") if log_dir and HAS_TB else None
    try:
        for alpha in alphas:
            print(f"  alpha={alpha:.2f}", end=" ")
            # step = alpha in percent; rounded because int() alone truncates products
            # such as 0.29 * 100 (28.999999999999996) onto the neighbouring step
            step = int(round(alpha * 100))
            env_cfg = EnvConfig(
                n_products=n_products, max_steps=max_steps,
                alpha_true=alpha, reward_mode="robust", seed=seed)
            # NOTE(review): one env instance is shared by all policies at this alpha, so
            # each policy starts from whatever state the previous rollout left behind
            env = make_env(env_cfg)
            for name, policy_fn in policies.items():
                rewards, coi_vals, alpha_errs, revenues = run_policy_episode(env, policy_fn, n_episodes)
                result = ExperimentResult(
                    name=name, alpha=alpha,
                    reward_mean=float(np.mean(rewards)),
                    reward_std=float(np.std(rewards)),
                    coi_erosion=float(np.mean(coi_vals)) if coi_vals else 0.0,
                    alpha_error=float(np.mean(alpha_errs)) if alpha_errs else 0.0,
                    revenue=float(np.mean(revenues)) if revenues else 0.0,
                    # nominal margin: policy output on an all-zero observation, minus 1
                    margin=float(np.mean([policy_fn(np.zeros(3 * n_products + 3), n_products)]) - 1.0))
                results[name].append(result)
                if writer:
                    writer.add_scalar(f'{name}/reward', result.reward_mean, step)
                    writer.add_scalar(f'{name}/coi_erosion', result.coi_erosion, step)
                    writer.add_scalar(f'{name}/alpha_error', result.alpha_error, step)
                    writer.add_scalar(f'{name}/revenue', result.revenue, step)
            print("done")
        # add theoretical curve
        if writer:
            theo = theoretical_coi_erosion_curve(np.array(alphas))
            for a, e in zip(alphas, theo):
                writer.add_scalar('theoretical/coi_erosion', e, int(round(a * 100)))
    finally:
        # flush and release the event file even when a rollout raises
        if writer:
            writer.close()
    return results
 def run_coi_demonstration(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
    """Main COI demonstration experiment.
    Prints the theoretical erosion curve, sweeps the baseline policies over several
    contamination levels, prints a per-policy summary and writes everything to
    <log_dir>/coi_demo_results.json (plus TensorBoard scalars when available).
    log_dir: output directory, created if missing
    seed: environment seed forwarded to the sweep
    returns: {'theoretical': {...}, 'empirical': {policy: [result dicts]}}
    """
    print("=== COI Leakage Demonstration ===\n")
    Path(log_dir).mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(Path(log_dir) / "coi_demo") if HAS_TB else None
    # theoretical erosion curve
    print("1. Theoretical COI erosion (Theorem 1)")
    alphas = np.linspace(0.0, 0.6, 13)
    theo_erosion = theoretical_coi_erosion_curve(alphas, n_sessions=1000)
    for a, e in zip(alphas, theo_erosion):
        print(f"   alpha={a:.2f} -> erosion={e:.3f}")
        if writer:
            # alpha in percent serves as the TensorBoard step
            writer.add_scalar('theory/coi_erosion', e, int(a * 100))
    # policy comparison
    print("\n2. Policy comparison across contamination levels")
    policies = {
        'fixed': lambda obs, n: PolicyRegistry.fixed(obs, n),
        'aggressive': PolicyRegistry.aggressive,
        'defensive': PolicyRegistry.defensive,
        'adaptive': PolicyRegistry.adaptive,
        'alpha_proportional': PolicyRegistry.alpha_proportional,
    }
    sweep_alphas = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
    results = run_contamination_sweep(
        sweep_alphas, policies, n_products=10, max_steps=100,
        n_episodes=5, seed=seed, log_dir=log_dir)
    # summarize
    print("\n3. Summary by policy")
    for name, res_list in results.items():
        # averages are taken across the swept alphas
        avg_reward = np.mean([r.reward_mean for r in res_list])
        avg_coi = np.mean([r.coi_erosion for r in res_list])
        print(f"   {name:20s}: avg_reward={avg_reward:.2f}, avg_coi={avg_coi:.3f}")
    # save results
    output = {
        'theoretical': {'alphas': alphas.tolist(), 'erosion': theo_erosion.tolist()},
        'empirical': {name: [r.to_dict() for r in res_list] for name, res_list in results.items()}}
    with open(Path(log_dir) / "coi_demo_results.json", 'w') as f:
        json.dump(output, f, indent=2)
    if writer:
        writer.close()
    print(f"\nResults saved to {log_dir}/coi_demo_results.json")
    print(f"TensorBoard: tensorboard --logdir {log_dir}")
    return output
 def run_reward_mode_comparison(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
    """Compare different reward modes.
    Runs the adaptive baseline policy under each reward mode at a fixed contamination
    of 0.3 and writes the metrics to <log_dir>/reward_mode_results.json.
    log_dir: output directory, created if missing
    seed: environment seed, identical for every mode
    returns: mode -> {'reward_mean', 'reward_std', 'coi_erosion', 'revenue'}
    """
    print("=== Reward Mode Comparison ===\n")
    Path(log_dir).mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(Path(log_dir) / "reward_modes") if HAS_TB else None
    reward_modes = ["revenue", "profit", "robust", "coi_aware"]
    alpha = 0.3  # moderate contamination
    results = {}
    for mode in reward_modes:
        print(f"  mode={mode}", end=" ")
        # a fresh environment per mode
        env_cfg = EnvConfig(
            n_products=10, max_steps=200, alpha_true=alpha,
            reward_mode=mode, seed=seed)
        env = make_env(env_cfg)
        rewards, coi_vals, _, revenues = run_policy_episode(
            env, PolicyRegistry.adaptive, n_episodes=10)
        results[mode] = {
            'reward_mean': float(np.mean(rewards)),
            'reward_std': float(np.std(rewards)),
            'coi_erosion': float(np.mean(coi_vals)) if coi_vals else 0.0,
            'revenue': float(np.mean(revenues)) if revenues else 0.0}
        if writer:
            # one scalar per metric, all logged at step 0
            for k, v in results[mode].items():
                writer.add_scalar(f'{mode}/{k}', v, 0)
        print(f"reward={results[mode]['reward_mean']:.2f}, coi={results[mode]['coi_erosion']:.3f}")
    if writer:
        writer.close()
    with open(Path(log_dir) / "reward_mode_results.json", 'w') as f:
        json.dump(results, f, indent=2)
    return results
 def run_alpha_drift_experiment(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
    """Test policy robustness under non-stationary contamination.
    Runs the adaptive baseline policy for several alpha_drift settings, starting from
    alpha_true=0.2 with the "robust" reward mode.
    log_dir: directory for TensorBoard logs, created if missing
    seed: environment seed, identical for every drift rate
    returns: 'drift_<rate>' -> {'reward_mean', 'coi_erosion', 'alpha_tracking_error'}
    """
    # NOTE(review): unlike the other experiments, the results are not written to a JSON file
    print("=== Alpha Drift Experiment ===\n")
    Path(log_dir).mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(Path(log_dir) / "alpha_drift") if HAS_TB else None
    drift_rates = [0.0, 0.01, 0.02, 0.05]
    results = {}
    for drift in drift_rates:
        print(f"  drift={drift:.2f}", end=" ")
        # a fresh environment per drift rate
        env_cfg = EnvConfig(
            n_products=10, max_steps=200, alpha_true=0.2,
            alpha_drift=drift, reward_mode="robust", seed=seed)
        env = make_env(env_cfg)
        rewards, coi_vals, alpha_errs, _ = run_policy_episode(
            env, PolicyRegistry.adaptive, n_episodes=10)
        results[f'drift_{drift}'] = {
            'reward_mean': float(np.mean(rewards)),
            'coi_erosion': float(np.mean(coi_vals)) if coi_vals else 0.0,
            'alpha_tracking_error': float(np.mean(alpha_errs)) if alpha_errs else 0.0}
        if writer:
            # one scalar per metric, all logged at step 0
            for k, v in results[f'drift_{drift}'].items():
                writer.add_scalar(f'drift_{drift}/{k}', v, 0)
        print(f"reward={results[f'drift_{drift}']['reward_mean']:.2f}, "
              f"alpha_err={results[f'drift_{drift}']['alpha_tracking_error']:.3f}")
    if writer:
        writer.close()
    return results
 if __name__ == "__main__":
    import argparse
    cli = argparse.ArgumentParser(description="Run COI experiments")
    cli.add_argument("--exp", type=str, default="coi", choices=["coi", "reward", "drift", "all"])
    cli.add_argument("--log-dir", type=str, default="sim/case/thesis_simplified/runs")
    cli.add_argument("--seed", type=int, default=42)
    opts = cli.parse_args()
    # dispatch table in execution order; "all" runs every entry, top to bottom
    runners = {
        "coi": run_coi_demonstration,
        "reward": run_reward_mode_comparison,
        "drift": run_alpha_drift_experiment,
    }
    for exp_name, runner in runners.items():
        if opts.exp in (exp_name, "all"):
            runner(opts.log_dir, opts.seed)
--- a/sim/case/thesis_simplified/separability.py
+++ b/sim/case/thesis_simplified/separability.py
@@ -0,0 +1,72 @@
 """Behavioral separability for human/agent detection.
 Computes divergence signals delta_H, delta_A from session trajectories using
 transition kernel estimation and KL divergence to prototype behavioral profiles.
 """
 from __future__ import annotations
 from typing import Dict, List, Tuple, TYPE_CHECKING
 import numpy as np
 if TYPE_CHECKING:
    from .simplified import Event, Session
 # prototype behavioral kernels for human vs agent sessions
 # each kernel maps state -> {next_state: probability}; every row sums to 1.0
 # "start" and "end" are the entry/exit states and never appear as recorded actions
 # human prototype: carts mostly lead to a purchase
 TRANS_H = {
    "start": {"view": 0.85, "end": 0.15},
    "view": {"detail": 0.4, "cart": 0.3, "view": 0.2, "end": 0.1},
    "detail": {"cart": 0.5, "view": 0.3, "end": 0.2},
    "cart": {"purchase": 0.6, "view": 0.25, "end": 0.15},
    "purchase": {"end": 1.0},
 }
 # agent prototype: mostly cycles between view and detail, rarely purchases
 TRANS_A = {
    "start": {"view": 0.95, "end": 0.05},
    "view": {"detail": 0.6, "view": 0.25, "cart": 0.1, "end": 0.05},
    "detail": {"view": 0.5, "cart": 0.15, "detail": 0.3, "end": 0.05},
    "cart": {"view": 0.4, "purchase": 0.2, "end": 0.4},
    "purchase": {"end": 1.0},
 }
 def kl_div(p: Dict[str, float], q: Dict[str, float], eps: float = 1e-10) -> float:
    """KL divergence D_KL(p || q) for discrete distributions.
    Taken over the union of both supports; an outcome missing from either side gets
    mass eps, and eps is also added inside the log ratio to avoid division by zero.
    """
    support = set(p.keys()) | set(q.keys())
    total = 0
    for outcome in support:
        p_mass = p.get(outcome, eps)
        q_mass = q.get(outcome, eps)
        total += p_mass * np.log((p_mass + eps) / (q_mass + eps))
    return total
 def build_kernel(events: List["Event"]) -> Dict[str, Dict[str, float]]:
    """Estimate the empirical transition kernel T' of a trajectory.
    Transitions are counted between consecutive event actions, with "start" as the
    state before the first event, then each row is normalised to relative frequencies.
    Returns {} for an empty trajectory.
    """
    counts: Dict[str, Dict[str, int]] = {}
    source = "start"
    for event in events:
        target = event.action
        row = counts.setdefault(source, {})
        row[target] = row.get(target, 0) + 1
        source = target
    kernel = {}
    for state, row in counts.items():
        n_out = sum(row.values())
        if n_out > 0:
            kernel[state] = {dst: c / n_out for dst, c in row.items()}
    return kernel
 def compute_divergence(session: "Session") -> Tuple[float, float]:
    """Compute the divergence signals (delta_H, delta_A) of a session.
    delta_H = mean KL(T' || T_H) across the states of the empirical kernel T'
    delta_A = mean KL(T' || T_A) across the same states
    A session without events yields the neutral pair (0.5, 0.5).
    """
    empirical = build_kernel(session.events)
    if not empirical:
        return 0.5, 0.5
    n_states = len(empirical)
    total_h = 0
    total_a = 0
    for state, row in empirical.items():
        # a state the prototype does not define is compared against an empty row
        total_h += kl_div(row, TRANS_H.get(state, {}))
        total_a += kl_div(row, TRANS_A.get(state, {}))
    return total_h / n_states, total_a / n_states
 def estimate_alpha(session: "Session", beta: float = 2.0) -> float:
    """Per-session contamination estimate alpha_hat = sigma(beta*(delta_H - delta_A)).
    Values above 0.5 mean the trajectory sits closer to the agent prototype than to the
    human one. Returns 0.5 when the two divergences do not sum to a positive number.
    """
    delta_h, delta_a = compute_divergence(session)
    if (delta_h + delta_a) <= 0:
        return 0.5
    logit = beta * (delta_h - delta_a)
    return 1.0 / (1.0 + np.exp(-logit))
--- a/sim/case/thesis_simplified/simplified.py
+++ b/sim/case/thesis_simplified/simplified.py
@@ -0,0 +1,219 @@
 """Minimal implementation of thesis pricing system.
 Implements the core loop: prices -> sessions -> demand -> prices
 with behavioral separability and robust pricing objective.
 Objects:
 - Session trajectories tau_s from mixture of H/A behavioral profiles
 - Demand proxy q_hat via weighted action aggregation
 - COI leakage penalty for agent reconnaissance
 - Limbo: alternating price/demand history for trajectory analysis
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import Dict, List, Tuple
 import numpy as np
 from .coi import COIWindow, compute_coi_window
 from .separability import TRANS_H, TRANS_A, kl_div, build_kernel, compute_divergence, estimate_alpha
 # Demand-proxy weight omega(a) per action name; lookups elsewhere fall back to 0.1
 # for names that are not listed.
 # "cart" is the add-to-cart action as named by the behavioral kernels (TRANS_H / TRANS_A).
 # It needs its own entry: without one it received the 0.1 fallback and counted for less
 # than a plain "view".
 ACTION_WEIGHTS = {
    "add_to_cart": 0.8, "cart": 0.8, "checkout": 0.9, "purchase": 1.0,
    "view": 0.15, "detail": 0.25, "hover": 0.3,
    "start": 0.05, "end": 0.0,
 }
@dataclass
 class Event:
    """One recorded action within a session trajectory."""
    action: str  # action name, e.g. "view", "detail", "cart", "purchase"
    product_idx: int  # index of the product the action refers to
    price_seen: float  # price shown when the action happened
    ts: float  # time of the action; sample_trajectory starts each session at 0.0
@dataclass
 class Session:
    """A session trajectory together with its ground-truth actor label."""
    sid: str  # session identifier
    events: List[Event]  # recorded events, in the order they were generated
    actor: str  # H or A (ground truth label)
    # behavioral parameters; put_prices_to_market fills "price_sens" and "base_conv"
    theta: Dict[str, float] = field(default_factory=dict)
 def compute_demand(session: Session) -> float:
    """Demand proxy q_hat = sum_k omega(a_k) over the session's events.
    Actions missing from ACTION_WEIGHTS count with weight 0.1.
    """
    q_hat = 0
    for event in session.events:
        q_hat += ACTION_WEIGHTS.get(event.action, 0.1)
    return q_hat
 def sample_trajectory(rng: np.random.Generator, trans: Dict, prices: np.ndarray, costs: np.ndarray, theta: Dict[str, float],
                      is_agent: bool, session_noise: float = 0.02, surge: float = 0.08, max_mult: float = 1.8) -> Tuple[List[Event], int]:
    """Sample session trajectory from behavioral kernel.
    rng: random generator; the order of draws below determines reproducibility
    trans: transition kernel, state -> {next_state: probability}
    prices, costs: per-product posted prices and costs; one product is picked per session
    theta: behavioral parameters, "base_conv" (default 0.2) and "price_sens" (default 2.0)
    is_agent: selects the inter-event time distribution
    session_noise: std of the multiplicative noise on the session's base price
    surge: relative price increase per unit of accumulated action weight
    max_mult: cap on the shown price, as a multiple of the session's base price
    returns: (events, product index); at most 30 events
    """
    pidx = int(rng.integers(0, len(prices)))
    cost, base = float(costs[pidx]), float(prices[pidx]) * (1.0 + rng.normal(0.0, session_noise))
    # keep the base price above cost and below twice the posted price
    base = float(np.clip(base, cost * 1.01, float(prices[pidx]) * 2.0))
    price, signal, state, t = base, 0.0, "start", 0.0
    events = []
    while state != "end" and len(events) < 30:
        # a state without a row in the kernel ends the session
        probs = trans.get(state, {"end": 1.0})
        nxt = rng.choice(list(probs.keys()), p=list(probs.values()))
        if nxt == "purchase":  # purchase conversion check
            # relative markup over cost, floored at 0
            rel = max((price - cost) / (cost + 1e-6), 0.0)
            p_buy = float(np.clip(theta.get("base_conv", 0.2) * np.exp(-theta.get("price_sens", 2.0) * rel), 0.0, 1.0))
            # a rejected purchase ends the session instead
            if rng.random() > p_buy:
                nxt = "end"
        state = nxt
        if state not in {"start", "end"}:
            # the event records the price in effect before this action's surge is applied
            events.append(Event(action=state, product_idx=pidx, price_seen=float(price), ts=t))
            signal += float(ACTION_WEIGHTS.get(state, 0.1))
            price = float(np.clip(base * (1.0 + surge * signal), cost * 1.01, base * max_mult))
        # gamma-distributed gap to the next step, at least 0.2
        t += max(0.2, rng.gamma(1.5, 0.8) if is_agent else rng.gamma(2.0, 1.2))
    return events, pidx
 def put_prices_to_market(prices: np.ndarray, costs: np.ndarray, alpha: float = 0.2, n_sessions: int = 50,
                         seed: int | None = None) -> Tuple[List[Session], Dict[str, float]]:
    """Generate sessions from mixture model. Returns sessions and demand mapping sid -> q_hat.
    prices, costs: per-product posted prices and costs
    alpha: probability that a session is agent-generated
    n_sessions: number of sessions to generate
    seed: seed for the generator created inside this call
    """
    rng = np.random.default_rng(seed)
    sessions, demand = [], {}
    for i in range(n_sessions):
        # ids restart at s0000 on every call, so they are unique only within one call
        sid = f"s{i:04d}"
        is_agent = rng.random() < alpha
        trans = TRANS_A if is_agent else TRANS_H
        # agents: low price sensitivity and a fixed 1% base conversion; humans: both sampled
        theta = {"price_sens": rng.uniform(0.05, 0.2), "base_conv": 0.01} if is_agent else \
                {"price_sens": rng.uniform(1.5, 4.0), "base_conv": rng.uniform(0.2, 0.5)}
        events, _ = sample_trajectory(rng, trans, prices, costs=costs, theta=theta, is_agent=is_agent)
        session = Session(sid=sid, events=events, actor="A" if is_agent else "H", theta=theta)
        sessions.append(session)
        demand[sid] = compute_demand(session)
    return sessions, demand
@dataclass
 class LimboUpdate:
    """Single entry of the Limbo history."""
    utype: str  # "prices" or "demand"
    # price vector for "prices" entries, sid -> demand mapping for "demand" entries
    data: np.ndarray | Dict[str, float]
    t: int  # position in the history, assigned by Limbo.add_update
 class Limbo:
    """Append-only history of alternating price/demand observations."""
    def __init__(self):
        self.history: List[LimboUpdate] = []
        self._t = 0
    def add_update(self, utype: str, data: np.ndarray | Dict[str, float]) -> Dict:
        """Record an update and return a hint naming the next step of the loop."""
        entry = LimboUpdate(utype=utype, data=data, t=self._t)
        self.history.append(entry)
        self._t += 1
        if utype == "prices":
            return {"action": "observe_demand"}
        return {"action": "set_prices"}
    def _payloads(self, utype: str) -> list:
        """Payloads of all recorded updates of one type, oldest first."""
        selected = []
        for update in self.history:
            if update.utype == utype:
                selected.append(update.data)
        return selected
    def get_prices_history(self) -> List[np.ndarray]:
        """All recorded price vectors, oldest first."""
        return self._payloads("prices")
    def get_demand_history(self) -> List[Dict[str, float]]:
        """All recorded demand mappings, oldest first."""
        return self._payloads("demand")
 class System:
    """Main pricing system implementing robust Stackelberg objective.
    Manages the alternating loop: set prices p_t -> observe demand Q_hat(p_t) ->
    estimate contamination alpha from behavioral signals -> compute next prices.
    State kept across steps: costs and reference prices, the Limbo history, every
    session generated so far (_sessions), the sessions of the latest market round
    (_last_sessions) and the COI window of the latest objective evaluation.
    """
    def __init__(self, n_products: int = 10, costs: np.ndarray | None = None, lambda_coi: float = 0.5, seed: int | None = 42):
        """
        n_products: number of products
        costs: per-product unit costs; drawn uniformly from [10, 50) when omitted
        lambda_coi: weight of the COI leak penalty in the objective
        seed: seed of the system's own random generator
        """
        self.n = n_products
        self.rng = np.random.default_rng(seed)
        self.costs = costs if costs is not None else self.rng.uniform(10, 50, n_products)
        # reference prices: cost plus a 20-50% markup
        self.refs = self.costs * (1 + self.rng.uniform(0.2, 0.5, n_products))
        self.lambda_coi = lambda_coi
        self.limbo = Limbo()
        self._alpha_est = 0.2
        self._sessions: List[Session] = []
        self._last_sessions: List[Session] = []
        self._last_coi: COIWindow | None = None
    @property
    def alpha(self) -> float:
        """Current contamination estimate."""
        return self._alpha_est
    def _estimate_alpha_from_sessions(self) -> float:
        """Mean per-session alpha estimate over the 50 most recent sessions."""
        if not self._sessions:
            return self._alpha_est
        return float(np.mean([estimate_alpha(s) for s in self._sessions[-50:]]))
    def _revenue_under_demand(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
        """Revenue proxy: each session's demand is credited to the product of its first event."""
        # session ids restart at "s0000" in every market round, so a sid has to be resolved
        # against the latest round first; scanning the full history front to back would
        # always hit the round-one session that carries the same id
        latest = {s.sid: s for s in self._last_sessions}
        agg = np.zeros(self.n)
        for sid, q in demand.items():
            sess = latest.get(sid)
            if sess is None:
                # not part of the latest round: fall back to the newest match in the history
                sess = next((s for s in reversed(self._sessions) if s.sid == sid), None)
            if sess and sess.events:
                agg[sess.events[0].product_idx] += q
        return float(np.dot(prices, agg))
    def _compute_coi_window(self, demand: Dict[str, float]) -> COIWindow:
        """COI metrics of the latest market round; an all-zero window before the first round."""
        if not self._last_sessions:
            zeros = np.zeros(self.n, dtype=float)
            return COIWindow(policy=0.0, agent=0.0, leak=0.0, survival_ratio=0.0,
                             policy_by_product=zeros, agent_by_product=zeros, demand_weights=zeros)
        return compute_coi_window(self._last_sessions, self.costs, demand_mapping=demand)
    def _objective(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
        """Robust objective: R(p,d) - lambda * COI_leak."""
        # NOTE(review): the cost term is the plain sum of unit costs, independent of
        # demand - confirm this is the intended profit definition
        profit = self._revenue_under_demand(prices, demand) - float(np.sum(self.costs))
        # cached so step() can return the window without recomputing it
        self._last_coi = self._compute_coi_window(demand)
        return profit - self.lambda_coi * self._last_coi.leak
    def compute_prices(self, demand: Dict[str, float] | None = None) -> np.ndarray:
        """Compute next prices via heuristic margin adjustment based on alpha estimate.
        The demand argument is currently not used by the heuristic.
        """
        self._alpha_est = self._estimate_alpha_from_sessions()
        margin_scale = 1.0 - 0.5 * self._alpha_est  # defensive pricing under high contamination
        margins = (self.refs - self.costs) * margin_scale
        noise = self.rng.normal(0, 0.02, self.n) * self.costs
        # stay at least 2% above cost and at most 30% above the reference price
        prices = np.clip(self.costs + margins + noise, self.costs * 1.02, self.refs * 1.3)
        self.limbo.add_update("prices", prices)
        return prices
    def observe_demand(self, prices: np.ndarray, alpha_true: float = 0.2, n_sessions: int = 50) -> Dict[str, float]:
        """Simulate one market round at the given prices and return its sid -> demand mapping."""
        sessions, demand_map = put_prices_to_market(prices, costs=self.costs, alpha=alpha_true,
                                                    n_sessions=n_sessions, seed=int(self.rng.integers(0, 10000)))
        self._last_sessions = sessions
        self._sessions.extend(sessions)
        self.limbo.add_update("demand", demand_map)
        return demand_map
    def step(self, alpha_true: float = 0.2, n_sessions: int = 50) -> Tuple[np.ndarray, Dict[str, float], float, COIWindow]:
        """One loop iteration; returns (prices, demand, reward, COI window)."""
        demand_hist = self.limbo.get_demand_history()
        prices = self.compute_prices(demand_hist[-1] if demand_hist else None)
        demand = self.observe_demand(prices, alpha_true, n_sessions)
        reward = self._objective(prices, demand)
        return prices, demand, reward, self._last_coi or self._compute_coi_window(demand)
    def run(self, n_steps: int = 100, alpha_true: float = 0.2) -> Dict:
        """Run n_steps iterations and return the trajectory as a dict of per-step lists."""
        traj = {"prices": [], "demand": [], "rewards": [], "alpha_est": [], "alpha_true": alpha_true,
                "coi_policy": [], "coi_agent": [], "coi_leak": [], "coi_survival": []}
        for _ in range(n_steps):
            p, d, r, coi = self.step(alpha_true)
            traj["prices"].append(p); traj["demand"].append(d); traj["rewards"].append(r)
            traj["alpha_est"].append(self._alpha_est)
            traj["coi_policy"].append(coi.policy); traj["coi_agent"].append(coi.agent)
            traj["coi_leak"].append(coi.leak); traj["coi_survival"].append(coi.survival_ratio)
        return traj
 if __name__ == "__main__":
    # 1) closed-loop run of the pricing system
    system = System(n_products=5, seed=42)
    traj = system.run(n_steps=20, alpha_true=0.25)
    print(f"avg reward: {np.mean(traj['rewards']):.2f}, final alpha_hat: {traj['alpha_est'][-1]:.3f}, "
          f"COI_policy: {np.mean(traj['coi_policy']):.3f}, COI_agent: {np.mean(traj['coi_agent']):.3f}, leak: {np.mean(traj['coi_leak']):.3f}")
    # 2) a single market round at hand-picked prices
    prices = np.array([20.0, 35.0, 50.0, 25.0, 40.0])
    costs = np.array([15.0, 28.0, 40.0, 18.0, 30.0])
    sessions, demand = put_prices_to_market(prices, costs=costs, alpha=0.3, n_sessions=20, seed=123)
    print(f'sessions: {len(sessions)}, agents: {sum(1 for s in sessions if s.actor=="A")}')
    # 3) theoretical: erosion = 1 - 2/(N+1) for uniform order statistic
    for n in (1, 5, 10, 50, 100):
        print(f'N={n:3d} agents -> COI erosion: {1.0 - 2.0/(n+1):.3f}')
    # 4) alpha estimate on one human-like and one agent-like hand-written trajectory
    events = [
        Event('view', 0, 20.0, 0.1),
        Event('detail', 0, 20.0, 0.5),
        Event('cart', 0, 20.0, 1.0),
        Event('purchase', 0, 20.0, 2.0),
    ]
    print(f'human-like session alpha_hat: {estimate_alpha(Session(sid="test", events=events, actor="H")):.3f}')
    events_a = [
        Event('view', 0, 20.0, 0.1),
        Event('detail', 0, 20.0, 0.2),
        Event('view', 0, 20.0, 0.3),
        Event('detail', 0, 20.0, 0.4),
    ]
    print(f'agent-like session alpha_hat: {estimate_alpha(Session(sid="test2", events=events_a, actor="A")):.3f}')
--- a/sim/case/thesis_simplified/simplified_env.py
+++ b/sim/case/thesis_simplified/simplified_env.py
@@ -0,0 +1,249 @@
 """Gymnasium-compatible RL environment for thesis pricing system.
 Wraps simplified.System with standard Gym interface for training pricing policies.
 Supports multiple reward modes and contamination scenarios.
 Action: price multipliers [0.5, 1.5] applied to reference prices
 Observation: [prices/refs, demand shares, alpha_est, alpha_true, margins, t/max_steps]
 Reward: configurable objective (revenue, profit, robust, coi-aware)
 """
 from __future__ import annotations
 from dataclasses import dataclass
 from typing import Any, Dict, Tuple
 import numpy as np
 try:
    import gymnasium as gym
    from gymnasium import spaces
    HAS_GYM = True
 except ImportError:
    HAS_GYM = False
 from .simplified import System, Session, Event, Limbo, put_prices_to_market, compute_demand, estimate_alpha
 from .coi import COIWindow, compute_coi_window, coi_erosion
@dataclass
 class EnvConfig:
    """Configuration for the pricing environments defined in this module."""
    # Number of products; sets the size of the action vector and of each per-product observation slice.
    n_products: int = 5
    # Episode length: step() reports termination once the step counter reaches this value.
    max_steps: int = 200
    # Number of sessions requested from the system on every step.
    sessions_per_step: int = 30
    # True contamination level the environment starts each episode with.
    alpha_true: float = 0.2
    # NOTE(review): not referenced by any environment in this module — confirm whether it is still needed.
    alpha_drift: float = 0.0
    # Clipping range applied to the true contamination level by AdversarialEnv.
    alpha_bounds: Tuple[float, float] = (0.0, 0.6)
    # Weight of the COI leak term in the "robust" and "coi_aware" rewards; also passed to System.
    lambda_coi: float = 0.5
    # Weight of the price-change (volatility) penalty in the "robust" and "coi_aware" rewards.
    lambda_vol: float = 0.1
    reward_mode: str = "robust"  # revenue | profit | robust | coi_aware
    # When True, rewards are divided by the sum of the reference prices.
    normalize_reward: bool = True
    # Seed used by reset() whenever the caller does not pass one explicitly.
    seed: int | None = 42
 def aggregate_purchases(sessions: list[Session], n_products: int, costs: np.ndarray) -> Tuple[np.ndarray, float, float]:
    """Tally purchase events across sessions.
    Only events whose action is "purchase" and whose product index lies in
    [0, n_products) are counted.
    Returns a tuple (per-product purchase counts, total revenue, total cost), where
    revenue sums the price seen at purchase and cost sums the product's unit cost.
    """
    counts = np.zeros(n_products, dtype=float)
    total_revenue = 0.0
    total_cost = 0.0
    purchase_events = (ev for s in sessions for ev in s.events if ev.action == "purchase")
    for ev in purchase_events:
        idx = ev.product_idx
        if not (0 <= idx < n_products):
            continue
        counts[idx] += 1.0
        total_revenue += float(ev.price_seen)
        total_cost += float(costs[idx])
    return counts, total_revenue, total_cost
 class PricingEnv(gym.Env if HAS_GYM else object):
    """RL environment for dynamic pricing under agent contamination.
    Platform sets prices p_t, market responds with mixture demand Q(p) = (1-alpha)*D_H + alpha*D_A.
    Agent estimates contamination alpha_hat from behavioral signals.
    Reward balances profit vs COI leakage.
    Observation layout (length 3*n + 3): price/reference ratios (n), per-product demand
    shares (n), [alpha_hat, alpha_true], relative margins (n), episode progress t/max_steps.
    Actions are per-product multipliers on the reference prices, clipped to [0.5, 1.5].
    """
    metadata = {"render_modes": ["human", "ansi"]}
    def __init__(self, cfg: EnvConfig | None = None):
        """Create the environment; raises ImportError when gymnasium is not installed."""
        if not HAS_GYM:
            raise ImportError("gymnasium required")
        self.cfg = cfg or EnvConfig()
        self.n = self.cfg.n_products
        self._sys: System | None = None  # built lazily by reset()
        self._t = 0
        self._alpha = self.cfg.alpha_true
        self._last_prices: np.ndarray | None = None
        self._last_demand: Dict[str, float] | None = None
        self._episode_rewards: list[float] = []
        self._demand_agg = np.zeros(self.n)
        self.action_space = spaces.Box(low=0.5, high=1.5, shape=(self.n,), dtype=np.float32)
        obs_dim = self.n + self.n + 1 + 1 + self.n + 1  # prices + demand + alpha_hat + alpha + margins + t
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)
    def _build_obs(self) -> np.ndarray:
        """Assemble the float32 observation vector (all zeros before the first reset)."""
        if self._sys is None:
            return np.zeros(self.observation_space.shape[0], dtype=np.float32)
        # Before the first step there are no posted prices yet, so fall back to the reference prices.
        prices = self._last_prices if self._last_prices is not None else self._sys.refs
        return np.concatenate([
            prices / (self._sys.refs + 1e-6),
            self._demand_agg / (np.sum(self._demand_agg) + 1e-6),
            [self._sys.alpha, self._alpha],
            (prices - self._sys.costs) / (self._sys.costs + 1e-6),
            [self._t / self.cfg.max_steps],
        ]).astype(np.float32)
    def _compute_reward(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
        """Score one step according to cfg.reward_mode.
        Side effect: refreshes self._demand_agg with the per-product demand of this step.
        Unknown reward modes fall back to plain profit. Returns 0.0 before reset().
        """
        cfg, system = self.cfg, self._sys
        if system is None:
            return 0.0
        # Index sessions by id once (first occurrence wins, matching a front-to-back scan)
        # instead of rescanning the whole session list for every demand entry.
        first_by_sid: dict = {}
        for s in system._sessions:
            first_by_sid.setdefault(s.sid, s)
        # aggregate demand per product, attributed to the product of the session's first event
        agg = np.zeros(self.n)
        for sid, q in demand.items():
            sess = first_by_sid.get(sid)
            if sess and sess.events:
                agg[sess.events[0].product_idx] += q
        self._demand_agg = agg
        _, revenue, cost = aggregate_purchases(system._last_sessions, self.n, system.costs)
        profit = revenue - cost
        # Volatility penalty: mean absolute price change relative to reference prices.
        vol_penalty = 0.0
        if self._last_prices is not None:
            vol_penalty = cfg.lambda_vol * float(np.mean(np.abs(prices - self._last_prices) / (system.refs + 1e-6)))
        coi = compute_coi_window(system._last_sessions, system.costs, demand_mapping=demand)
        leak = float(coi.leak)
        reward_fns = {
            "revenue": lambda: revenue,
            "profit": lambda: profit,
            "robust": lambda: profit - cfg.lambda_coi * leak - vol_penalty,
            "coi_aware": lambda: profit - cfg.lambda_coi * (1 + 2 * system.alpha) * leak - vol_penalty,
        }
        r = reward_fns.get(cfg.reward_mode, lambda: profit)()
        return float(r / (float(np.sum(system.refs)) + 1e-6)) if cfg.normalize_reward else float(r)
    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
        """Start a new episode with a freshly built System and return (observation, info).
        NOTE(review): when no seed is passed the System is rebuilt from cfg.seed on every
        reset, so consecutive episodes start from the same seed — confirm this is intended.
        """
        seed = seed if seed is not None else self.cfg.seed
        self._sys = System(n_products=self.n, lambda_coi=self.cfg.lambda_coi, seed=seed)
        self._t, self._alpha = 0, self.cfg.alpha_true
        self._last_prices, self._last_demand = None, None
        self._episode_rewards, self._demand_agg = [], np.zeros(self.n)
        return self._build_obs(), {"alpha_true": self._alpha, "alpha_est": self._sys.alpha,
                                   "costs": self._sys.costs.copy(), "refs": self._sys.refs.copy()}
    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, dict]:
        """Apply price multipliers for one step.
        Returns (observation, reward, terminated, truncated, info); terminated becomes True
        once max_steps is reached and truncated is always False.
        Raises RuntimeError if reset() has not been called.
        """
        if self._sys is None:
            raise RuntimeError("call reset() first")
        action = np.clip(action, 0.5, 1.5)
        # Prices are kept at least 1% above cost and at most twice the reference price.
        prices = np.clip(self._sys.refs * action.astype(np.float64), self._sys.costs * 1.01, self._sys.refs * 2.0)
        demand = self._sys.observe_demand(prices, alpha_true=self._alpha, n_sessions=self.cfg.sessions_per_step)
        self._sys.limbo.add_update("prices", prices)
        self._sys._alpha_est = self._sys._estimate_alpha_from_sessions()
        # The reward must be computed before _last_prices is overwritten (volatility penalty uses it).
        reward = self._compute_reward(prices, demand)
        self._episode_rewards.append(reward)
        self._last_prices, self._last_demand = prices.copy(), demand
        self._t += 1
        # compute info metrics using shared helper
        purchases, revenue, cost = aggregate_purchases(self._sys._last_sessions, self.n, self._sys.costs)
        n_agents = int(self._alpha * self.cfg.sessions_per_step)
        coi = compute_coi_window(self._sys._last_sessions, self._sys.costs, demand_mapping=demand)
        info = {
            "alpha_true": self._alpha, "alpha_est": self._sys.alpha,
            "alpha_error": abs(self._alpha - self._sys.alpha),
            "revenue": float(revenue), "profit": float(revenue - cost), "cost": float(cost),
            "n_purchases": int(np.sum(purchases)),
            "avg_margin": float(np.mean((prices - self._sys.costs) / self._sys.costs)),
            "n_sessions": len(demand), "n_agents": n_agents, "price_std": float(np.std(prices)),
            "coi_erosion": coi_erosion(coi.policy, coi.agent),
            "coi_policy": float(coi.policy), "coi_agent": float(coi.agent),
            "coi_leakage": float(coi.leak), "coi_survival": float(coi.survival_ratio),
            "cumulative_reward": sum(self._episode_rewards), "step": self._t,
        }
        return self._build_obs(), reward, self._t >= self.cfg.max_steps, False, info
    def render(self, mode: str = "human") -> str | None:
        """Return a one-line status string (also printed when mode is "human"); None before the first step."""
        if self._sys is None or self._last_prices is None:
            return None
        out = f"t={self._t}/{self.cfg.max_steps} | alpha_true={self._alpha:.3f} alpha_hat={self._sys.alpha:.3f} | " \
              f"prices: {self._last_prices.round(1)} | demand: {self._demand_agg.round(2)} | " \
              f"reward: {self._episode_rewards[-1] if self._episode_rewards else 0:.3f}"
        if mode == "human":
            print(out)
        return out
    def close(self) -> None:
        """Nothing to release."""
        pass
 class ContaminationSweepEnv(PricingEnv):
    """Pricing environment whose true contamination level follows a fixed schedule.
    The schedule position only moves when reset() receives options={"advance_schedule": True};
    it wraps around at the end of the schedule.
    """
    def __init__(self, cfg: EnvConfig | None = None, alpha_schedule: list[float] | None = None):
        super().__init__(cfg)
        default_schedule = [0.1, 0.2, 0.3, 0.4, 0.5]
        self._schedule = alpha_schedule or default_schedule
        self._schedule_idx = 0
    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
        """Optionally advance the schedule, write the scheduled alpha into cfg, then reset."""
        advance = options and options.get("advance_schedule", False)
        if advance:
            next_idx = self._schedule_idx + 1
            self._schedule_idx = next_idx % len(self._schedule)
        # The base reset reads cfg.alpha_true, so the scheduled value is stored there first.
        self.cfg.alpha_true = self._schedule[self._schedule_idx]
        return super().reset(seed, options)
 class AdversarialEnv(PricingEnv):
    """Pricing environment in which contamination reacts to price predictability.
    After more than ten recorded price vectors, the true contamination level is pushed up
    by a random amount proportional to a predictability score and clipped to cfg.alpha_bounds.
    """
    def __init__(self, cfg: EnvConfig | None = None, exploitation_rate: float = 0.02):
        super().__init__(cfg)
        self._exploit_rate = exploitation_rate
        self._price_history: list[np.ndarray] = []
    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, dict]:
        """Run the base step, then update contamination; adds "predictability" to info."""
        obs, reward, term, trunc, info = super().step(action)
        if self._last_prices is not None:
            self._price_history.append(self._last_prices.copy())
        if len(self._price_history) <= 10:
            predictability = 0.0
        else:
            recent = self._price_history[-10:]
            # NOTE(review): np.std runs over the flattened (steps x products) stack, so spread
            # between products counts as well as change over time — confirm this is intended.
            predictability = 1.0 / (float(np.std(recent)) + 0.1)
            bump = self._exploit_rate * predictability * self._sys.rng.random()
            self._alpha = np.clip(self._alpha + bump, *self.cfg.alpha_bounds)
        info["predictability"] = predictability
        return obs, reward, term, trunc, info
    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
        """Forget the recorded prices and reset the underlying environment."""
        self._price_history = []
        return super().reset(seed, options)
 def make_env(cfg: EnvConfig | None = None, env_type: str = "standard") -> PricingEnv:
    """Build an environment by type name.
    "sweep" and "adversarial" select the specialised environments; every other value
    (including the default "standard") yields a plain PricingEnv.
    """
    registry = {"sweep": ContaminationSweepEnv, "adversarial": AdversarialEnv}
    env_cls = registry.get(env_type, PricingEnv)
    return env_cls(cfg)
 # baseline policies
 def fixed_price_policy(refs, margin=0.0):
    """Return the same multiplier (1 + margin) for every product; only len(refs) is used."""
    return np.ones(len(refs), dtype=np.float32) * (1.0 + margin)
 def random_policy(n, rng=None):
    """Return n float32 multipliers drawn uniformly from [0.7, 1.3).
    A fresh unseeded generator is created when rng is not supplied.
    """
    generator = rng or np.random.default_rng()
    return generator.uniform(0.7, 1.3, n).astype(np.float32)
 def adaptive_policy(obs, n, base=0.1):
    """Return a uniform multiplier whose markup shrinks as obs[2 * n] grows.
    obs[2 * n] is the slot right after the n price ratios and n demand shares of the
    environment observation, i.e. the estimated contamination level.
    """
    return np.ones(n, dtype=np.float32) * (1.0 + base * (1.0 - 0.4 * obs[2 * n]))
 if __name__ == "__main__":
    # Demo rollout: drive a standard environment with the adaptive baseline policy.
    cfg = EnvConfig(n_products=100, max_steps=100, alpha_true=0.25, reward_mode="robust")
    env = make_env(cfg)
    obs, info = env.reset()
    print(f"initial: alpha={info['alpha_true']:.2f}")
    total_reward = 0.0
    for t in range(cfg.max_steps):
        obs, reward, done, _, info = env.step(adaptive_policy(obs, cfg.n_products))
        total_reward += reward
        # Print a status line every tenth step.
        if not t % 10:
            env.render()
        if done:
            break
    print(f"\ntotal reward: {total_reward:.2f}, final alpha_hat: {info['alpha_est']:.3f}")
--- a/sim/case/thesis_simplified/summarize.py
+++ b/sim/case/thesis_simplified/summarize.py
@@ -0,0 +1,168 @@
 """Summarize TensorBoard logs into comparison tables."""
 from __future__ import annotations
 import json
 import re
 from pathlib import Path
 from collections import defaultdict
 from dataclasses import dataclass
 import pandas as pd
 try:
    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
    HAS_TB = True
 except ImportError:
    HAS_TB = False
@dataclass
 class RunInfo:
    """Identity of one experiment run, as parsed from its directory name."""
    # Algorithm or baseline name taken from the run directory name.
    algo: str
    # Contamination level encoded after "_a" in the run directory name.
    alpha: float
    # Reward mode suffix of the name; parse_run_name falls back to 'robust' when it is absent.
    reward_mode: str
    # Run directory; parse_run_name leaves an empty Path() and discover_runs fills it in.
    path: Path
 def parse_run_name(name: str) -> RunInfo | None:
    """Extract algo, alpha, reward_mode from run directory name.
    Returns None when the name does not follow the run naming scheme or when the
    alpha part is not a valid number (e.g. "ppo_a1.2.3_robust"), so that one oddly
    named directory cannot abort run discovery. The returned RunInfo carries an empty
    Path(); the reward mode defaults to 'robust' when the name has no mode suffix.
    """
    # patterns: ppo_a0.20_robust, cmp_fixed_a0.20, sac_a0.90_robust
    m = re.match(r'(cmp_)?(\w+)_a([\d.]+)_?(\w+)?', name)
    if not m:
        return None
    _, algo, alpha_text, mode = m.groups()
    try:
        # [\d.]+ also accepts strings such as "..", which float() rejects.
        alpha = float(alpha_text)
    except ValueError:
        return None
    return RunInfo(algo=algo, alpha=alpha, reward_mode=mode or 'robust', path=Path())
 def load_tb_scalars(log_dir: Path, tags: list[str], reduce: str = 'last') -> dict[str, float]:
    """Load scalar values from TensorBoard event files.
    Each requested tag that exists and has at least one event is reduced to a single
    value with the chosen reducer ('last', 'mean', 'max' or 'min'); other tags are omitted.
    Returns an empty dict when TensorBoard is not installed.
    Raises ValueError for an unknown reducer instead of silently returning nothing.
    """
    reducers = {
        'last': lambda vals: vals[-1],
        'mean': lambda vals: sum(vals) / len(vals),
        'max': max,
        'min': min,
    }
    if reduce not in reducers:
        raise ValueError(f"unknown reduce {reduce!r}; expected one of {sorted(reducers)}")
    if not HAS_TB:
        return {}
    ea = EventAccumulator(str(log_dir))
    ea.Reload()
    # Query the available scalar tags once rather than once per requested tag.
    available = set(ea.Tags().get('scalars', []))
    results = {}
    for tag in tags:
        if tag not in available:
            continue
        vals = [e.value for e in ea.Scalars(tag)]
        if vals:
            results[tag] = reducers[reduce](vals)
    return results
 def load_json_results(log_dir: Path) -> dict[str, float]:
    """Return the parsed contents of <log_dir>/results.json, or {} when the file is absent."""
    candidate = log_dir / 'results.json'
    if not candidate.exists():
        return {}
    with open(candidate) as handle:
        parsed = json.load(handle)
    return parsed
 def discover_runs(base_dir: Path) -> list[RunInfo]:
    """Find all experiment runs in base directory.
    Only sub-directories whose name parse_run_name accepts are returned, each with its
    path filled in, ordered by path for reproducible output. A base directory that
    does not exist yields an empty list, so callers can report "no runs" instead of
    failing on the directory listing.
    """
    if not base_dir.is_dir():
        return []
    runs = []
    for d in sorted(base_dir.iterdir()):
        if not d.is_dir():
            continue
        info = parse_run_name(d.name)
        if info:
            info.path = d
            runs.append(info)
    return runs
 def build_tables(runs: list[RunInfo], metrics: list[str], reduce: str = 'last') -> dict[str, dict[str, pd.DataFrame]]:
    """Build pivot tables: reward_mode -> metric -> DataFrame[alpha x algo].
    For each run and metric the value comes from results.json when available and from
    the reduced TensorBoard scalar otherwise. The JSON lookup tries '<metric>_mean'
    first and then the key the training script writes for that metric (for example
    'coi_erosion_mean' for 'erosion'), so short metric names also pick up the final
    evaluation numbers. Metrics without any value are left out of the result.
    """
    # Short metric name -> stem of the key stored in results.json by the training script.
    json_aliases = {'margin': 'avg_margin', 'erosion': 'coi_erosion',
                    'leakage': 'coi_leakage', 'estimation_error': 'alpha_error'}
    # collect data: {reward_mode: {metric: {(alpha, algo): value}}}
    data = defaultdict(lambda: defaultdict(dict))
    tb_tags = [f'economics/{m}' if m in ['revenue', 'profit', 'margin'] else f'coi/{m}' if m in ['erosion', 'leakage'] else f'alpha/{m}' for m in metrics]
    tag_map = dict(zip(tb_tags, metrics))
    for run in runs:
        # try json first (final eval metrics)
        jm = load_json_results(run.path)
        tb = load_tb_scalars(run.path, tb_tags, reduce)
        for tag, metric in tag_map.items():
            val = None
            json_keys = [f'{metric}_mean']
            if metric in json_aliases:
                json_keys.append(f'{json_aliases[metric]}_mean')
            found_key = next((k for k in json_keys if k in jm), None)
            if found_key is not None:
                val = jm[found_key]
            elif tag in tb:
                val = tb[tag]
            if val is not None:
                data[run.reward_mode][metric][(run.alpha, run.algo)] = val
    # convert to DataFrames
    tables = {}
    for mode, metrics_data in data.items():
        tables[mode] = {}
        for metric, vals in metrics_data.items():
            if not vals:
                continue
            alphas = sorted(set(a for a, _ in vals.keys()))
            algos = sorted(set(al for _, al in vals.keys()))
            df = pd.DataFrame(index=alphas, columns=algos, dtype=float)
            for (a, al), v in vals.items():
                df.loc[a, al] = v
            df.index.name = 'alpha'
            tables[mode][metric] = df
    return tables
 def format_table(df: pd.DataFrame, fmt: str = '.3f') -> str:
    """Format DataFrame as markdown table.
    fmt is forwarded as the float format of the rendered cells.
    NOTE(review): DataFrame.to_markdown presumably needs the optional `tabulate`
    package at runtime — confirm it is part of the environment.
    """
    return df.to_markdown(floatfmt=fmt)
 def summarize(base_dir: str = 'sim/case/thesis_simplified/runs',
              metrics: list[str] | None = None,
              reduce: str = 'last',
              output: str | None = None) -> dict:
    """Print (and optionally save) markdown comparison tables for all runs under base_dir.
    Returns the nested tables produced by build_tables, or {} when no run is found.
    When output is given, the same markdown report is written to that path.
    """
    root = Path(base_dir)
    if not metrics:
        metrics = ['revenue', 'profit', 'margin', 'erosion', 'leakage']
    found = discover_runs(root)
    if not found:
        print(f"No runs found in {root}")
        return {}
    print(f"Found {len(found)} runs")
    tables = build_tables(found, metrics, reduce)
    chunks = []
    # One top-level section per reward mode, one sub-section per metric, both alphabetical.
    for mode in sorted(tables):
        chunks.append(f"\n# Reward Mode: {mode}\n")
        per_metric = tables[mode]
        for metric in sorted(per_metric):
            chunks.extend([f"\n## {metric}\n", format_table(per_metric[metric]), ""])
    report = '\n'.join(chunks)
    print(report)
    if output:
        Path(output).write_text(report)
        print(f"\nSaved to {output}")
    return tables
 if __name__ == '__main__':
    # Command-line entry point: forwards the parsed options to summarize().
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--dir', default='sim/case/thesis_simplified/runs')
    parser.add_argument('--metrics', nargs='+', default=['revenue', 'profit', 'margin', 'erosion', 'leakage'])
    parser.add_argument('--reduce', default='last', choices=['last', 'mean', 'max', 'min'])
    parser.add_argument('--output', '-o', help='save markdown to file')
    cli = parser.parse_args()
    summarize(cli.dir, cli.metrics, cli.reduce, cli.output)
--- a/sim/case/thesis_simplified/train.py
+++ b/sim/case/thesis_simplified/train.py
@@ -0,0 +1,336 @@
 """RL training for thesis pricing system with thesis-aligned metrics.
 Trains pricing policies using stable-baselines3 with TensorBoard logging.
 Tracks COI erosion, alpha estimation error, and economic KPIs per thesis formulation.
 """
 from __future__ import annotations
 import argparse
 import json
 from concurrent.futures import ProcessPoolExecutor, as_completed
 from dataclasses import dataclass, asdict, field
 from pathlib import Path
 from typing import Dict, List, Callable, Any
 import numpy as np
 try:
    from stable_baselines3 import PPO, SAC, A2C
    from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
    from stable_baselines3.common.vec_env import DummyVecEnv
    from stable_baselines3.common.monitor import Monitor
    HAS_SB3 = True
 except ImportError:
    HAS_SB3 = False
 try:
    from torch.utils.tensorboard import SummaryWriter
    HAS_TB = True
 except ImportError:
    HAS_TB = False
 from .simplified_env import PricingEnv, EnvConfig, make_env, adaptive_policy, fixed_price_policy, random_policy
@dataclass
 class EpisodeMetrics:
    """Running per-episode totals of the metrics reported in step info dicts."""
    reward: float = 0.0
    revenue: float = 0.0
    profit: float = 0.0
    coi_erosion: float = 0.0
    coi_leakage: float = 0.0
    alpha_error: float = 0.0
    avg_margin: float = 0.0
    n_agents: int = 0
    steps: int = 0
    def accumulate(self, info: Dict[str, Any]) -> None:
        """Add one step's info values to the totals; missing keys count as 0."""
        self.steps += 1
        for key in ('reward', 'revenue', 'profit', 'coi_erosion', 'coi_leakage'):
            setattr(self, key, getattr(self, key) + info.get(key, 0))
        # The estimation error is recomputed from the true and estimated alpha of the step.
        gap = info.get('alpha_true', 0) - info.get('alpha_est', 0)
        self.alpha_error += abs(gap)
        self.avg_margin += info.get('avg_margin', 0)
        self.n_agents += info.get('n_agents', 0)
    def normalized(self) -> Dict[str, float]:
        """Return per-step averages of every total except reward (divisor is at least 1)."""
        divisor = max(self.steps, 1)
        averaged = {}
        for key in ['revenue', 'profit', 'coi_erosion', 'coi_leakage', 'alpha_error', 'avg_margin', 'n_agents']:
            averaged[key] = getattr(self, key) / divisor
        return averaged
@dataclass
 class ExperimentConfig:
    """Settings of one training or baseline run.
    When no experiment name is given, one is derived as "<algo>_a<alpha>_<reward_mode>"
    with alpha printed to two decimals.
    """
    algo: str = "ppo"
    total_timesteps: int = 100_000
    n_envs: int = 4
    eval_freq: int = 5000
    n_eval_episodes: int = 10
    log_dir: str = "sim/case/thesis_simplified/runs"
    seed: int = 42
    n_products: int = 10
    max_steps: int = 200
    alpha_true: float = 0.2
    reward_mode: str = "robust"
    experiment_name: str | None = None
    def __post_init__(self):
        # An explicitly provided name is kept untouched.
        if self.experiment_name is not None:
            return
        alpha_tag = f"{self.alpha_true:.2f}"
        self.experiment_name = f"{self.algo}_a{alpha_tag}_{self.reward_mode}"
 class Policy:
    """Adapter giving baseline policy functions the predict() interface of trained models."""
    def __init__(self, policy_fn: Callable[[np.ndarray, int], np.ndarray], name: str):
        self._fn = policy_fn
        self.name = name
    def predict(self, obs: np.ndarray, deterministic: bool = True) -> tuple[np.ndarray, None]:
        """Return (action, None); the product count is recovered from the 3*n + 3 observation length."""
        n_products = (len(obs) - 3) // 3
        return self._fn(obs, n_products), None
    @staticmethod
    def fixed(margin: float = 0.15) -> "Policy":
        """Constant markup on every product."""
        def _act(obs, n):
            return fixed_price_policy(np.ones(n), margin)
        return Policy(_act, f"fixed_{margin:.2f}")
    @staticmethod
    def adaptive(base_margin: float = 0.15) -> "Policy":
        """Markup driven by the observation through adaptive_policy."""
        def _act(obs, n):
            return adaptive_policy(obs, n, base_margin)
        return Policy(_act, f"adaptive_{base_margin:.2f}")
    @staticmethod
    def random() -> "Policy":
        """Random multipliers from random_policy on every call."""
        def _act(obs, n):
            return random_policy(n)
        return Policy(_act, "random")
    @staticmethod
    def myopic(greed: float = 0.3) -> "Policy":
        """Uniform multiplier that rises with the mean demand share, clipped to [0.5, 1.5]."""
        def _fn(obs: np.ndarray, n: int) -> np.ndarray:
            # Use the demand slice of the observation when present, else a neutral 0.5.
            if len(obs) > 2*n:
                demand_norm = obs[n:2*n]
            else:
                demand_norm = np.ones(n) * 0.5
            multiplier = np.clip(1.0 + greed * (1 + np.mean(demand_norm)), 0.5, 1.5)
            return np.ones(n, dtype=np.float32) * multiplier
        return Policy(_fn, f"myopic_{greed:.1f}")
 def log_metrics(writer: SummaryWriter | None, metrics: Dict[str, float], prefix: str, step: int) -> None:
    """Write every metric as scalar '<prefix>/<name>' at the given step; no-op without a writer."""
    if writer is not None:
        for name, value in metrics.items():
            tag = f'{prefix}/{name}'
            writer.add_scalar(tag, value, step)
 class MetricsCallback(BaseCallback):
    """Training callback that mirrors per-step info metrics into TensorBoard scalars."""
    def __init__(self, writer: SummaryWriter | None, verbose: int = 0):
        super().__init__(verbose)
        self._writer = writer
    def _on_step(self) -> bool:
        """Log the metrics of every info dict of the current step; always lets training continue."""
        writer = self._writer
        if writer is None:
            return True
        direct_scalars = (
            ('economics/revenue', 'revenue'),
            ('economics/profit', 'profit'),
            ('economics/margin', 'avg_margin'),
            ('coi/erosion', 'coi_erosion'),
            ('coi/leakage', 'coi_leakage'),
        )
        for info in self.locals.get('infos', []):
            t = self.num_timesteps
            for tag, key in direct_scalars:
                writer.add_scalar(tag, info.get(key, 0), t)
            alpha_gap = abs(info.get('alpha_true', 0) - info.get('alpha_est', 0))
            writer.add_scalar('alpha/estimation_error', alpha_gap, t)
            writer.add_scalar('agents/count', info.get('n_agents', 0), t)
        return True
 def make_vec_env(cfg: ExperimentConfig, n_envs: int = 1) -> DummyVecEnv:
    """Build a DummyVecEnv of n_envs monitored environments configured from cfg."""
    def _make():
        env_cfg = EnvConfig(n_products=cfg.n_products, max_steps=cfg.max_steps,
                            alpha_true=cfg.alpha_true, reward_mode=cfg.reward_mode, seed=cfg.seed)
        return Monitor(make_env(env_cfg))
    factories = [_make for _ in range(n_envs)]
    return DummyVecEnv(factories)
 def run_episodes(policy: Policy | Any, env: PricingEnv, n_episodes: int) -> List[EpisodeMetrics]:
    """Roll the policy out deterministically for n_episodes and return one EpisodeMetrics per episode."""
    collected = []
    for _ in range(n_episodes):
        obs, _ = env.reset()
        episode = EpisodeMetrics()
        finished = False
        while not finished:
            action, _state = policy.predict(obs, deterministic=True)
            obs, reward, term, trunc, info = env.step(action)
            episode.accumulate(info)
            # The step reward is returned separately from info, so it is added here.
            episode.reward += reward
            finished = term or trunc
        collected.append(episode)
    return collected
 def evaluate_policy(policy: Policy | Any, cfg: ExperimentConfig, n_episodes: int = 20) -> Dict[str, float]:
    """Evaluate a policy on a fresh environment seeded with cfg.seed + 999.
    Returns the mean and standard deviation of the episode reward plus the mean of each
    per-step-averaged metric, keyed as '<metric>_mean'.
    """
    eval_cfg = EnvConfig(n_products=cfg.n_products, max_steps=cfg.max_steps,
                         alpha_true=cfg.alpha_true, reward_mode=cfg.reward_mode, seed=cfg.seed + 999)
    env = make_env(eval_cfg)
    episodes = run_episodes(policy, env, n_episodes)
    episode_rewards = [m.reward for m in episodes]
    per_step = [m.normalized() for m in episodes]
    summary = {'reward_mean': np.mean(episode_rewards), 'reward_std': np.std(episode_rewards)}
    for k in ['revenue', 'profit', 'coi_erosion', 'coi_leakage', 'alpha_error', 'avg_margin']:
        summary[f'{k}_mean'] = np.mean([row[k] for row in per_step])
    return summary
 def run_baseline(policy: Policy, vec_env: DummyVecEnv, total_steps: int, writer: SummaryWriter | None):
    """Roll a baseline policy through a vectorised environment and log its metrics.
    Every loop iteration advances all environments once and therefore consumes
    vec_env.num_envs of the total_steps budget. The sample of environment i is logged at
    global step `step + i`, so each logged point has its own step instead of all
    environments sharing one. Episode returns are logged as 'rollout/ep_reward' when an
    environment reports done. Nothing is logged when writer is None.
    """
    obs, n_envs = vec_env.reset(), vec_env.num_envs
    ep_rewards = np.zeros(n_envs)
    direct_scalars = (
        ('economics/revenue', 'revenue'),
        ('economics/profit', 'profit'),
        ('economics/margin', 'avg_margin'),
        ('coi/erosion', 'coi_erosion'),
        ('coi/leakage', 'coi_leakage'),
    )
    for step in range(0, total_steps, n_envs):
        actions = np.array([policy.predict(obs[i])[0] for i in range(n_envs)])
        obs, rewards, dones, infos = vec_env.step(actions)
        ep_rewards += rewards
        for i, info in enumerate(infos):
            global_step = step + i  # one distinct step per environment sample
            if writer:
                for tag, key in direct_scalars:
                    writer.add_scalar(tag, info.get(key, 0), global_step)
                writer.add_scalar('alpha/estimation_error', abs(info.get('alpha_true', 0) - info.get('alpha_est', 0)), global_step)
                writer.add_scalar('agents/count', info.get('n_agents', 0), global_step)
            if dones[i]:
                if writer:
                    writer.add_scalar('rollout/ep_reward', ep_rewards[i], global_step)
                ep_rewards[i] = 0
 def train(cfg: ExperimentConfig) -> Dict[str, Any]:
    """Run one experiment (baseline rollout or RL training) and persist its artefacts.
    Writes config.json and results.json into <log_dir>/<experiment_name>, logs to
    TensorBoard when available, and for RL algorithms also saves the final model.
    The TensorBoard writer and both vectorised environments are closed even when
    training fails part-way.
    Returns {"path": run directory, "metrics": final evaluation metrics}.
    Raises ImportError when an RL algorithm is requested without stable-baselines3.
    """
    algo = cfg.algo.lower()
    is_baseline = algo in ["fixed", "adaptive", "random", "myopic"]
    if not HAS_SB3 and not is_baseline:
        raise ImportError("stable-baselines3 required: pip install stable-baselines3[extra]")
    log_path = Path(cfg.log_dir) / cfg.experiment_name
    log_path.mkdir(parents=True, exist_ok=True)
    with open(log_path / "config.json", "w") as f:
        json.dump(asdict(cfg), f, indent=2)
    writer = SummaryWriter(log_path) if HAS_TB else None
    train_env = eval_env = None
    try:
        train_env = make_vec_env(cfg, cfg.n_envs)
        eval_env = make_vec_env(cfg, 1)
        if is_baseline:
            policy = {"fixed": Policy.fixed, "adaptive": Policy.adaptive, "random": Policy.random, "myopic": Policy.myopic}[algo]()
            run_baseline(policy, train_env, cfg.total_timesteps, writer)
            final_metrics = evaluate_policy(policy, cfg)
        else:
            common = dict(verbose=1, seed=cfg.seed, tensorboard_log=str(log_path), device="auto")
            # Builders are lazy so only the requested algorithm is instantiated.
            model = {
                "ppo": lambda: PPO("MlpPolicy", train_env, learning_rate=3e-4, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2, ent_coef=0.01, **common),
                "sac": lambda: SAC("MlpPolicy", train_env, learning_rate=1e-4, buffer_size=50_000, batch_size=512, tau=0.02, gamma=0.99, learning_starts=1000, ent_coef="auto_0.1", train_freq=4, **common),
                "a2c": lambda: A2C("MlpPolicy", train_env, learning_rate=7e-4, n_steps=5, gamma=0.99, **common),
            }[algo]()
            cb = MetricsCallback(writer)
            eval_cb = EvalCallback(eval_env, best_model_save_path=str(log_path / "best"), log_path=str(log_path),
                                   eval_freq=cfg.eval_freq, n_eval_episodes=cfg.n_eval_episodes, deterministic=True)
            model.learn(cfg.total_timesteps, callback=[cb, eval_cb], progress_bar=True)
            model.save(log_path / "final_model")
            final_metrics = evaluate_policy(model, cfg)
        if writer:
            log_metrics(writer, final_metrics, 'final', cfg.total_timesteps)
    finally:
        # Release the writer and environments on success and on failure alike.
        if writer:
            writer.close()
        for env in (train_env, eval_env):
            if env is not None:
                env.close()
    with open(log_path / "results.json", "w") as f:
        json.dump(final_metrics, f, indent=2)
    return {"path": str(log_path), "metrics": final_metrics}
 def _train_alpha(args: tuple) -> tuple[str, Dict]:
    """Sweep worker: train one run at the given alpha (top-level so it can be pickled).
    args is (config dict, alpha); the dict is updated in place with the alpha and the
    matching experiment name. Returns ("alpha_<alpha>", final metrics).
    """
    cfg_dict, alpha = args
    alpha_tag = f"{alpha:.2f}"
    cfg_dict["alpha_true"] = alpha
    cfg_dict["experiment_name"] = f"{cfg_dict['algo']}_a{alpha_tag}_{cfg_dict['reward_mode']}"
    sweep_cfg = ExperimentConfig(**cfg_dict)
    print(f"[alpha={alpha:.2f}] starting")
    outcome = train(sweep_cfg)
    print(f"[alpha={alpha:.2f}] done")
    return f"alpha_{alpha_tag}", outcome["metrics"]
 def run_sweep(cfg: ExperimentConfig, alphas: List[float] | None = None, max_workers: int | None = None) -> Dict[str, Dict]:
    """Train the configured algorithm once per contamination level and save a JSON summary.
    max_workers == 1 runs the levels sequentially in this process; any other value uses a
    process pool. Returns {"alpha_<level>": metrics}, also written to
    <log_dir>/sweep_<algo>_<reward_mode>.json.
    """
    if not alphas:
        alphas = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    cfg_dict = asdict(cfg)
    results = {}
    if max_workers == 1:  # sequential fallback
        for a in alphas:
            key, metrics = _train_alpha((cfg_dict.copy(), a))
            results[key] = metrics
    else:
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            # Each worker receives its own copy of the config dict because it mutates it.
            futures = {pool.submit(_train_alpha, (cfg_dict.copy(), a)): a for a in alphas}
            for fut in as_completed(futures):
                key, metrics = fut.result()
                results[key] = metrics
    summary_path = Path(cfg.log_dir) / f"sweep_{cfg.algo}_{cfg.reward_mode}.json"
    with open(summary_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"\nSweep results saved to {summary_path}")
    return results
 def _train_policy(args: tuple) -> tuple[str, Dict]:
    """Comparison worker: train or roll out one algorithm under a "cmp_" experiment name.
    args is (config dict, algo); the dict is updated in place. Returns (algo, final metrics).
    """
    cfg_dict, algo = args
    cfg_dict["algo"] = algo
    alpha_tag = f"{cfg_dict['alpha_true']:.2f}"
    cfg_dict["experiment_name"] = f"cmp_{algo}_a{alpha_tag}"
    cmp_cfg = ExperimentConfig(**cfg_dict)
    print(f"[{algo}] starting")
    outcome = train(cmp_cfg)
    print(f"[{algo}] done")
    return algo, outcome["metrics"]
 def compare_policies(cfg: ExperimentConfig, policies: List[str] | None = None, max_workers: int | None = None) -> Dict[str, Dict]:
    """Run several policies under the same configuration and report their metrics.
    Defaults to the four baselines. max_workers == 1 runs sequentially, any other value
    uses a process pool. Results are written to <log_dir>/compare_a<alpha>.json, printed
    as a short table, and returned as {policy name: metrics}.
    """
    if not policies:
        policies = ["fixed", "adaptive", "myopic", "random"]
    cfg_dict = asdict(cfg)
    results = {}
    if max_workers == 1:
        for p in policies:
            name, metrics = _train_policy((cfg_dict.copy(), p))
            results[name] = metrics
    else:
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            futures = {pool.submit(_train_policy, (cfg_dict.copy(), p)): p for p in policies}
            for fut in as_completed(futures):
                name, metrics = fut.result()
                results[name] = metrics
    cmp_path = Path(cfg.log_dir) / f"compare_a{cfg.alpha_true:.2f}.json"
    with open(cmp_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"\nComparison saved to {cmp_path}")
    for algo, m in results.items():
        print(f"  {algo:12s}: reward={m['reward_mean']:.2f} coi_erosion={m['coi_erosion_mean']:.4f} alpha_err={m['alpha_error_mean']:.4f}")
    return results
 def main():
    """Command-line entry point: single run by default, or a sweep / baseline comparison."""
    parser = argparse.ArgumentParser(description="Train RL pricing policies")
    parser.add_argument("--algo", default="ppo", choices=["ppo", "sac", "a2c", "fixed", "adaptive", "random", "myopic"])
    parser.add_argument("--steps", type=int, default=100_000)
    parser.add_argument("--alpha", type=float, default=0.2)
    parser.add_argument("--reward-mode", default="robust", choices=["revenue", "profit", "robust", "coi_aware"])
    parser.add_argument("--n-products", type=int, default=10)
    parser.add_argument("--n-envs", type=int, default=4)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--log-dir", default="sim/case/thesis_simplified/runs")
    parser.add_argument("--sweep", action="store_true", help="run contamination sweep")
    parser.add_argument("--compare", action="store_true", help="compare all baselines")
    parser.add_argument("--workers", type=int, default=None, help="max parallel workers for sweep (None=auto, 1=sequential)")
    cli = parser.parse_args()
    cfg = ExperimentConfig(algo=cli.algo, total_timesteps=cli.steps, alpha_true=cli.alpha,
                           reward_mode=cli.reward_mode, n_products=cli.n_products,
                           n_envs=cli.n_envs, seed=cli.seed, log_dir=cli.log_dir)
    # --sweep takes precedence over --compare when both flags are given.
    if cli.sweep:
        run_sweep(cfg, max_workers=cli.workers)
        return
    if cli.compare:
        compare_policies(cfg, max_workers=cli.workers)
        return
    result = train(cfg)
    print(f"\nTraining complete: {result['path']}")
    print(f"Metrics: {json.dumps(result['metrics'], indent=2)}")
 if __name__ == "__main__":
    main()
--- a/sim/rl/behavior_loader/loader.py
+++ b/sim/rl/behavior_loader/loader.py
@@ -0,0 +1,97 @@
 import os
 import json
 from pydantic import BaseModel as Base
 class PayloadModel(Base):
    """Pydantic schema for a single tracked event payload.

    AgentLoader validates every record of an agent ``int.json`` directly against
    this model; for human sessions it appears nested as ``ValueModel.payload``.
    """
    sessionId: str
    # nullable and declared without a default
    # NOTE(review): whether a missing key is accepted depends on the pydantic major version - confirm
    experimentId: str | None
    eventName: str
    # nullable; the loaders drop events whose page starts with "/admin/"
    page: str | None
    productId: str | None
    metadata: dict
    storeMode: str
    userAgent: str
    # event time kept as a string; no parsing or format validation happens here
    ts: str
 class ValueModel(Base):
    """Pydantic schema for the ``value`` part of an interaction record.

    Wraps the event ``payload`` together with the metadata fields stored next to it.
    """
    # the tracked event itself
    payload: PayloadModel
    encoding: str
    isPayloadNull: bool
    schemaId: int
    size: int
 class InteractionModel(Base):
    """Pydantic schema for one top-level record of a human session ``int.json``.

    Loader validates every list element of the file against this model.
    NOTE(review): the field names look like an exported message-log record - confirm the source.
    """
    partitionID: int
    offset: int
    # integer timestamp of the record (unit not specified here)
    timestamp: int
    compression: str
    isTransactional: bool
    headers: list
    key: dict
    # envelope holding the actual event payload
    value: ValueModel
 def _is_admin(page: str | None) -> bool:
    return page is not None and page.startswith("/admin/")
 class Loader:
    """Load recorded interaction sessions from a directory of per-session folders.

    Every entry of ``src_dir`` is treated as a session folder that contains an
    ``int.json`` file holding a list of records. Each record is validated as an
    ``InteractionModel`` and events on ``/admin/`` pages are dropped.

    Attributes:
        src_dir: directory that was scanned.
        entries: names returned by ``os.listdir(src_dir)``, used as session ids.
        data: mapping of session id to its list of non-admin interactions.

    Raises:
        ValueError: if ``src_dir`` contains no entries.
    """
    def __init__(self, src_dir: str):
        self.src_dir = src_dir
        self.entries = os.listdir(src_dir)
        if not self.entries: raise ValueError("empty directory")
        self.data = self._load_sessions()
    def _load_sessions(self) -> dict:
        """Read and validate ``int.json`` for every entry; return ``{entry: [events]}``."""
        sessions = {}
        for entry in self.entries:
            path = os.path.join(self.src_dir, entry, "int.json")
            # JSON is UTF-8 by specification; do not fall back to the locale default encoding.
            with open(path, encoding="utf-8") as f:
                raw = json.load(f)
            ints = [InteractionModel(**i) for i in raw]
            sessions[entry] = [i for i in ints if not _is_admin(i.value.payload.page)]
        return sessions
    def get_data(self) -> dict:
        """Return the mapping of session id to loaded events."""
        return self.data
    def get_entries(self) -> tuple[list[str], int]:
        """Return the session ids together with their count."""
        return self.entries, len(self.entries)
 class AgentLoader(Loader):
    """Loader variant for agent sessions.

    Agent ``int.json`` files hold flat event payloads, so each record is validated
    as a ``PayloadModel`` and the admin filter reads ``page`` directly from it.
    """
    def _load_sessions(self) -> dict:
        """Read and validate ``int.json`` for every entry; return ``{entry: [payloads]}``."""
        sessions = {}
        for entry in self.entries:
            path = os.path.join(self.src_dir, entry, "int.json")
            # JSON is UTF-8 by specification; do not fall back to the locale default encoding.
            with open(path, encoding="utf-8") as f:
                raw = json.load(f)
            ints = [PayloadModel(**i) for i in raw]
            sessions[entry] = [i for i in ints if not _is_admin(i.page)]
        return sessions
 class JointLoader:
    """Combine human and agent sessions into one mapping of flat payload lists.

    Human events are unwrapped to their ``value.payload`` so both sources share
    the same event shape. Session ids are prefixed with ``human_`` / ``agent_``
    to keep the two sources apart.
    """
    def __init__(self, human_dir: str, agent_dir: str):
        self.human_loader = Loader(human_dir)
        self.agent_loader = AgentLoader(agent_dir)
        self.data = self._merge()
        self.entries = list(self.data)
    def _merge(self) -> dict:
        """Build the combined mapping: human sessions first, then agent sessions."""
        combined = {}
        for session_id, events in self.human_loader.get_data().items():
            combined[f"human_{session_id}"] = [evt.value.payload for evt in events]
        for session_id, events in self.agent_loader.get_data().items():
            combined[f"agent_{session_id}"] = events
        return combined
    def get_data(self) -> dict:
        """Return the combined mapping of prefixed session id to payload list."""
        return self.data
    def get_entries(self) -> tuple[list[str], int]:
        """Return the prefixed session ids together with their count."""
        session_ids = self.entries
        return session_ids, len(session_ids)
 if __name__ == "__main__":
    # Smoke test: build each loader against the local data directories and report session counts.
    agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
    human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
    builders = {
        "agent": lambda: AgentLoader(agent_dir),
        "human": lambda: Loader(human_dir),
        "joint": lambda: JointLoader(human_dir, agent_dir),
    }
    for label, build in builders.items():
        session_ids, _ = build().get_entries()
        print(f"Loaded {len(session_ids)} {label} sessions")
--- a/sim/rl/behavior_loader/models.py
+++ b/sim/rl/behavior_loader/models.py
@@ -0,0 +1,256 @@
 try:
    from loader import Loader, AgentLoader, JointLoader
 except ImportError:
    from sim.rl.behavior_loader.loader import Loader, AgentLoader, JointLoader
 from collections import defaultdict
 from typing import Dict, List, Tuple, Set
 import numpy as np
 import graphviz
 import sys
 from pathlib import Path
 # import lib utilities for optional use - models keep their own _state_repr for backwards compat
 # with the specific event structure (evt.value.payload)
 sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / 'lib'))
 try:
    from lib.state import make_state_repr as lib_make_state_repr
    from lib.features import transition_histogram as lib_transition_histogram
 except ImportError:
    lib_make_state_repr = None
    lib_transition_histogram = None
 class BehaviorModel:
    """Empirical Markov model of session behaviour.

    Sessions are loaded through ``loader_cls``, each event is mapped to a string
    state ``page|productId|eventName``, and ``build_MDP`` estimates transition
    probabilities and per-state values from the observed state sequences.
    """
    def __init__(self, src_dir: str, loader_cls=Loader):
        self.loader = loader_cls(src_dir)
        self.data = self.loader.get_data()
        self.entries, self.num_entries = self.loader.get_entries()
        self.mdp = None  # filled in by build_MDP()
    def _state_repr(self, evt) -> str:
        """Map an event to its state string; missing page/product become 'unk'/'none'."""
        p = evt.value.payload
        return f"{p.page or 'unk'}|{p.productId or 'none'}|{p.eventName}"
    def _sort_key(self, evt):
        """Key used to order the events of a session chronologically."""
        return evt.timestamp
    def _extract_sessions(self) -> List[List[str]]:
        """Return one ordered state sequence per session; sessions with fewer than 2 events are skipped."""
        trajs = []
        for evts in self.data.values():
            if len(evts) < 2: continue
            states = [self._state_repr(e) for e in sorted(evts, key=self._sort_key)]
            trajs.append(states)
        return trajs
    def _calc_transitions(self, trajs: List[List[str]]) -> Tuple[Dict, Set]:
        """Count consecutive state pairs; return ``(counts[s][s_next], set of states seen in a pair)``."""
        trans, states = defaultdict(lambda: defaultdict(int)), set()
        for traj in trajs:
            for s, s_next in zip(traj, traj[1:]):
                trans[s][s_next] += 1
                states.update([s, s_next])
        return trans, states
    def _calc_rewards(self, trajs: List[List[str]]) -> Dict:
        """Collect, per state, the relative position ``i / len(traj)`` of each of its occurrences."""
        rwd = defaultdict(list)
        for traj in trajs:
            n = len(traj)
            for i, s in enumerate(traj):
                rwd[s].append(i / n)
        return rwd
    def _normalize_trans(self, cnts: Dict) -> Dict:
        """Turn outgoing counts into probabilities, row by row."""
        probs = {}
        for s, nxt in cnts.items():
            # row total computed once per source state instead of once per outgoing edge
            total = sum(nxt.values())
            probs[s] = {s_n: cnt / total for s_n, cnt in nxt.items()}
        return probs
    def build_MDP(self) -> Dict:
        """Estimate the model from the loaded sessions, store it in ``self.mdp`` and return it.

        The returned dict has the keys ``states`` (sorted list), ``num_states``,
        ``transitions`` (probabilities), ``state_values`` (mean relative position),
        ``state_rewards`` (raw relative positions) and ``trans_counts``.
        """
        trajs = self._extract_sessions()
        trans_cnt, states = self._calc_transitions(trajs)
        trans_prob = self._normalize_trans(trans_cnt)
        state_rwd = self._calc_rewards(trajs)
        self.mdp = {
            'states': sorted(states),
            'num_states': len(states),
            'transitions': trans_prob,
            'state_values': {s: np.mean(r) for s, r in state_rwd.items()},
            'state_rewards': state_rwd,
            'trans_counts': trans_cnt,
        }
        return self.mdp
    def transition_prob(self, s: str, s_next: str) -> float:
        """Return P(s_next | s), or 0.0 for an unseen pair. Raises ValueError before build_MDP()."""
        if not self.mdp: raise ValueError("build MDP first")
        return self.mdp['transitions'].get(s, {}).get(s_next, 0.0)
    def state_value(self, s: str) -> float:
        """Return the stored value of ``s``, or 0.0 if unknown. Raises ValueError before build_MDP()."""
        if not self.mdp: raise ValueError("build MDP first")
        return self.mdp['state_values'].get(s, 0.0)
    def sample_traj(self, start: str, max_len: int = 50) -> List[str]:
        """Random-walk the transition table from ``start`` for at most ``max_len`` steps.

        Stops early at a state without outgoing transitions. Uses the global
        ``np.random`` state. Raises ValueError before build_MDP().
        """
        if not self.mdp: raise ValueError("build MDP first")
        path, curr = [start], start
        for _ in range(max_len):
            nxt = self.mdp['transitions'].get(curr, {})
            if not nxt: break
            # np.random.choice yields a numpy string scalar; keep the path made of plain str
            curr = str(np.random.choice(list(nxt.keys()), p=list(nxt.values())))
            path.append(curr)
        return path
    def extract_trajectory_features(self, events: List, max_trans_dim: int = 50) -> np.ndarray:
        """Convert trajectory to feature vector using MDP structure for contrastive learning

        Builds the MDP first if it does not exist yet. The float32 result has
        ``max_trans_dim + 8`` entries: the normalized transition histogram
        (truncated/zero-padded to ``max_trans_dim``), state coverage ratio,
        length-normalized transition entropy term, trajectory length, unique
        state count, and mean/std/min/max of the state values along the trajectory.
        """
        if not self.mdp:
            self.build_MDP()
        states = [self._state_repr(e) for e in sorted(events, key=self._sort_key)]
        features = []
        # transition histogram over MDP state space
        trans_counts = defaultdict(int)
        for s, s_next in zip(states, states[1:]):
            trans_counts[(s, s_next)] += 1
        all_trans = [(s, t) for s in self.mdp['states'] for t in self.mdp['transitions'].get(s, {}).keys()]
        trans_vec = [trans_counts.get(tr, 0) for tr in all_trans[:max_trans_dim]]
        trans_vec = trans_vec + [0] * (max_trans_dim - len(trans_vec))  # pad
        total_trans = sum(trans_counts.values()) or 1
        features.extend([v / total_trans for v in trans_vec])
        # state coverage ratio
        visited = set(states)
        features.append(len(visited) / max(self.mdp['num_states'], 1))
        # temporal entropy of transitions
        if len(states) > 1:
            trans_probs = [self.transition_prob(s, s_n) for s, s_n in zip(states, states[1:])]
            entropy = -sum(p * np.log(p + 1e-10) for p in trans_probs if p > 0)
            features.append(entropy / max(len(states), 1))
        else:
            features.append(0.0)
        # trajectory length and unique state count
        features.append(len(states))
        features.append(len(visited))
        # state value statistics along trajectory
        vals = [self.state_value(s) for s in states]
        if vals:
            features.extend([np.mean(vals), np.std(vals), np.min(vals), np.max(vals)])
        else:
            features.extend([0.0, 0.0, 0.0, 0.0])
        return np.array(features, dtype=np.float32)
 class AgentBehaviorModel(BehaviorModel):
    """BehaviorModel over agent sessions, whose events are flat payloads loaded via AgentLoader."""
    def __init__(self, src_dir: str):
        super().__init__(src_dir, loader_cls=AgentLoader)
    def _state_repr(self, evt) -> str:
        """State string read straight from the payload fields (no ``value.payload`` envelope)."""
        page, product = evt.page or 'unk', evt.productId or 'none'
        return "{}|{}|{}".format(page, product, evt.eventName)
    def _sort_key(self, evt):
        """Agent events are ordered by their ``ts`` field."""
        return evt.ts
 class JointBehaviorModel(BehaviorModel):
    """BehaviorModel over the merged human + agent sessions produced by JointLoader.

    The base initializer is not called because JointLoader needs two directories;
    the same attributes are set here instead.
    """
    def __init__(self, human_dir: str, agent_dir: str):
        self.loader = JointLoader(human_dir, agent_dir)
        self.data = self.loader.get_data()
        session_ids, n_sessions = self.loader.get_entries()
        self.entries = session_ids
        self.num_entries = n_sessions
        self.mdp = None
    def _state_repr(self, evt) -> str:
        """State string read straight from the flat payload fields."""
        page, product = evt.page or 'unk', evt.productId or 'none'
        return "{}|{}|{}".format(page, product, evt.eventName)
    def _sort_key(self, evt):
        """Merged events are ordered by their ``ts`` field."""
        return evt.ts
 def aggregate_event_transitions(mdp: Dict) -> Dict[str, Dict[str, float]]:
    """Collapse state-level transition probabilities to event-type level.

    The event type is the third ``|``-separated field of a state string.
    Probabilities of all state transitions sharing the same (source event,
    target event) pair are summed, then every source row with a positive total
    is rescaled to sum to 1.
    """
    def _event_of(state: str) -> str:
        return state.split('|')[2]

    summed = defaultdict(lambda: defaultdict(float))
    for state, outgoing in mdp['transitions'].items():
        src_evt = _event_of(state)
        for nxt_state, p in outgoing.items():
            summed[src_evt][_event_of(nxt_state)] += p

    result = {}
    for src_evt, row in summed.items():
        mass = sum(row.values())
        if mass > 0:
            result[src_evt] = {dst_evt: p / mass for dst_evt, p in row.items()}
        else:
            # rows without positive mass are passed through unchanged
            result[src_evt] = row
    return result
 def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph",
                   fmt: str = "svg", view: bool = False, export_dot: bool = False):
    """Render the event-level transition graph of a built model with graphviz.

    Args:
        model: model whose ``mdp`` has already been built.
        threshold: only edges with probability strictly above this value are drawn.
        output: output path without extension.
        fmt: graphviz output format.
        view: forwarded to ``Digraph.render``.
        export_dot: additionally write the DOT source to ``<output>.dot``.

    Returns:
        The ``graphviz.Digraph`` that was rendered.

    Raises:
        ValueError: if the model has no MDP yet.
    """
    if not model.mdp:
        raise ValueError("build MDP first")
    evt_trans = aggregate_event_transitions(model.mdp)

    graph = graphviz.Digraph(format=fmt)
    graph.attr(rankdir='LR', size='30')
    graph.attr('node', shape='circle', width='1', height='1')

    # every event type that appears as a source or as a target becomes a node
    events = set(evt_trans.keys()) | {e for trans in evt_trans.values() for e in trans.keys()}
    for event_name in events:
        graph.node(event_name)

    drawn_edges = (
        (src_evt, dst_evt, weight)
        for src_evt, targets in evt_trans.items()
        for dst_evt, weight in targets.items()
        if weight > threshold
    )
    for src_evt, dst_evt, weight in drawn_edges:
        graph.edge(src_evt, dst_evt, label=f'{weight:.2f}')

    graph.render(output, view=view, cleanup=True)
    print(f"Saved MDP graph to {output}.{fmt}")
    if export_dot:
        with open(f"{output}.dot", 'w') as dot_file:
            dot_file.write(graph.source)
        print(f"Exported DOT source to {output}.dot")
    return graph
 def kl_divergence(p: Dict[str, float], q: Dict[str, float]) -> float:
    """Smoothed KL divergence D(p || q) over the keys of ``p``.

    Each term is ``(p_k + eps) * log((p_k + eps) / (q_k + eps))`` with
    ``eps = 1e-10``; keys missing from ``q`` count as probability 0.
    """
    eps = 1e-10
    total = 0
    for key in p:
        p_k = p[key] + eps
        q_k = q.get(key, 0.0) + eps
        total = total + p_k * np.log(p_k / q_k)
    return total
 # Demo script: build the human, agent and joint models from the local data directories,
 # render each event graph to PDF/DOT, and compare human vs agent event transitions via KL divergence.
 if __name__ == "__main__":
    base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments"
    human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/"
    # --- human sessions ---
    human_model = BehaviorModel(human_dir)
    human_mdp = human_model.build_MDP()
    print(f"Built MDP: {human_mdp['num_states']} states, "
          f"{sum(len(t) for t in human_mdp['transitions'].values())} transitions")
    if not human_mdp['states']:
        exit("No states found")
    visualize_mdp(human_model, threshold=0.05, output="human_mdp_viz", fmt="pdf", export_dot=True)
    # --- agent sessions ---
    agent_model = AgentBehaviorModel(agent_dir)
    agent_mdp = agent_model.build_MDP()
    print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, "
          f"{sum(len(t) for t in agent_mdp['transitions'].values())} transitions")
    if not agent_mdp['states']:
        exit("No states found")
    visualize_mdp(agent_model, threshold=0.05, output="agent_mdp_viz", fmt="pdf", export_dot=True)
    # --- per-event KL divergence, human (p) against agent (q) ---
    human_evt = aggregate_event_transitions(human_mdp)
    agent_evt = aggregate_event_transitions(agent_mdp)
    # only source event types present in both models can be compared
    common = set(human_evt.keys()) & set(agent_evt.keys())
    if not common:
        exit("No common event types for KL divergence analysis")
    # sorted from most to least divergent
    kl_divs = sorted([(e, kl_divergence(human_evt[e], agent_evt[e])) for e in common],
                     key=lambda x: x[1], reverse=True)
    print(f"Average KL divergence: {np.mean([kl for _, kl in kl_divs]):.4f}")
    print("\nMost divergent event types:")
    for evt, kl in kl_divs:
        print(f"  {evt}: {kl:.4f}")
    # --- joint model over both sources ---
    print("\n=== Joint Model (Human + Agent Combined) ===")
    joint_model = JointBehaviorModel(human_dir, agent_dir)
    joint_mdp = joint_model.build_MDP()
    print(f"Built joint MDP: {joint_mdp['num_states']} states, "
          f"{sum(len(t) for t in joint_mdp['transitions'].values())} transitions")
    if joint_mdp['states']:
        visualize_mdp(joint_model, threshold=0.05, output="joint_mdp_viz", fmt="pdf", export_dot=True)
--- a/sim/rl/engine.py
+++ b/sim/rl/engine.py
@@ -0,0 +1,240 @@
 from os import kill
 import numpy as np
 import pandas as pd
 from abc import ABC, abstractmethod
 from typing import Dict, Any
 from sim.rl.environment import BusinessLogicConstraints
 """
 An engine by default should have its own demand estimation mechanism, driven by the observations it receives, which are the computed features.
 From these features we then follow the research structure of q -> p, using a mechanism that must be testable and updatable.
 """
 class BasePricingEngine(ABC):
    """Common contract for pricing engines.

    Holds the business constraints (``self.c``), a seeded numpy random
    generator (``self.rng``) and a step counter (``self.step_count``).
    """
    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        self.step_count = 0
        self.c = constraints
        self.rng = np.random.default_rng(seed)
    @abstractmethod
    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Compute the next price vector from the current prices and an environment observation.

        Args:
            current_prices: current price vector [N]
            observation: dict containing 'price', 'demand', and possibly interaction data

        Returns:
            new_prices: updated price vector [N]
        """
    def update(self, observation: Dict[str, Any], reward: float, done: bool, info: Dict[str, Any]) -> None:
        """Default feedback hook: remember the latest observation, reward and info.

        ``done`` is accepted but not stored. Engines can override as needed.
        """
        self.last_observation, self.last_reward, self.last_info = observation, reward, info
    def reset(self):
        """Start a new episode by zeroing the step counter."""
        self.step_count = 0
 class WildPricingEngine(BasePricingEngine):
    """Production-like pricing using online elasticity estimation via EWMA regression.

    Per product, the engine tracks exponentially weighted moments of log-price
    and log-demand, derives an elasticity estimate from them, and moves prices
    toward ``unit_cost * e / (e + 1)`` subject to the constraint guardrails.
    Unit costs are drawn once at construction and are kept across ``reset()``.
    """
    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        # per-product unit costs (unknown to customers; known to platform)
        self.unit_cost = self.rng.uniform(8.0, 40.0, size=self.c.product_catalogue_size).astype(np.float32)
        self._init_estimates()
        # knobs typical in production
        self.lr = 0.08
        self.ewma = 0.05
        self.eps_explore = 0.03
        self.explore_scale = 0.03
    def _init_estimates(self) -> None:
        """Set the elasticity estimate and the EWMA regression moments to their starting values."""
        n = self.c.product_catalogue_size
        # online elasticity estimate (start moderately elastic)
        self.e_hat = np.full((n,), -1.3, dtype=np.float32)
        # EWMA state for log-log regression
        self.mu_logp = np.zeros(n, dtype=np.float32)
        self.mu_logq = np.zeros(n, dtype=np.float32)
        self.cov_pq = np.zeros(n, dtype=np.float32)
        self.var_p = np.ones(n, dtype=np.float32)
    def _safe_elasticity(self, e: np.ndarray) -> np.ndarray:
        """Clip elasticities to [-5.0, -1.05] so that ``e / (e + 1)`` stays finite and positive."""
        return np.clip(e, -5.0, -1.05)
    def reset(self):
        """Reset the step counter and the online estimates for a new episode."""
        super().reset()
        # uses the same attribute spelling as __init__ (product_catalogue_size)
        self._init_estimates()
    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Advance one step: read the demand signal from ``observation`` and return updated prices.

        A missing (or ``None``) ``'demand'`` entry is treated as zero demand for every product.
        """
        self.step_count += 1
        # extract demand signal (from env observation) as proxy for sales
        demand = observation.get('demand')
        if demand is None:
            demand = np.zeros(self.c.product_catalogue_size, dtype=np.float32)
        return self._update_from_demand(current_prices, demand)
    def _update_from_demand(self, prices: np.ndarray, sold: np.ndarray) -> np.ndarray:
        """Update the elasticity estimate from (prices, sold) and return the guarded new prices."""
        # log transforms (add 1 to handle zeros)
        logp = np.log(np.clip(prices, 1e-3, None)).astype(np.float32)
        logq = np.log(sold + 1.0).astype(np.float32)
        # EWMA moments for per-product regression: logq ≈ a + e*logp
        a = self.ewma
        # deviations are taken against the means from before this step's update
        dp = logp - self.mu_logp
        dq = logq - self.mu_logq
        self.mu_logp = (1 - a) * self.mu_logp + a * logp
        self.mu_logq = (1 - a) * self.mu_logq + a * logq
        self.cov_pq = (1 - a) * self.cov_pq + a * (dp * dq)
        self.var_p = (1 - a) * self.var_p + a * (dp * dp + 1e-6)
        e_new = self.cov_pq / (self.var_p + 1e-6)
        self.e_hat = self._safe_elasticity(0.9 * self.e_hat + 0.1 * e_new)
        # profit-optimal price for isoelastic demand (if e < -1)
        e = self.e_hat
        p_star = self.unit_cost * (e / (e + 1.0))
        # smooth toward p_star
        new_prices = (1 - self.lr) * prices + self.lr * p_star
        # exploration (small random perturbations)
        if self.rng.random() < self.eps_explore:
            noise = self.rng.normal(0.0, self.explore_scale, size=new_prices.shape).astype(np.float32)
            new_prices = new_prices * (1.0 + noise)
        # apply business guardrails (max change + bounds)
        max_adj = self.c.max_price_adjustment
        ratio = np.clip(new_prices / (prices + 1e-6), 1 - max_adj, 1 + max_adj)
        new_prices = prices * ratio
        new_prices = np.clip(new_prices, self.c.system_min_price, self.c.system_max_price).astype(np.float32)
        return new_prices
 class StaticPricingEngine(BasePricingEngine):
    """Baseline engine: the first prices seen in an episode are returned on every step."""
    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.fixed_prices = None
    def reset(self):
        """Forget the frozen prices so the next episode captures its own."""
        super().reset()
        self.fixed_prices = None
    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Return a copy of the frozen prices, capturing them from ``current_prices`` on first use."""
        self.step_count += 1
        frozen = self.fixed_prices
        if frozen is None:
            frozen = self.fixed_prices = current_prices.copy()
        return frozen.copy()
 class SimpleDemandEngine(BasePricingEngine):
    """Demand-following engine: prices go up when demand rose since the last step and down when it fell."""
    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.prev_demand = None
        self.lr = 0.05
    def reset(self):
        """Drop the remembered demand so the next step only records a baseline."""
        super().reset()
        self.prev_demand = None
    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Return prices adjusted by the relative demand change, within the constraint limits.

        The first call of an episode only stores the demand and returns the
        current prices unchanged.
        """
        self.step_count += 1
        demand_now = _extract_demand(observation, self.c.product_catalogue_size)
        demand_before = self.prev_demand
        if demand_before is None:
            self.prev_demand = demand_now.copy()
            return current_prices.copy()
        # relative step: signed demand change scaled by the previous demand level
        change = demand_now - demand_before
        rel_step = self.lr * np.sign(change) * np.abs(change) / (np.abs(demand_before) + 1.0)
        proposed = current_prices * (1.0 + rel_step)
        self.prev_demand = demand_now.copy()
        # guardrails: cap the per-step ratio, then clamp to the system price range
        cap = self.c.max_price_adjustment
        bounded_ratio = np.clip(proposed / (current_prices + 1e-6), 1 - cap, 1 + cap)
        bounded = current_prices * bounded_ratio
        return np.clip(bounded, self.c.system_min_price, self.c.system_max_price).astype(np.float32)
 class RandomWalkEngine(BasePricingEngine):
    """Random-walk engine: multiplicative Gaussian noise plus a weak pull back to the episode's first prices."""
    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.target_price = None
        self.volatility = 0.02
    def reset(self):
        """Forget the reversion target so the next episode captures its own."""
        super().reset()
        self.target_price = None
    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Return noisy, mean-reverting prices within the constraint limits."""
        self.step_count += 1
        anchor = self.target_price
        if anchor is None:
            # the first prices of the episode become the reversion target
            anchor = self.target_price = current_prices.copy()
        shock = self.rng.normal(0.0, self.volatility, size=current_prices.shape).astype(np.float32)
        pull = 0.01 * (anchor - current_prices)
        drifted = current_prices * (1.0 + shock) + pull
        # guardrails: cap the per-step ratio, then clamp to the system price range
        cap = self.c.max_price_adjustment
        bounded_ratio = np.clip(drifted / (current_prices + 1e-6), 1 - cap, 1 + cap)
        bounded = current_prices * bounded_ratio
        return np.clip(bounded, self.c.system_min_price, self.c.system_max_price).astype(np.float32)
 class ThompsonSamplingEngine(BasePricingEngine):
    """bayesian bandit approach per product treating price as discrete action

    Each product has ``n_price_levels`` candidate prices and one Beta(alpha, beta)
    belief per candidate. Every step the beliefs of the previously chosen
    candidates are updated from the observed demand, then a value is sampled
    from each belief and the candidate with the largest sample is priced.
    """
    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.n_price_levels = 5
        # Beta parameters per (product, price level), both starting at 1
        self.alpha = np.ones((self.c.product_catalogue_size, self.n_price_levels), dtype=np.float32)
        self.beta = np.ones((self.c.product_catalogue_size, self.n_price_levels), dtype=np.float32)
        # built lazily from the first prices seen in compute_prices
        self.price_grid = None
        # price-level index chosen per product on the previous step
        self.last_actions = None
    def reset(self):
        """Reset the step counter, the beliefs, the price grid and the last actions."""
        super().reset()
        self.alpha = np.ones((self.c.product_catalogue_size, self.n_price_levels), dtype=np.float32)
        self.beta = np.ones((self.c.product_catalogue_size, self.n_price_levels), dtype=np.float32)
        self.price_grid = None
        self.last_actions = None
    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Update beliefs from the observed demand and return the newly sampled prices.

        Returns a float32 vector with one price per product, clipped to the
        system price range.
        """
        self.step_count += 1
        if self.price_grid is None:
            # define price grid per product
            # evenly spaced between 70% and 130% of the first observed prices
            # assumes current_prices is 1-D so the transposed grid is (products, levels) - TODO confirm
            lo = current_prices * 0.7
            hi = current_prices * 1.3
            self.price_grid = np.linspace(lo, hi, self.n_price_levels).T
        demand = _extract_demand(observation, self.c.product_catalogue_size)
        # update beliefs based on last action
        if self.last_actions is not None:
            for i in range(self.c.product_catalogue_size):
                a = self.last_actions[i]
                reward = demand[i]
                # demand above 0.5 adds the demand value to alpha; anything else adds 1 to beta
                if reward > 0.5:
                    self.alpha[i, a] += reward
                else:
                    self.beta[i, a] += 1.0
        # thompson sampling: sample from posterior, pick best
        new_prices = np.zeros(self.c.product_catalogue_size, dtype=np.float32)
        actions = np.zeros(self.c.product_catalogue_size, dtype=int)
        for i in range(self.c.product_catalogue_size):
            # one Beta draw per price level of product i
            theta = self.rng.beta(self.alpha[i], self.beta[i]).astype(np.float32)
            actions[i] = int(np.argmax(theta))
            new_prices[i] = self.price_grid[i, actions[i]]
        self.last_actions = actions
        return np.clip(new_prices, self.c.system_min_price, self.c.system_max_price).astype(np.float32)
 def _extract_demand(observation: Dict[str, Any], n: int) -> np.ndarray:
    if "elasticity" in observation and isinstance(observation["elasticity"], dict):
        d = observation["elasticity"].get("demand")
        if d is not None:
            return np.asarray(d, dtype=np.float32)
    d = observation.get("demand")
    if d is not None:
        return np.asarray(d, dtype=np.float32)
    return np.zeros(n, dtype=np.float32)
--- a/sim/rl/environment.py
+++ b/sim/rl/environment.py
@@ -1,451 +1,244 @@
-import gymnasium as gym
+from __future__ import annotations
-from gymnasium import spaces
+
 import numpy as np
 from dataclasses import dataclass
-import pandas as pd
+from typing import Any, Dict, Optional, Tuple
 from typing import Callable, Optional, Dict, Any, List
-# "learner"  agent learning to optimize pricing
+import numpy as np
-# "agent"  part of environment creating demand signals that learner processes
+
 try:
    import gymnasium as gym
    from gymnasium import spaces
 except ImportError as e:
    raise ImportError("sim.rl.environment requires gymnasium") from e
 from sim.case.thesis_simplified.coi import COIWindow, coi_erosion, compute_coi_window
 from sim.case.thesis_simplified.separability import estimate_alpha as estimate_session_alpha
 from sim.case.thesis_simplified.simplified import Limbo, Session, put_prices_to_market
 from sim.rl.thesis_core import aggregate_demand_by_product, aggregate_purchases, constrain_prices
@dataclass(frozen=True)
 class BusinessLogicConstraints:
    product_catalogue_size: int = 100
    max_steps: int = 2000
    sessions_per_step: int = 250
@dataclass
 class BusinessLogicConstraints():
    max_price_adjustment: float = 0.30
    system_max_price: float = 500.0
    system_min_price: float = 1.0
-    product_catelogue_size: int = 100
+    max_price_adjustment: float = 0.30
-    episode_length: int = 200
+    min_margin_pct: float = 0.05
-    sessions_per_step: int = 250
+
-    agent_share: float = 0.25
+    agent_share: float = 0.2
-    agent_recon_multiplier: float = 6.0
+    alpha_drift: float = 0.0
-    agent_purchase_probability: float = 0.20
+    alpha_bounds: tuple[float, float] = (0.0, 0.8)
    coi_strength: float = 0.25
    coi_threshold: float = 4.0
    coi_sigmoid_temp: float = 1.25
    base_human_demand: float = 0.08
    base_agent_demand: float = 0.05
    human_price_elasticity: float = -1.2
    agent_price_elasticity: float = -0.6
    w_agent_loss: float = 1.0
    w_volatility: float = 5.0
    w_estimation_error: float = 0.25
    seed: int = 7
-def _sigmoid(x: np.ndarray) -> np.ndarray:
+def make_env(constraints: Optional[BusinessLogicConstraints] = None) -> "PHANTOMEnv":
-    return 1.0 / (1.0 + np.exp(-x))
+    return PHANTOMEnv(constraints=constraints or BusinessLogicConstraints())
 def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series:
    """Baseline heuristic agent detector: busy, fast sessions that rarely buy.

    A session is flagged when all three hold:
      * ``total_interactions >= 12``
      * ``interaction_velocity >= 0.20``
      * ``conversion_rate <= 0.01``

    A missing column is replaced by a constant Series (0.0, or 0 for the
    interaction count) aligned to ``session_df.index``.

    Returns a boolean Series, True where the session is flagged.
    """
    row_index = session_df.index

    def _column(name, fill):
        # Same lookup-with-default as DataFrame.get; the default is built eagerly.
        return session_df.get(name, pd.Series(fill, index=row_index))

    velocity = _column("interaction_velocity", 0.0)
    conversion = _column("conversion_rate", 0.0)
    interactions = _column("total_interactions", 0)

    is_busy = interactions >= 12
    is_fast = velocity >= 0.20
    rarely_buys = conversion <= 0.01
    return is_busy & is_fast & rarely_buys
 class CommercePlatform:
    def __init__(self, product_catelogue_size: int, max_price: float, min_price: float,
                 constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None,
                 use_defense: bool = False):
        self.product_catelogue_size = product_catelogue_size
        self.max_price = max_price
        self.min_price = min_price
        self.constraints = constraints
        self.use_defense = use_defense
        self.agent_detector = agent_detector
        self.simulation_history: List[Dict[str, Any]] = []
        self._rng = np.random.default_rng(constraints.seed)
        self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size)
        self._popularity = self._popularity / (self._popularity.mean() + 1e-12)
        self._last_interaction_df: pd.DataFrame = pd.DataFrame()
    def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]:
        # ground truth purchase propensities
        p = np.clip(prices, self.min_price, self.max_price)
        pn = p / self.max_price
        human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity)
        agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity)
        return {
            "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95),
            "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95)
        }
    def _session_markup_multiplier(self, signal_score: float) -> float:
        """Session-level COI markup factor for a given demand-signal score.

        The score is centred on ``coi_threshold``, divided by the sigmoid
        temperature (floored at 1e-6), passed through ``_sigmoid``, and the
        result is scaled by ``coi_strength`` and added to 1.0.
        """
        cfg = self.constraints
        temperature = max(cfg.coi_sigmoid_temp, 1e-6)
        centred = (signal_score - cfg.coi_threshold) / temperature
        gate = float(_sigmoid(np.array([centred]))[0])
        return 1.0 + cfg.coi_strength * gate
    def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame:
        """Generate one step's worth of synthetic human and agent events.

        ``sessions_per_step`` is split by ``agent_share`` into an agent-session
        count (rounded) and a human-session count (the remainder).

        Human sessions view 1-3 distinct products; each view may be followed by
        a cart event (35% chance) and, at most once per session, a purchase
        drawn with the product's human purchase probability. The price paid is
        the base price times the markup for the signal accumulated so far
        (+1 per view, +2 per cart), clipped to the price bounds.

        Agents get a "recon" session of views only and, with probability
        ``agent_purchase_probability``, a separate "clean" session containing a
        single view and possibly a purchase. The observed price uses the clean
        session's signal; the oracle price uses the recon session's signal.

        Args:
            base_prices: per-product prices indexed by product id.

        Returns:
            DataFrame with one row per event (empty if no events were produced).

        All randomness comes from ``self._rng``; the order of draws below
        determines the output for a given seed.
        """
        demand = self.setup_true_demand(base_prices)
        human_pprob = demand["human_purchase_prob"]
        agent_pprob = demand["agent_purchase_prob"]
        events: List[Dict[str, Any]] = []
        T = self.constraints.sessions_per_step
        n_agent_sessions = int(round(T * self.constraints.agent_share))
        n_human_sessions = T - n_agent_sessions
        # human sessions: normal browse with possible purchase
        for s in range(n_human_sessions):
            session_id = f"h_{len(events)}_{s}"
            # integers(1, 4) has an exclusive upper bound, so k is 1, 2 or 3
            k = int(self._rng.integers(1, 4))
            prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False)
            t = 0.0
            # 3*k gaps: slots [0,k) for views, [k,2k) for carts, [2k,3k) for purchases
            inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k)
            signal_score = 0.0
            purchased_any = False
            for i, pid in enumerate(prod_ids):
                t += float(inter_times[i])
                price_shown = float(base_prices[pid])
                events.append({
                    "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
                    "action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0,
                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
                })
                signal_score += 1.0
                if self._rng.random() < 0.35:
                    t += float(inter_times[i + k])
                    events.append({
                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
                        "action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0,
                        "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
                    })
                    signal_score += 2.0
                # at most one purchase per human session
                if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])):
                    t += float(inter_times[i + 2 * k])
                    mult = self._session_markup_multiplier(signal_score)
                    price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price))
                    # for humans the oracle price equals the observed price
                    events.append({
                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
                        "action": "purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1,
                        "price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score,
                    })
                    purchased_any = True
        # agent sessions: split recon/purchase to circumvent COI
        # NOTE: max(1, ...) means one agent is simulated even when n_agent_sessions is 0 or 1
        n_agent_ids = max(1, n_agent_sessions // 2)
        for a in range(n_agent_ids):
            agent_id = f"a_{a}"
            recon_session_id = f"{agent_id}_recon"
            t = 0.0
            # at least 5 views per recon session
            n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5
            inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1))
            # sampled with replacement: the same product may be viewed repeatedly
            prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views)
            recon_signal = 0.0
            for i, pid in enumerate(prod_ids):
                t += float(inter_times[i])
                events.append({
                    "session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid),
                    "action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0,
                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
                })
                recon_signal += 1.0
            # clean purchase session with minimal interactions
            if self._rng.random() < self.constraints.agent_purchase_probability:
                purchase_session_id = f"{agent_id}_clean"
                pid = int(self._rng.integers(0, self.product_catelogue_size))
                t2 = 0.0
                clean_signal = 0.0
                t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
                events.append({
                    "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
                    "action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0,
                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
                })
                clean_signal += 1.0
                if self._rng.random() < float(agent_pprob[pid]):
                    t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
                    # observed price: markup sees only the single-view clean session
                    obs_mult = self._session_markup_multiplier(clean_signal)
                    obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price))
                    oracle_mult = self._session_markup_multiplier(recon_signal)  # oracle links recon->purchase
                    oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price))
                    events.append({
                        "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
                        "action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1,
                        "price_paid": obs_paid, "oracle_price_paid": oracle_paid, "signal_score": clean_signal,
                    })
        return pd.DataFrame(events)
    def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]:
        if interaction_df.empty:
            return {"mean_sale_price": 0.0, "look_to_book": 0.0}
        purchases = interaction_df[interaction_df["action"] == "purchase"]
        mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0
        views = float((interaction_df["action"] == "view").sum())
        buys = float((interaction_df["action"] == "purchase").sum())
        return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))}
    def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame:
        """Aggregate an event table into one feature row per ``session_id``.

        Reads the ``session_id``, ``t``, ``action`` and ``actor`` columns of
        ``df``. Returns an empty DataFrame when ``df`` is empty; otherwise a
        frame with the feature columns built below, followed by
        ``reset_index()``.

        NOTE(review): ``include_groups=False`` is a relatively recent pandas
        keyword (2.2+, to my knowledge) -- confirm the pinned pandas version.
        """
        if df.empty:
            return pd.DataFrame()
        # sort=False keeps sessions in first-appearance order
        g = df.groupby("session_id", sort=False)
        session_duration = g["t"].max() - g["t"].min()
        total_interactions = g.size()
        # mean gap between consecutive (time-sorted) events; 0.0 for single-event sessions
        avg_time_between = g["t"].apply(lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0)
        # 1e-6 guards against zero-duration sessions
        interaction_velocity = total_interactions / (session_duration + 1e-6)
        views = g.apply(lambda x: int((x["action"] == "view").sum()), include_groups=False)
        cart_adds = g.apply(lambda x: int((x["action"] == "cart").sum()), include_groups=False)
        purchases = g.apply(lambda x: int((x["action"] == "purchase").sum()), include_groups=False)
        conversion_rate = purchases / (views + 1e-6)
        # a session counts as an agent session if any of its events has actor == "agent"
        is_agent = g["actor"].apply(lambda s: bool((s == "agent").any()), include_groups=False)
        return pd.DataFrame({
            "session_duration_sec": session_duration.astype(float),
            "avg_time_between_events": avg_time_between.astype(float),
            "total_interactions": total_interactions.astype(int),
            "interaction_velocity": interaction_velocity.astype(float),
            "item_views": views.astype(int),
            "cart_adds": cart_adds.astype(int),
            "purchases": purchases.astype(int),
            "conversion_rate": conversion_rate.astype(float),
            "is_agent": is_agent.astype(bool),
        }).reset_index()
    def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray:
        # proxy demand from weighted interaction events
        if interaction_df.empty:
            return np.zeros(self.product_catelogue_size, dtype=np.float32)
        df = interaction_df
        if exclude_sessions is not None:
            bad_sessions = set(exclude_sessions.loc[exclude_sessions].index)
            df = df[~df["session_id"].isin(bad_sessions)]
        weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5}
        w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float)
        prod = df["product_id"].to_numpy(dtype=int)
        q_hat = np.zeros(self.product_catelogue_size, dtype=float)
        np.add.at(q_hat, prod, w)
        return q_hat.astype(np.float32)
    def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]:
        interaction_df = self._simulate_sessions(prices)
        self._last_interaction_df = interaction_df
        session_df = self._session_feature_table(interaction_df)
        predicted_agent_sessions = None
        if (self.use_defense and self.agent_detector is not None and not session_df.empty):
            predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id"))
        q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None)
        q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \
            if predicted_agent_sessions is not None else q_hat_naive.copy()
        true_human = np.zeros(self.product_catelogue_size, dtype=float)
        true_agent = np.zeros(self.product_catelogue_size, dtype=float)
        if not interaction_df.empty:
            purchases = interaction_df[interaction_df["action"] == "purchase"]
            if not purchases.empty:
                for _, r in purchases.iterrows():
                    if r["actor"] == "human":
                        true_human[int(r["product_id"])] += 1.0
                    else:
                        true_agent[int(r["product_id"])] += 1.0
        revenue_observed = float(interaction_df["price_paid"].sum()) if not interaction_df.empty else 0.0
        revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0
        agent_loss = max(0.0, revenue_oracle - revenue_observed)
        eps = 1e-6
        internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps)
        internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps)
        interaction_features = self.compute_interaction_features(interaction_df)
        summary = {
            "prices": prices.copy(),
            "interaction_df": interaction_df,
            "session_df": session_df,
            "q_hat_naive": q_hat_naive,
            "q_hat_defended": q_hat_defended,
            "true_human_demand": true_human.astype(np.float32),
            "true_agent_purchases": true_agent.astype(np.float32),
            "internal_error_naive": internal_error_naive.astype(np.float32),
            "internal_error_defended": internal_error_def.astype(np.float32),
            "interaction_features": interaction_features,
            "revenue_observed": revenue_observed,
            "revenue_oracle": revenue_oracle,
            "agent_loss": agent_loss,
            "predicted_agent_sessions": predicted_agent_sessions,
        }
        self.simulation_history.append(summary)
        return summary
    def get_interaction_data(self) -> np.ndarray:
        """Return the events of the most recent simulation step.

        Returns an empty object-dtype array when no events are cached;
        otherwise the cached frame converted with ``to_dict(orient="records")``.

        NOTE(review): the non-empty branch returns the ``to_dict`` result (a
        list of per-row dicts), not an ``np.ndarray`` as annotated, so the two
        branches return different types. Confirm what callers expect before
        unifying them.
        """
        if self._last_interaction_df.empty:
            return np.array([], dtype=object)
        return self._last_interaction_df.to_dict(orient="records")
 class PHANTOMEnv(gym.Env):
-    metadata = {"render_modes": []}
+    metadata = {"render_modes": ["human", "ansi"]}
-    def __init__(self, use_defense: bool = False):
+    def __init__(self, constraints: Optional[BusinessLogicConstraints] = None):
        super().__init__()
-        self.constraints = BusinessLogicConstraints()
+        self.c = constraints or BusinessLogicConstraints()
-        self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment,
+        self.n = int(self.c.product_catalogue_size)
-                                       high=self.constraints.max_price_adjustment,
+
-                                       shape=(self.constraints.product_catelogue_size,), dtype=np.float32)
+        self._rng = np.random.default_rng(self.c.seed)
-        self.observation_space = spaces.Dict({
+        self._t = 0
-            "elasticity": spaces.Dict({
+        self._alpha_true = float(self.c.agent_share)
-                "price": spaces.Box(
+        self._alpha_hat = float(self.c.agent_share)
-                    low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32),
+        self._costs = np.zeros(self.n, dtype=np.float32)
-                    high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32),
+        self._refs = np.zeros(self.n, dtype=np.float32)
-                    dtype=np.float32),
+        self._prices: Optional[np.ndarray] = None
-                "demand": spaces.Box(
+        self._last_sessions: list[Session] = []
-                    low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
+        self._last_coi: COIWindow | None = None
-                    high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32),
+        self._limbo = Limbo()
-                    dtype=np.float32),
+
-            })
+        self.action_space = spaces.Box(
-        })
+            low=np.full((self.n,), self.c.system_min_price, dtype=np.float32),
-        self.commerce_platform = CommercePlatform(
+            high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
-            product_catelogue_size=self.constraints.product_catelogue_size,
+            dtype=np.float32,
-            max_price=self.constraints.system_max_price,
+        )
-            min_price=self.constraints.system_min_price,
+        self.observation_space = spaces.Dict(
-            constraints=self.constraints,
+            {
-            agent_detector=simple_agent_detector,
+                "elasticity": spaces.Dict(
-            use_defense=use_defense)
+                    {
-        self._rng = np.random.default_rng(self.constraints.seed)
+                        "price": spaces.Box(
-        self.t = 0
+                            low=np.full((self.n,), self.c.system_min_price, dtype=np.float32),
-        self._prev_prices: Optional[np.ndarray] = None
+                            high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
-        self.state: Dict[str, Any] = {}
+                            dtype=np.float32,
                        ),
                        "demand": spaces.Box(
                            low=np.zeros((self.n,), dtype=np.float32),
                            high=np.full((self.n,), 1e9, dtype=np.float32),
                            dtype=np.float32,
                        ),
                    }
                ),
                "market": spaces.Dict(
                    {
                        "alpha_hat": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
                        "revenue_rate": spaces.Box(low=0.0, high=1e12, shape=(1,), dtype=np.float32),
                        "conversion_rate": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
                        "price_volatility": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
                    }
                ),
                "cost": spaces.Box(
                    low=np.zeros((self.n,), dtype=np.float32),
                    high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
                    dtype=np.float32,
                ),
            }
        )
    def _reset_catalogue(self) -> None:
        """Draw a fresh catalogue: unit costs, reference prices, current prices.

        Costs are uniform in [15, 60) and margins uniform in [0.2, 0.6), both
        drawn from ``self._rng`` (costs first). Reference prices are
        ``cost * (1 + margin)``; current prices start as a copy of the
        reference prices. All arrays are float32 of length ``self.n``.
        """
        n_products = self.n
        unit_costs = self._rng.uniform(15.0, 60.0, size=n_products).astype(np.float32)
        markup = self._rng.uniform(0.2, 0.6, size=n_products).astype(np.float32)
        reference = (unit_costs * (1.0 + markup)).astype(np.float32)

        self._costs = unit_costs
        self._refs = reference
        self._prices = reference.copy()
    def _observe_market(
        self, prices: np.ndarray
    ) -> tuple[list[Session], Dict[str, float], np.ndarray, np.ndarray, float, float, int]:
        """Put ``prices`` to market for one step and aggregate the outcome.

        Calls ``put_prices_to_market`` with the current costs, the true agent
        share and a fresh seed drawn from ``self._rng``, then aggregates the
        sessions with ``aggregate_demand_by_product`` and
        ``aggregate_purchases``.

        Args:
            prices: per-product prices for this step.

        Returns:
            ``(sessions, demand_map, demand_by_product, purchases, revenue,
            cost, n_agents)``. The tuple has no conversion slot, so conversion
            is not computed here (the previous unused local was removed).
        """
        sessions, demand_map = put_prices_to_market(
            prices,
            costs=self._costs,
            alpha=self._alpha_true,
            n_sessions=int(self.c.sessions_per_step),
            # one RNG draw per call keeps the market simulation reproducible
            seed=int(self._rng.integers(0, 2**31 - 1)),
        )
        demand_by_product = aggregate_demand_by_product(sessions, demand_map, self.n)
        purchases, revenue, cost, n_agents = aggregate_purchases(sessions, self._costs, self.n)
        return sessions, demand_map, demand_by_product, purchases, revenue, cost, n_agents
    def _update_alpha_hat(self, sessions: list[Session]) -> float:
        """Blend this step's per-session alpha estimates into ``self._alpha_hat``.

        Sessions with no events are ignored. When at least one estimate is
        available the running value becomes ``0.8 * old + 0.2 * mean(estimates)``,
        clipped to ``[0, 1]``; otherwise it is left untouched.

        Returns:
            The (possibly updated) running estimate.
        """
        per_session = [estimate_session_alpha(sess) for sess in sessions if sess.events]
        if per_session:
            batch_mean = float(np.mean(per_session))
            blended = 0.8 * self._alpha_hat + 0.2 * batch_mean
            self._alpha_hat = float(np.clip(blended, 0.0, 1.0))
        return self._alpha_hat
    def _reward(self, prices: np.ndarray, revenue: float, cost: float, volatility: float) -> float:
        """Step reward: profit minus leakage, volatility and estimation penalties.

        ``reward = (revenue - cost) - coi_strength * leak
                   - w_volatility * volatility
                   - w_estimation_error * |alpha_hat - alpha_true|``

        ``leak`` is read from ``self._last_coi`` and is 0.0 when that is unset.
        ``prices`` is accepted but not used in the computation.
        """
        cfg = self.c
        total = float(revenue - cost)
        leak = float(self._last_coi.leak) if self._last_coi else 0.0
        estimation_gap = abs(self._alpha_hat - self._alpha_true)

        # Penalties are subtracted one at a time, in this fixed order.
        total = total - cfg.coi_strength * leak
        total = total - cfg.w_volatility * volatility
        total = total - cfg.w_estimation_error * estimation_gap
        return total
    def _build_obs(
        self,
        prices: np.ndarray,
        demand_by_product: np.ndarray,
        revenue: float,
        conversion: float,
        volatility: float,
    ) -> Dict[str, Any]:
        """Assemble the observation dict from this step's quantities.

        Layout:
          * ``"elasticity"``: ``"price"`` and ``"demand"`` vectors (float32)
          * ``"market"``: length-1 float32 arrays ``"alpha_hat"``,
            ``"revenue_rate"``, ``"conversion_rate"``, ``"price_volatility"``
          * ``"cost"``: current unit costs (float32)
        """
        def _scalar(value) -> np.ndarray:
            # Scalars are wrapped as length-1 float32 arrays.
            return np.array([value], dtype=np.float32)

        elasticity = {
            "price": prices.astype(np.float32),
            "demand": demand_by_product.astype(np.float32),
        }
        market = {
            "alpha_hat": _scalar(self._alpha_hat),
            "revenue_rate": _scalar(revenue),
            "conversion_rate": _scalar(conversion),
            "price_volatility": _scalar(volatility),
        }
        return {
            "elasticity": elasticity,
            "market": market,
            "cost": self._costs.astype(np.float32),
        }
    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        super().reset(seed=seed)
        if seed is not None:
            self._rng = np.random.default_rng(seed)
-            self.commerce_platform._rng = np.random.default_rng(seed)
+        self._t = 0
-        self.t = 0
+        self._alpha_true = float(np.clip(self.c.agent_share, *self.c.alpha_bounds))
-        init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32)
+        self._alpha_hat = float(self.c.agent_share)
-        self._prev_prices = init_prices.copy()
+        self._reset_catalogue()
-        self.state = {
+        self._limbo = Limbo()
-            "elasticity": {
+        self._last_sessions = []
-                "price": init_prices,
+        self._last_coi = None
                "demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
            }
        }
        return self.state, {}
-    def step(self, action: np.ndarray):
+        prices = self._prices if self._prices is not None else np.zeros(self.n, dtype=np.float32)
-        self.t += 1
+        obs = self._build_obs(prices, np.zeros(self.n, dtype=np.float32), 0.0, 0.0, 0.0)
-        base_prices = self.state["elasticity"]["price"].astype(np.float32)
+        return obs, {"alpha_true": self._alpha_true}
        new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)),
                           self.constraints.system_min_price,
                           self.constraints.system_max_price).astype(np.float32)
        result = self.commerce_platform.run_pricing_simulation(new_prices)
-        if self.commerce_platform.use_defense:
+    def step(self, action: np.ndarray) -> Tuple[Dict[str, Any], float, bool, bool, Dict[str, Any]]:
-            demand_est = result["q_hat_defended"]
+        if self._prices is None:
-            internal_err = result["internal_error_defended"]
+            raise RuntimeError("reset() must be called before step()")
        else:
            demand_est = result["q_hat_naive"]
            internal_err = result["internal_error_naive"]
-        self.state["elasticity"]["price"] = new_prices
+        prev = self._prices
-        self.state["elasticity"]["demand"] = demand_est
+        prices = constrain_prices(
            prev,
            np.asarray(action, dtype=np.float32),
            costs=self._costs,
            min_price=float(self.c.system_min_price),
            max_price=float(self.c.system_max_price),
            max_adjustment=float(self.c.max_price_adjustment),
            min_margin_pct=float(self.c.min_margin_pct),
        )
        self._prices = prices
        self._limbo.add_update("prices", prices)
-        volatility = 0.0 if self._prev_prices is None else \
+        sessions, demand_map, demand_by_product, purchases, revenue, cost, n_agents = self._observe_market(prices)
-            float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6))))
+        self._last_sessions = sessions
-        self._prev_prices = new_prices.copy()
+        self._limbo.add_update("demand", demand_map)
-        revenue_observed = float(result["revenue_observed"])
+        self._update_alpha_hat(self._last_sessions)
-        agent_loss = float(result["agent_loss"])
+        self._last_coi = compute_coi_window(self._last_sessions, self._costs, demand_mapping=demand_map)
        err_mean = float(np.mean(internal_err))
-        reward = (revenue_observed
+        self._alpha_true = float(np.clip(self._alpha_true + self.c.alpha_drift, *self.c.alpha_bounds))
-                 - self.constraints.w_agent_loss * agent_loss
+        volatility = float(np.std((prices - prev) / (prev + 1e-6)))
-                 - self.constraints.w_volatility * volatility
+        reward = float(self._reward(prices, revenue, cost, volatility))
-                 - self.constraints.w_estimation_error * err_mean)
+        conversion = float(np.sum(purchases) / max(len(self._last_sessions), 1))
-        terminated = self.t >= self.constraints.episode_length
+        self._t += 1
        terminated = self._t >= int(self.c.max_steps)
        obs = self._build_obs(prices, demand_by_product, revenue, conversion, min(volatility, 1.0))
        info = {
-            "t": self.t,
+            "step": self._t,
-            "revenue_observed": revenue_observed,
+            "reward": reward,
-            "revenue_oracle": float(result["revenue_oracle"]),
+            "revenue": float(revenue),
-            "agent_loss": agent_loss,
+            "profit": float(revenue - cost),
-            "ux_volatility": volatility,
+            "n_sessions": int(self.c.sessions_per_step),
-            "mean_internal_error": err_mean,
+            "n_agents": int(n_agents),
-            "look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)),
+            "alpha_true": float(self._alpha_true),
-            "mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)),
+            "alpha_hat": float(self._alpha_hat),
-            "true_human_purchases_total": float(np.sum(result["true_human_demand"])),
+            "alpha_error": float(abs(self._alpha_hat - self._alpha_true)),
-            "true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])),
+            "price_std": float(np.std(prices)),
            "price_volatility": float(volatility),
        }
-        return self.state, float(reward), terminated, False, info
+        if self._last_coi is not None:
            info.update(
                {
                    "coi_policy": float(self._last_coi.policy),
                    "coi_agent": float(self._last_coi.agent),
                    "coi_leakage": float(self._last_coi.leak),
                    "coi_survival": float(self._last_coi.survival_ratio),
                    "coi_erosion": float(coi_erosion(self._last_coi.policy, self._last_coi.agent)),
                }
            )
        return obs, reward, terminated, False, info
    def render(self, mode: str = "human") -> str | None:
        """Produce a one-line status summary of the environment.

        Returns None when no prices are set. Otherwise builds a line with the
        step counter, the true and estimated alpha, and the standard deviation
        of the current prices. The line is printed when ``mode == "human"``
        and returned for every mode.
        """
        if self._prices is None:
            return None

        pieces = (
            f"t={self._t}/{self.c.max_steps} ",
            f"alpha_true={self._alpha_true:.3f} alpha_hat={self._alpha_hat:.3f} ",
            f"price_std={float(np.std(self._prices)):.2f}",
        )
        summary = "".join(pieces)
        if mode == "human":
            print(summary)
        return summary
-if __name__ == "__main__":
+    def close(self) -> None:
-    import matplotlib.pyplot as plt
+        return
    from collections import defaultdict
    runs = {}
    for use_defense in (False, True):
        env = PHANTOMEnv(use_defense=use_defense)
        obs, _ = env.reset(seed=42)
        metrics = defaultdict(list)
        total_reward = 0.0
        done = False
        while not done:
            action = env.action_space.sample()
            obs, reward, done, _, info = env.step(action)
            total_reward += reward
            p_mean = float(np.mean(obs["elasticity"]["price"]))
            q_mean = float(np.mean(obs["elasticity"]["demand"]))
            p_std = float(np.std(obs["elasticity"]["price"]))
            metrics['t'].append(info['t'])
            metrics['price_mean'].append(p_mean)
            metrics['price_std'].append(p_std)
            metrics['demand_mean'].append(q_mean)
            metrics['revenue_observed'].append(info['revenue_observed'])
            metrics['revenue_oracle'].append(info['revenue_oracle'])
            metrics['agent_loss'].append(info['agent_loss'])
            metrics['ux_volatility'].append(info['ux_volatility'])
            metrics['look_to_book'].append(info['look_to_book'])
            metrics['reward'].append(reward)
            metrics['human_purchases'].append(info['true_human_purchases_total'])
            metrics['agent_purchases'].append(info['true_agent_purchases_total'])
            if info['t'] % 20 == 0 or done:
                print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} "
                      f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
                      f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
                      f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")
        runs[use_defense] = metrics
        print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n")
    fig, axes = plt.subplots(3, 3, figsize=(15, 12))
    fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold')
    plot_configs = [
        ('price_mean', 'Mean Price', 'Price'),
        ('demand_mean', 'Mean Demand Estimate', 'Demand'),
        ('revenue_observed', 'Revenue (Observed)', 'Revenue'),
        ('agent_loss', 'Agent Loss (Oracle - Observed)', 'Loss'),
        ('ux_volatility', 'UX Volatility (Price Change)', 'Volatility'),
        ('look_to_book', 'Look-to-Book Ratio', 'Ratio'),
        ('reward', 'Step Reward', 'Reward'),
        ('human_purchases', 'Human Purchases', 'Count'),
        ('agent_purchases', 'Agent Purchases', 'Count'),
    ]
    for idx, (key, title, ylabel) in enumerate(plot_configs):
        ax = axes[idx // 3, idx % 3]
        for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]:
            m = runs[use_defense]
            ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5)
        ax.set_xlabel('Step')
        ax.set_ylabel(ylabel)
        ax.set_title(title, fontsize=10, fontweight='bold')
        ax.legend(loc='best', fontsize=8)
        ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('phantom_env_comparison.png', dpi=150, bbox_inches='tight')
    print("Plot saved to phantom_env_comparison.png")
    plt.show()
--- a/sim/rl/jax_core/init.py
+++ b/sim/rl/jax_core/init.py
@@ -0,0 +1,11 @@
 """JAX-accelerated simulation core for PHANTOM environment."""
 from .transitions import TransitionData, compile_transitions, fallback_transitions, JAX_AVAILABLE
 from .simulation import SessionBatch, SimResult, sample_sessions, compute_metrics
 from .features import session_features, compute_session_transitions
 from .separability import compute_divergences, estimate_alpha_batch
 __all__ = [
    "JAX_AVAILABLE", "TransitionData", "compile_transitions", "fallback_transitions",
    "SessionBatch", "SimResult", "sample_sessions", "compute_metrics",
    "session_features", "compute_session_transitions", "compute_divergences", "estimate_alpha_batch",
 ]
--- a/sim/rl/jax_core/features.py
+++ b/sim/rl/jax_core/features.py
@@ -0,0 +1,69 @@
 """Vectorized session feature extraction."""
 import numpy as np
 from .transitions import N_STATES, PURCHASE_IDX, CART_IDX
 from .simulation import SessionBatch
 try:
    import jax.numpy as jnp
    from jax import jit
    JAX_AVAILABLE = True
 except ImportError:
    jnp, JAX_AVAILABLE = np, False
    def jit(f): return f
@jit
 def extract_features(states, dwells, lengths):
    """Build the per-session feature matrix; returns an (n_sess, 9) array.

    Columns, in order: duration, avg_dwell, total, velocity, views, carts,
    purchases, learn, conversion. Only the first lengths[i] steps of session i
    contribute to the dwell sum and to the state counts.
    """
    _, horizon = states.shape
    # True where the step index lies inside the session's recorded length.
    in_session = jnp.arange(horizon)[None, :] < lengths[:, None]
    def occurrences(state_idx):
        hits = (states == state_idx) & in_session
        return jnp.sum(hits, axis=1).astype(jnp.float32)
    total = lengths.astype(jnp.float32)
    duration = jnp.sum(dwells * in_session, axis=1)
    views = occurrences(1)
    learn = occurrences(2)
    carts = occurrences(3)
    purchases = occurrences(4)
    # The 1e-6 terms keep the ratios finite for empty sessions.
    columns = [
        duration,
        duration / (total + 1e-6),    # avg_dwell
        total,
        total / (duration + 1e-6),    # velocity
        views,
        carts,
        purchases,
        learn,
        purchases / (views + 1e-6),   # conversion
    ]
    return jnp.stack(columns, axis=1)
 def session_features(batch: SessionBatch) -> np.ndarray:
    """Return the (n_sess, 9) feature matrix of a batch as a NumPy array.

    Uses extract_features when JAX is available; otherwise the same nine
    columns (duration, avg_dwell, total, velocity, views, carts, purchases,
    learn, conversion) are computed directly with NumPy.
    """
    if JAX_AVAILABLE:
        feats = extract_features(jnp.array(batch.states), jnp.array(batch.dwells), jnp.array(batch.lengths))
        return np.asarray(feats)
    # numpy fallback
    _, horizon = batch.states.shape
    in_session = np.arange(horizon)[None, :] < batch.lengths[:, None]
    def occurrences(state_idx):
        return np.sum((batch.states == state_idx) & in_session, axis=1).astype(np.float32)
    duration = np.sum(batch.dwells * in_session, axis=1)
    total = batch.lengths.astype(np.float32)
    views = occurrences(1)
    learn = occurrences(2)
    carts = occurrences(3)
    purchases = occurrences(4)
    avg_dwell = duration / (total + 1e-6)
    velocity = total / (duration + 1e-6)
    conversion = purchases / (views + 1e-6)
    return np.stack([duration, avg_dwell, total, velocity, views, carts, purchases, learn, conversion], axis=1)
@jit
 def session_transitions(states, lengths, n_states=N_STATES):
    """Compute row-normalised empirical transition matrices, one per session.

    Args:
        states: (n_sess, max_len) integer state indices; negative values are padding.
        lengths: (n_sess,) number of recorded steps in each session.
        n_states: size of the state space.

    Returns:
        (n_sess, n_states, n_states) array. Rows with at least one observed
        transition sum to ~1; rows with none stay zero.
    """
    _, max_len = states.shape
    # Transition t -> t+1 belongs to the session only while t < length - 1.
    within = jnp.arange(max_len - 1)[None, :] < (lengths[:, None] - 1)
    src, dst = states[:, :-1], states[:, 1:]
    # Clamp so padding can index the one-hot table; those steps are zeroed by `valid`.
    src_c = jnp.clip(src, 0, n_states - 1)
    dst_c = jnp.clip(dst, 0, n_states - 1)
    valid = (within & (src >= 0) & (dst >= 0)).astype(jnp.float32)
    one_hot = jnp.eye(n_states)
    # The mask is applied per step *before* reducing over time, so padded and
    # out-of-length steps contribute nothing (they would otherwise be counted
    # as state-0 transitions). One einsum also replaces the per-session loop.
    counts = jnp.einsum('bti,btj,bt->bij', one_hot[src_c], one_hot[dst_c], valid)
    row_sums = counts.sum(axis=-1, keepdims=True)
    return counts / (row_sums + 1e-10)
 def compute_session_transitions(batch: SessionBatch) -> np.ndarray:
    """Return per-session, row-normalised transition matrices as a NumPy array.

    Delegates to session_transitions when JAX is available; otherwise counts
    consecutive state pairs inside each session's length with plain loops.
    """
    if JAX_AVAILABLE:
        jax_states = jnp.array(batch.states)
        jax_lengths = jnp.array(batch.lengths)
        return np.asarray(session_transitions(jax_states, jax_lengths))
    # numpy fallback
    n_sessions, _ = batch.states.shape
    counts = np.zeros((n_sessions, N_STATES, N_STATES), dtype=np.float32)
    for row in range(n_sessions):
        n_pairs = batch.lengths[row] - 1
        for step in range(n_pairs):
            src = batch.states[row, step]
            dst = batch.states[row, step + 1]
            # negative indices are padding and never count as a transition
            if src < 0 or dst < 0:
                continue
            counts[row, src, dst] += 1
    totals = counts.sum(axis=-1, keepdims=True)
    return counts / (totals + 1e-10)
--- a/sim/rl/jax_core/separability.py
+++ b/sim/rl/jax_core/separability.py
@@ -0,0 +1,43 @@
 """Vectorized KL divergence for separability scoring."""
 import numpy as np
 from typing import Tuple
 try:
    import jax.numpy as jnp
    from jax import jit
    JAX_AVAILABLE = True
 except ImportError:
    jnp, JAX_AVAILABLE = np, False
    def jit(f): return f
@jit
 def batch_kl(P, Q_human, Q_agent, eps=1e-10):
    """KL(P||Q) of every batched matrix in P against two reference matrices.

    P has shape (n, s, s); Q_human and Q_agent have shape (s, s). P is
    smoothed by eps and renormalised along its last axis; the references are
    only smoothed. Returns (delta_h, delta_a), each of shape (n,).
    """
    smoothed = P + eps
    p = smoothed / smoothed.sum(axis=-1, keepdims=True)
    def divergence_from(reference):
        q = reference[None] + eps
        return jnp.sum(p * jnp.log(p / q), axis=(1, 2))
    return divergence_from(Q_human), divergence_from(Q_agent)
 def compute_divergences(session_trans: np.ndarray, ref_human: np.ndarray, ref_agent: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Return (delta_h, delta_a): KL divergence of each session's transition matrix from the human and agent reference matrices."""
    if JAX_AVAILABLE:
        jax_h, jax_a = batch_kl(jnp.array(session_trans), jnp.array(ref_human), jnp.array(ref_agent))
        return np.asarray(jax_h), np.asarray(jax_a)
    # numpy fallback: same smoothing and normalisation as batch_kl
    eps = 1e-10
    smoothed = session_trans + eps
    p = smoothed / smoothed.sum(axis=-1, keepdims=True)
    q_human = ref_human[None] + eps
    q_agent = ref_agent[None] + eps
    delta_h = np.sum(p * np.log(p / q_human), axis=(1, 2))
    delta_a = np.sum(p * np.log(p / q_agent), axis=(1, 2))
    return delta_h, delta_a
 def estimate_alpha_batch(prob_agent: np.ndarray, delta_h: np.ndarray, delta_a: np.ndarray, temp: float = 1.0) -> np.ndarray:
    """Vectorized alpha estimation from classifier probs and divergences.

    Args:
        prob_agent: per-session classifier probability of being an agent.
        delta_h: per-session divergence from the human reference.
        delta_a: per-session divergence from the agent reference.
        temp: sigmoid temperature; values <= 0 skip the sigmoid.

    Returns:
        Array of alpha estimates clipped to [0, 1]. The divergence ratio
        delta_a / (delta_h + delta_a) falls back to 0.5 where the combined
        mass is not above 1e-8, and is averaged 50/50 with prob_agent.
    """
    mass = delta_h + delta_a
    # np.where evaluates both branches, so the division also runs where mass is
    # ~0; those entries are replaced by 0.5, so silence the spurious warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = np.where(mass > 1e-8, delta_a / mass, 0.5)
    blended = 0.5 * prob_agent + 0.5 * ratio
    if temp <= 0:
        return np.clip(blended, 0.0, 1.0)
    return np.clip(1.0 / (1.0 + np.exp(-temp * (blended - 0.5))), 0.0, 1.0)
--- a/sim/rl/jax_core/simulation.py
+++ b/sim/rl/jax_core/simulation.py
@@ -0,0 +1,116 @@
 """Vectorized Markov chain session sampling with JAX."""
 from typing import NamedTuple, Tuple
 import numpy as np
 from functools import partial
 try:
    import jax, jax.numpy as jnp
    from jax import lax
    JAX_AVAILABLE = True
 except ImportError:
    JAX_AVAILABLE = False
 from .transitions import TransitionData, N_STATES, TERM_IDX, PURCHASE_IDX, CART_IDX
 class SessionBatch(NamedTuple):
    """Fixed-width batch of sampled sessions; row i of every field describes session i."""
    states: np.ndarray      # (n_sess, max_len) state indices, -1=padding
    dwells: np.ndarray      # (n_sess, max_len) dwell times
    products: np.ndarray    # (n_sess,) product index per session
    actors: np.ndarray      # (n_sess,) 0=human, 1=agent
    lengths: np.ndarray     # (n_sess,) actual session length
 class SimResult(NamedTuple):
    """Aggregate metrics for one simulated batch, as produced by compute_metrics."""
    demand_human: np.ndarray  # per-product count of purchasing human sessions
    demand_agent: np.ndarray  # per-product count of purchasing agent sessions
    revenue: float  # sum of offered prices over all purchasing sessions
    revenue_oracle: float  # sum of base prices over the same purchasing sessions
    agent_loss: float  # sum of (base_price - price) over agent purchases
    coi: float  # max(0, mean margin - 0.5 * mean premium) over human purchases; 0.0 if none
    look_to_book: float  # count of state-1 (view_item_page) entries / (purchasing sessions + 1e-6)
    mean_sale_price: float  # mean offered price over purchasing sessions; 0.0 if none
    n_human_purchases: int  # number of human sessions containing a purchase
    n_agent_purchases: int  # number of agent sessions containing a purchase
    sessions: SessionBatch  # the batch the metrics were computed from
 if JAX_AVAILABLE:
    # n_human, n_agent and max_steps (positions 5-7) are static: they fix array
    # shapes and the scan length, so each distinct combination is compiled separately.
    @partial(jax.jit, static_argnums=(5,6,7))
    def _sample_sessions_jax(key, T_human, T_agent, dwell_human, dwell_agent, n_human, n_agent, max_steps):
        """Sample n_human + n_agent Markov-chain sessions with a single lax.scan.

        Humans occupy the first n_human rows, agents the rest. Returns
        (states, dwells, actors, lengths) where states/dwells are (n, max_steps),
        actors is (n,) with 0=human / 1=agent, and lengths is (n,).
        """
        n = n_human + n_agent
        # Only k3 is consumed below (as the scan's initial key); k1/k2 are
        # re-bound inside step() and k4 is unused.
        k1, k2, k3, k4 = jax.random.split(key, 4)
        actors = jnp.concatenate([jnp.zeros(n_human, dtype=jnp.int32), jnp.ones(n_agent, dtype=jnp.int32)])
        # Per-session copies of the transition / dwell tables, selected by actor type.
        T = jnp.where(actors[:,None,None]==0, T_human[None], T_agent[None])  # (n,6,6)
        dwell_p = jnp.where(actors[:,None,None]==0, dwell_human[None], dwell_agent[None])  # (n,6,2)
        def step(carry, _):
            # carry = (current state per session, still-active flag per session, PRNG key)
            s, active, k = carry
            k, k1, k2 = jax.random.split(k, 3)
            probs = T[jnp.arange(n), s]  # (n,6)
            nxt = jax.random.categorical(k1, jnp.log(probs + 1e-10))
            # Sessions that already ended emit -1 (padding) from here on.
            nxt = jnp.where(active, nxt, -1)
            # Dwell is drawn with the gamma parameters of the state being left (s), floored at 0.3.
            shape = dwell_p[jnp.arange(n), s, 0]
            scale = dwell_p[jnp.arange(n), s, 1]
            dwell = jnp.maximum(0.3, jax.random.gamma(k2, shape) * scale)
            still = active & (nxt != TERM_IDX) & (nxt >= 0)
            # The scan output records the *next* state, so the initial state 0 is not part of `states`.
            return (nxt, still, k), (nxt, dwell)
        # Every session starts in state 0 and active.
        init = (jnp.zeros(n, dtype=jnp.int32), jnp.ones(n, dtype=jnp.bool_), k3)
        _, (states, dwells) = lax.scan(step, init, None, length=max_steps)
        states, dwells = states.T, dwells.T  # (n, max_steps)
        # Length = position of the first terminal/padding entry, inclusive;
        # sessions that never terminate within the horizon get max_steps.
        is_term = (states == -1) | (states == TERM_IDX)
        lengths = jnp.argmax(is_term, axis=1) + 1
        lengths = jnp.where(jnp.any(is_term, axis=1), lengths, max_steps)
        return states, dwells, actors, lengths
 def sample_sessions(key, trans: TransitionData, n_human: int, n_agent: int, n_products: int, max_steps: int = 40) -> SessionBatch:
    """Sample n_human + n_agent sessions and assign each a random product.

    Args:
        key: PRNG key. With JAX this is a jax.random key. In the NumPy
            fallback an integer is used directly as the seed, an indexable
            key contributes key[0], and anything else falls back to seed 42.
        trans: transition matrices and dwell parameters for both actor types.
        n_human: number of human sessions (placed first in the batch).
        n_agent: number of agent sessions.
        n_products: products are drawn uniformly from [0, n_products).
        max_steps: maximum number of recorded steps per session.

    Returns:
        SessionBatch of NumPy arrays.
    """
    if JAX_AVAILABLE:
        k1, k2 = jax.random.split(key)
        states, dwells, actors, lengths = _sample_sessions_jax(k1, trans.human_T, trans.agent_T, trans.human_dwell, trans.agent_dwell, n_human, n_agent, max_steps)
        products = jax.random.randint(k2, (n_human + n_agent,), 0, n_products)
        return SessionBatch(np.asarray(states), np.asarray(dwells), np.asarray(products), np.asarray(actors), np.asarray(lengths))
    # numpy fallback
    # Plain integer seeds used to be ignored in favour of 42 (and NumPy integer
    # scalars crashed on key[0]); honour them so runs are reproducible per seed.
    if isinstance(key, (int, np.integer)):
        seed = int(key)
    elif hasattr(key, '__getitem__'):
        seed = int(key[0])
    else:
        seed = 42
    rng = np.random.default_rng(seed)
    n = n_human + n_agent
    actors = np.concatenate([np.zeros(n_human, dtype=np.int32), np.ones(n_agent, dtype=np.int32)])
    products = rng.integers(0, n_products, size=n)
    states, dwells = np.full((n, max_steps), -1, dtype=np.int32), np.zeros((n, max_steps), dtype=np.float32)
    lengths = np.zeros(n, dtype=np.int32)
    for i in range(n):
        T = trans.human_T if actors[i] == 0 else trans.agent_T
        dp = trans.human_dwell if actors[i] == 0 else trans.agent_dwell
        s, t = 0, 0
        # Record the current state and its dwell, then move on; the terminal
        # state itself is never written, so unused slots keep the -1 padding.
        while t < max_steps and s != TERM_IDX:
            states[i, t] = s
            dwells[i, t] = max(0.3, rng.gamma(dp[s, 0], dp[s, 1]))
            s = rng.choice(N_STATES, p=T[s])
            t += 1
        lengths[i] = t
    return SessionBatch(states, dwells, products, actors, lengths)
 def compute_metrics(batch: SessionBatch, prices: np.ndarray, unit_cost: np.ndarray, base_price: np.ndarray) -> SimResult:
    """Summarise a sampled batch into demand, revenue and diagnostic metrics.

    A session counts as a purchase when PURCHASE_IDX appears anywhere in its
    state row; prices, unit_cost and base_price are indexed by product.
    """
    n_products = len(prices)
    bought = np.any(batch.states == PURCHASE_IDX, axis=1)
    bought_by_human = bought & (batch.actors == 0)
    bought_by_agent = bought & (batch.actors == 1)
    def demand(mask):
        return np.bincount(batch.products[mask], minlength=n_products).astype(np.float32)
    demand_h = demand(bought_by_human)
    demand_a = demand(bought_by_agent)
    # revenue at offered prices vs. at base prices for the same sales
    sold = batch.products[bought]
    revenue = float(np.sum(prices[sold]))
    revenue_oracle = float(np.sum(base_price[sold]))
    # agent loss: base_price - price_paid summed over agent purchases
    sold_to_agents = batch.products[bought_by_agent]
    agent_loss = float(np.sum(base_price[sold_to_agents] - prices[sold_to_agents]))
    # COI: mean margin minus half the mean (base - price) gap, floored at zero
    sold_to_humans = batch.products[bought_by_human]
    coi = 0.0
    if len(sold_to_humans) > 0:
        margin = float(np.mean(prices[sold_to_humans] - unit_cost[sold_to_humans]))
        premium = float(np.mean(base_price[sold_to_humans] - prices[sold_to_humans]))
        coi = max(0.0, margin - premium * 0.5)
    # look to book: view_item_page (index 1) entries per purchasing session
    views = float(np.sum(batch.states == 1))
    n_purch = int(bought.sum())
    look_to_book = views / (n_purch + 1e-6)
    mean_sale = float(np.mean(prices[sold])) if n_purch > 0 else 0.0
    return SimResult(
        demand_human=demand_h,
        demand_agent=demand_a,
        revenue=revenue,
        revenue_oracle=revenue_oracle,
        agent_loss=agent_loss,
        coi=coi,
        look_to_book=look_to_book,
        mean_sale_price=mean_sale,
        n_human_purchases=int(bought_by_human.sum()),
        n_agent_purchases=int(bought_by_agent.sum()),
        sessions=batch,
    )
--- a/sim/rl/jax_core/transitions.py
+++ b/sim/rl/jax_core/transitions.py
@@ -0,0 +1,47 @@
 """Dense transition matrices for JAX Markov chain sampling."""
 from dataclasses import dataclass
 import numpy as np
 try:
    import jax.numpy as jnp
    JAX_AVAILABLE = True
 except ImportError:
    jnp, JAX_AVAILABLE = np, False
 STATES = ["session_start", "view_item_page", "learn_more_about_item", "add_item_to_cart", "purchase_complete", "session_end"]
 S2I = {s: i for i, s in enumerate(STATES)}
 N_STATES, TERM_IDX, PURCHASE_IDX, CART_IDX = len(STATES), 5, 4, 3
@dataclass
 class TransitionData:
    human_T: np.ndarray   # (6,6) transition probs
    agent_T: np.ndarray   # (6,6)
    human_dwell: np.ndarray  # (6,2) shape,scale
    agent_dwell: np.ndarray  # (6,2)
    def to_jax(self):
        if not JAX_AVAILABLE: return self
        return TransitionData(*[jnp.array(x) for x in [self.human_T, self.agent_T, self.human_dwell, self.agent_dwell]])
 def dict_to_dense(d):
    """Convert a {src_state: {dst_state: prob}} mapping into a dense row-stochastic matrix.

    Unknown state names are ignored. Each row is renormalised to sum to 1.
    A state with no positive outgoing probability (missing from `d`, or only
    pointing at unknown states) is routed to the terminal state: an all-zero
    row is not a valid distribution and makes the samplers fail or jump at
    random. The terminal row is always absorbing.

    Returns:
        (N_STATES, N_STATES) float32 array.
    """
    m = np.zeros((N_STATES, N_STATES), dtype=np.float32)
    for src, dsts in d.items():
        if (i := S2I.get(src)) is not None:
            for dst, p in dsts.items():
                if (j := S2I.get(dst)) is not None: m[i,j] = p
    # Dead-end rows: end the session instead of leaving an invalid distribution.
    dead = ~(m > 0).any(axis=1)
    m[dead] = 0
    m[dead, TERM_IDX] = 1.0
    m /= np.maximum(m.sum(1, keepdims=True), 1e-8)
    m[TERM_IDX] = 0; m[TERM_IDX, TERM_IDX] = 1.0
    return m
 def compile_transitions(human_profile, agent_profile):
    """Build a TransitionData from two profiles.

    Reads each profile's `transitions` mapping and `dwell_params` mapping;
    states missing from dwell_params get the default (2.0, 1.0) pair.
    """
    default_dwell = (2.0, 1.0)
    def dwell_arr(params):
        rows = [params.get(state, default_dwell) for state in STATES]
        return np.array([rows], dtype=np.float32).reshape(N_STATES, 2)
    human_T = dict_to_dense(human_profile.transitions)
    agent_T = dict_to_dense(agent_profile.transitions)
    human_dwell = dwell_arr(human_profile.dwell_params)
    agent_dwell = dwell_arr(agent_profile.dwell_params)
    return TransitionData(human_T, agent_T, human_dwell, agent_dwell)
 def fallback_transitions():
    """Return built-in TransitionData with hard-coded human and agent transition tables and a (2.0, 1.0) dwell pair for every state."""
    human = {
        "session_start": {"view_item_page": .85, "session_end": .15},
        "view_item_page": {"learn_more_about_item": .4, "add_item_to_cart": .3, "view_item_page": .2, "session_end": .1},
        "learn_more_about_item": {"add_item_to_cart": .5, "view_item_page": .3, "session_end": .2},
        "add_item_to_cart": {"purchase_complete": .6, "view_item_page": .25, "session_end": .15},
        "purchase_complete": {"session_end": 1.0},
    }
    agent = {
        "session_start": {"view_item_page": .9, "session_end": .1},
        "view_item_page": {"learn_more_about_item": .5, "add_item_to_cart": .25, "view_item_page": .15, "session_end": .1},
        "learn_more_about_item": {"add_item_to_cart": .4, "view_item_page": .4, "session_end": .2},
        "add_item_to_cart": {"purchase_complete": .5, "view_item_page": .3, "session_end": .2},
        "purchase_complete": {"session_end": 1.0},
    }
    human_T = dict_to_dense(human)
    agent_T = dict_to_dense(agent)
    dwell = np.full((N_STATES, 2), [2.0, 1.0], dtype=np.float32)
    # each actor type gets its own copy of the dwell table
    return TransitionData(human_T, agent_T, dwell.copy(), dwell.copy())
--- a/sim/rl/train.py
+++ b/sim/rl/train.py
@@ -0,0 +1,175 @@
 import numpy as np
 import logging
 from pathlib import Path
 from typing import Dict, Type, Optional
 import pickle
 from torch.utils.tensorboard import SummaryWriter
 from sim.rl.environment import PHANTOMEnv, BusinessLogicConstraints
 logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
 logger = logging.getLogger(__name__)
 try:
    from sim.rl.engine import (BasePricingEngine, WildPricingEngine, StaticPricingEngine,
                       SimpleDemandEngine, RandomWalkEngine, ThompsonSamplingEngine)
 except ImportError as e:
    BasePricingEngine = None  # engines not required for basic usage
    print(e)
 """
 Target training loop:
 have base prices p0 from env reset and run the env step, collect reward and metrics
 pass this to the pricing engine which computes the price action to take based on previous reward by learning
 the new action gets passed to the step
 so we alternate, step -> reward -> engine (produces price delta) -> step with price delta -> reward
 to make sure the reinforcement learning inside the engine can learn we need to have trajectory of prices
 CURRENT SOLUTION BELOW does not implement correct learning or updates.
 """
 class EngineTrainer:
    """Runs a pricing engine through environment episodes and collects metrics.

    The engine is driven through reset(), compute_prices(prev_prices, obs) and
    update(obs, reward, done, info); the environment through reset(seed=...)
    and step(action), which returns (obs, reward, terminated, truncated, info).
    """
    def __init__(self, engine, env: PHANTOMEnv, tb_writer: Optional[SummaryWriter] = None):
        self.engine = engine
        self.env = env
        self.episode_metrics = []  # one dict per training episode
        self.tb_writer = tb_writer  # optional TensorBoard writer
        self.global_step = 0  # environment steps taken across all training episodes
    def train(self, n_episodes: int, seed: int = 42):
        """Run n_episodes training episodes (episode i is seeded with seed + i).

        After every step the engine's update() is called. The last step's info
        dict, extended with "episode_reward" and "episode", is appended to
        episode_metrics. Returns self.
        """
        for ep in range(n_episodes):
            obs, _ = self.env.reset(seed=seed + ep)
            self.engine.reset()
            done = False
            prev_prices = obs["elasticity"]["price"]
            episode_reward = 0.0
            last_info: Dict[str, float] = {}
            while not done:
                action_prices = self.engine.compute_prices(prev_prices, obs)
                obs, reward, terminated, truncated, info = self.env.step(action_prices)
                # An episode is over on either flag; ignoring `truncated` would
                # keep stepping an environment that stopped on a time limit.
                done = bool(terminated or truncated)
                self.engine.update(obs, reward, done, info)
                episode_reward += reward
                prev_prices = obs["elasticity"]["price"]
                last_info = info
                if self.tb_writer:
                    self.tb_writer.add_scalar("reward/step", reward, self.global_step)
                    if "coi" in info:
                        self.tb_writer.add_scalar("diagnostics/coi", info["coi"], self.global_step)
                    if "alpha_hat" in info:
                        self.tb_writer.add_scalar("diagnostics/alpha_hat", info["alpha_hat"], self.global_step)
                self.global_step += 1
            # copy so the stored metrics do not alias the env's info dict
            last_info = dict(last_info)
            last_info.update({"episode_reward": episode_reward, "episode": ep})
            self.episode_metrics.append(last_info)
            if self.tb_writer:
                self.tb_writer.add_scalar("reward/episode", episode_reward, ep)
        return self
    def run_episode(self, seed: int = 42) -> Dict:
        """Run a single evaluation episode and return its metrics.

        The engine is reset and queried for prices but update() is not called.
        The result holds the most recent value of every info key plus
        'total_reward', the sum of step rewards.
        """
        obs, _ = self.env.reset(seed=seed)
        self.engine.reset()
        total_reward = 0.0
        prev_prices = obs["elasticity"]["price"]
        ep_metrics = {'total_reward': 0.0}
        done = False
        while not done:
            action_prices = self.engine.compute_prices(prev_prices, obs)
            obs, reward, terminated, truncated, info = self.env.step(action_prices)
            done = bool(terminated or truncated)
            total_reward += reward
            for k, v in info.items():
                ep_metrics[k] = v
            prev_prices = obs["elasticity"]["price"]
        ep_metrics['total_reward'] = total_reward
        return ep_metrics
    def evaluate(self, n_episodes: int = 10, seed: int = 100) -> Dict:
        """Run n_episodes evaluation episodes and return {metric: (mean, std)}.

        Episode i uses seed + i. A metric missing from an episode's result
        counts as 0.0.
        """
        results = {k: [] for k in ['total_reward', 'revenue_observed', 'revenue_oracle',
                                   'agent_loss', 'ux_volatility', 'look_to_book']}
        for ep in range(n_episodes):
            metrics = self.run_episode(seed=seed + ep)
            for k in results:
                results[k].append(metrics.get(k, 0.0))
        return {k: (np.mean(v), np.std(v)) for k, v in results.items()}
 def make_env():
    """Create a PHANTOMEnv configured with a default-constructed BusinessLogicConstraints."""
    default_constraints = BusinessLogicConstraints()
    return PHANTOMEnv(constraints=default_constraints)
 def train_engine(engine_cls, env: PHANTOMEnv, n_episodes: int, seed: int = 42,
                tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer:
    """Instantiate engine_cls with the env's constraints and the seed, train it for n_episodes, and return the trainer."""
    engine = engine_cls(constraints=env.constraints, seed=seed)
    # train() returns the trainer itself, so the call can be returned directly
    return EngineTrainer(engine, env, tb_writer=tb_writer).train(n_episodes, seed=seed)
 def save_trainer(trainer: EngineTrainer, path: Path):
    """Pickle the trainer's engine and episode metrics to `path`, creating parent directories as needed."""
    path.parent.mkdir(parents=True, exist_ok=True)
    snapshot = {'engine': trainer.engine, 'metrics': trainer.episode_metrics}
    with open(path, 'wb') as fh:
        pickle.dump(snapshot, fh)
    logger.info(f"Saved trainer to {path}")
 def load_trainer(path: Path, env: PHANTOMEnv, tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer:
    """Rebuild an EngineTrainer from a file written by save_trainer.

    NOTE: unpickling executes code embedded in the file; only load files you trust.
    """
    with open(path, 'rb') as fh:
        snapshot = pickle.load(fh)
    restored = EngineTrainer(snapshot['engine'], env, tb_writer=tb_writer)
    restored.episode_metrics = snapshot['metrics']
    return restored
 if __name__ == "__main__":
    # Script entry point: train every engine, save each trainer, then evaluate them.
    if BasePricingEngine is None:
        logger.error("Engines not available, cannot run training")
        # SystemExit works even where the site-provided exit() helper is not installed.
        raise SystemExit(1)
    base_dir = Path("./sim/rl/runs")
    # parents=True so the script also works when the intermediate directories are missing
    base_dir.mkdir(parents=True, exist_ok=True)
    engines = {
        "Wild": WildPricingEngine,
        "Static": StaticPricingEngine,
        "RandomWalk": RandomWalkEngine,
        "ThompsonSampling": ThompsonSamplingEngine,
    }
    n_train_episodes = 50
    n_eval_episodes = 10
    seed = 42
    logger.info(f"Training config: {n_train_episodes} episodes per engine")
    trained_trainers = {}
    for engine_name, engine_cls in engines.items():
        run_name = engine_name
        log_dir = base_dir / run_name
        log_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Training {engine_name}")
        logger.info(f"Log directory: {log_dir}")
        # every engine gets a fresh environment and its own TensorBoard run
        env = make_env()
        tb_writer = SummaryWriter(log_dir=str(log_dir))
        trainer = train_engine(engine_cls, env, n_train_episodes, seed, tb_writer=tb_writer)
        tb_writer.close()
        save_path = log_dir / "trainer.pkl"
        save_trainer(trainer, save_path)
        trained_trainers[run_name] = (trainer, env)
    logger.info("Starting evaluation")
    for run_name, (trainer, env) in trained_trainers.items():
        logger.info(f"Evaluating {run_name}")
        # evaluation seeds are offset so they do not overlap the training seeds
        results = trainer.evaluate(n_episodes=n_eval_episodes, seed=seed + 1000)
        for metric, (mean, std) in results.items():
            logger.info(f"  {metric:20s}: {mean:10.2f} ± {std:6.2f}")
    logger.info(f"Results saved to: {base_dir}")
--- a/sim/strong_learner/data.py
+++ b/sim/strong_learner/data.py
@@ -0,0 +1,108 @@
 import os
 import requests
 try:
    import py7zr  # type: ignore
 except ImportError:  # pragma: no cover - optional dependency
    py7zr = None
 import pandas as pd
 from typing import Generator
 try:
    from sim.rl.behavior_loader.loader import PayloadModel, ValueModel, InteractionModel, Loader
 except ImportError:
    from loader import PayloadModel, ValueModel, InteractionModel, Loader
 class YooChooseLoader(Loader):
    """Loader that exposes the YooChoose click/buy logs as InteractionModel sessions.

    Clicks become "view_item_page" events and buys become "purchase_complete"
    events. On construction the dataset is downloaded if the clicks file is
    missing, and up to max_sessions sessions are loaded into memory.
    """
    URL = "https://s3-eu-west-1.amazonaws.com/yc-rdata/yoochoose-data.7z"
    CLICK_COLS = ['session_id', 'ts', 'item_id', 'category']
    BUY_COLS = ['session_id', 'ts', 'item_id', 'price', 'quantity']
    def __init__(self, root_dir: str = "data/yoochoose", chunk_size: int = 500_000, max_sessions: int = 1000):
        self.root = root_dir
        self.chunk_size = chunk_size  # rows per pandas chunk when streaming the .dat files
        self.max_sessions = max_sessions
        self.click_path = f"{root_dir}/yoochoose-clicks.dat"
        self.buy_path = f"{root_dir}/yoochoose-buys.dat"
        if not os.path.exists(self.click_path): self._setup()
        self.data = self._load_sessions(max_sessions)
        self.entries = list(self.data.keys())
    def _setup(self):
        """Download the dataset archive into self.root, extract it, and delete the archive.

        Raises:
            RuntimeError: if py7zr is not installed.
            requests.HTTPError: if the download returns an error status.
        """
        if py7zr is None:
            raise RuntimeError("py7zr is required to unpack YooChoose dataset. Install py7zr first.")
        os.makedirs(self.root, exist_ok=True)
        zip_path = f"{self.root}/temp.7z"
        with requests.get(self.URL, stream=True) as r:
            # Fail here on 4xx/5xx rather than saving an error page as temp.7z
            # and failing later with a confusing archive error.
            r.raise_for_status()
            with open(zip_path, 'wb') as f:
                for chunk in r.iter_content(8192):
                    f.write(chunk)
        with py7zr.SevenZipFile(zip_path, 'r') as z:
            z.extractall(self.root)
        os.remove(zip_path)
    def _make_interaction(self, sid: str, ts: str, item_id: str, event: str, page: str, meta: dict) -> InteractionModel:
        """Wrap one dataset row in an InteractionModel; the transport-level fields are filled with placeholders."""
        payload = PayloadModel(
            sessionId=sid, experimentId=None, eventName=event,
            page=page, productId=item_id, metadata=meta,
            storeMode="yoochoose", userAgent="dataset", ts=ts
        )
        return InteractionModel(
            partitionID=0, offset=0, timestamp=0, compression="",
            isTransactional=False, headers=[], key={},
            value=ValueModel(payload=payload, encoding="json", isPayloadNull=False, schemaId=1, size=0)
        )
    def _parse_category(self, cat) -> str:
        """Map a raw category value to a page label.

        Missing values and 0 -> "unknown", "S" -> "special_offer", 1..12 ->
        "category_<n>", any other integer -> "brand_<n>"; anything that is not
        an integer is returned as its string form.
        """
        if pd.isna(cat): return "unknown"
        if cat == "S": return "special_offer"
        try:
            n = int(cat)
        except (TypeError, ValueError, OverflowError):
            return str(cat)
        # pandas parses all-numeric chunks as numbers, so the "missing" marker
        # can arrive as 0 / 0.0 as well as "0"; all of them mean unknown.
        if n == 0: return "unknown"
        return f"category_{n}" if 1 <= n <= 12 else f"brand_{n}"
    def stream_clicks(self) -> Generator[InteractionModel, None, None]:
        """Yield one "view_item_page" interaction per row of the clicks file, read in chunks."""
        with pd.read_csv(self.click_path, names=self.CLICK_COLS, chunksize=self.chunk_size, header=None) as reader:
            for chunk in reader:
                for r in chunk.itertuples(index=False):
                    yield self._make_interaction(
                        str(r.session_id), r.ts, str(r.item_id),
                        "view_item_page", self._parse_category(r.category), {}
                    )
    def stream_buys(self) -> Generator[InteractionModel, None, None]:
        """Yield one "purchase_complete" interaction per row of the buys file, with price and quantity in metadata."""
        with pd.read_csv(self.buy_path, names=self.BUY_COLS, chunksize=self.chunk_size, header=None) as reader:
            for chunk in reader:
                for r in chunk.itertuples(index=False):
                    yield self._make_interaction(
                        str(r.session_id), r.ts, str(r.item_id),
                        "purchase_complete", "/checkout", {"price": r.price, "quantity": r.quantity}
                    )
    def stream(self) -> Generator[InteractionModel, None, None]:
        """Yield all click interactions followed by all buy interactions."""
        yield from self.stream_clicks()
        yield from self.stream_buys()
    def _load_sessions(self, max_sessions: int | None = None) -> dict:
        """Group streamed interactions by session id and sort each session by timestamp.

        Once max_sessions distinct sessions have been seen, events of new
        sessions are dropped, but streaming continues so that later events
        (including buys) of already-admitted sessions are still collected.
        """
        sessions = {}
        for interaction in self.stream():
            sid = interaction.value.payload.sessionId
            if sid not in sessions:
                if max_sessions and len(sessions) >= max_sessions: continue
                sessions[sid] = []
            sessions[sid].append(interaction)
        for sid in sessions: sessions[sid].sort(key=lambda x: x.value.payload.ts)
        return sessions
    def get_data(self) -> dict:
        """Return the {session_id: [interactions]} mapping loaded at construction."""
        return self.data
    def get_entries(self) -> tuple[list[str], int]:
        """Return (session ids, number of sessions)."""
        return self.entries, len(self.entries)
 if __name__ == "__main__":
    # Smoke run: load 100 sessions and report how many events of each kind they hold.
    loader = YooChooseLoader(max_sessions=100)
    tally = {"view_item_page": 0, "purchase_complete": 0}
    for session_events in loader.get_data().values():
        for event in session_events:
            event_name = event.value.payload.eventName
            if event_name in tally:
                tally[event_name] += 1
    print(f"Loaded {len(loader.entries)} sessions: {tally['view_item_page']} view_item_page, {tally['purchase_complete']} purchase_complete")
--- a/tests/e2e/.env.example
+++ b/tests/e2e/.env.example
@@ -0,0 +1,7 @@
 WEB_URL=http://localhost:3000
 BACKEND_URL=http://localhost:5000
 PRICING_PROVIDER_URL=http://localhost:5001
 AIRFLOW_URL=http://localhost:8085
 AIRFLOW_USER=admin
 AIRFLOW_PASS=admin
 HEADLESS=true
--- a/tests/e2e/helpers/airflow.ts
+++ b/tests/e2e/helpers/airflow.ts
@@ -0,0 +1,61 @@
 /** Base URL of the Airflow webserver; overridable through AIRFLOW_URL. */
 const AIRFLOW_URL = process.env.AIRFLOW_URL || 'http://localhost:8085';
 /** HTTP Basic authorization header value built from AIRFLOW_USER / AIRFLOW_PASS (both default to "admin"). */
 const AUTH = (() => {
  const user = process.env.AIRFLOW_USER || 'admin';
  const pass = process.env.AIRFLOW_PASS || 'admin';
  const encoded = Buffer.from(`${user}:${pass}`).toString('base64');
  return 'Basic ' + encoded;
 })();
 /**
  * Issue a request against the Airflow server with the Basic auth header attached.
  * Headers supplied in `opts.headers` are merged after (and may override) Authorization.
  */
 const req = (path: string, opts: any = {}) =>
  fetch(`${AIRFLOW_URL}${path}`, {
    ...opts,
    headers: { Authorization: AUTH, ...opts.headers },
  });
 /**
  * Start a new run of the given DAG with `conf` as its run configuration.
  * @returns the `dag_run_id` field of the response.
  * @throws Error when the response status is not OK.
  */
 export const triggerDag = async (dagId: string, conf = {}) => {
  const endpoint = `/api/v1/dags/${dagId}/dagRuns`;
  const payload = JSON.stringify({ conf });
  const resp = await req(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  });
  if (resp.ok) {
    const created = await resp.json();
    return created.dag_run_id;
  }
  throw new Error(`Trigger DAG failed: ${resp.status}`);
 };
 /**
  * Fetch a DAG run and return its `state` field.
  * @throws Error when the response status is not OK.
  */
 export const getDagStatus = async (dagId: string, runId: string) => {
  const resp = await req(`/api/v1/dags/${dagId}/dagRuns/${runId}`);
  if (resp.ok) {
    const run = await resp.json();
    return run.state;
  }
  throw new Error(`Get status failed: ${resp.status}`);
 };
 /**
  * Best-effort cancellation: PATCH the DAG run's state to 'failed'.
  * A non-OK response is only logged as a warning, never thrown.
  */
 export const cancelDag = async (dagId: string, runId: string) => {
  const resp = await req(`/api/v1/dags/${dagId}/dagRuns/${runId}`, {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ state: 'failed' }),
  });
  if (resp.ok) return;
  console.warn(`Failed to cancel DAG ${runId}: ${resp.status}`);
 };
 /**
  * Poll a DAG run every `pollMs` until it succeeds, fails, or `maxMs` elapses.
  * Resolves on 'success'; throws on 'failed'; on timeout the run is cancelled
  * first and then a timeout error is thrown.
  */
 export const waitForDag = async (dagId: string, runId: string, maxMs = 30000, pollMs = 1000) => {
  for (const startedAt = Date.now(); Date.now() - startedAt < maxMs; ) {
    switch (await getDagStatus(dagId, runId)) {
      case 'success':
        return;
      case 'failed':
        throw new Error(`DAG ${runId} failed`);
    }
    // any other state: wait, then poll again
    await new Promise(resolve => setTimeout(resolve, pollMs));
  }
  await cancelDag(dagId, runId);
  throw new Error(`DAG ${runId} timeout`);
 };
 /** Trigger a DAG with `conf` and wait up to `maxMs` for the resulting run to finish. */
 export const runDag = async (dagId: string, conf = {}, maxMs = 60000) => {
  await waitForDag(dagId, await triggerDag(dagId, conf), maxMs);
 };
 /** Run the session pricing pipeline for a store mode (10-session limit, 90 s timeout). */
 export const runSessionPricing = (mode = 'hotel') => {
  const conf = { store_mode: mode, session_limit: 10 };
  return runDag('session_pricing_pipeline', conf, 90000);
 };
 /**
  * Run the surge pricing pipeline for a store mode with the given view-count
  * thresholds; the 1.2 surge and 0.9 discount multipliers are fixed (90 s timeout).
  */
 export const runSurgePricing = (mode = 'hotel', highThresh = 10, lowThresh = 2) => {
  const conf = {
    store_mode: mode,
    high_threshold: highThresh,
    low_threshold: lowThresh,
    surge_multiplier: 1.2,
    discount_multiplier: 0.9,
  };
  return runDag('surge_pricing_pipeline', conf, 90000);
 };
--- a/tests/e2e/helpers/kafka.ts
+++ b/tests/e2e/helpers/kafka.ts
@@ -9,8 +9,8 @@ interface InteractionEvent {
 const dumpKafkaTopic = async (backendUrl: string, topic: string) => {
  const resp = await fetch(`${backendUrl}/api/kafka/dump?topic=${topic}`);
  if (!resp.ok) throw new Error(`Kafka dump failed: ${resp.status}`);
-  const { messages = [] } = await resp.json();
+  const { data = [] } = await resp.json();
-  return messages as any[];
+  return data as any[];
 };
 export const waitForInteractionEvent = async (
--- a/tests/e2e/playwright.config.ts
+++ b/tests/e2e/playwright.config.ts
@@ -5,14 +5,14 @@ export default defineConfig({
  fullyParallel: true,
  forbidOnly: !!process.env.CI,
  retries: 0,
-  workers: 5,
+  workers: 1,
  reporter: 'list',
  use: {
    baseURL: process.env.WEB_URL || 'http://localhost:3000',
    trace: 'retain-on-failure',
    screenshot: 'only-on-failure',
  },
-  timeout: 60000,
+  timeout: 180000,
  expect: {
    timeout: 10000,
  },
--- a/tests/e2e/scenarios/session-aware.spec.ts
+++ b/tests/e2e/scenarios/session-aware.spec.ts
@@ -9,6 +9,7 @@ import {
  addToCart,
 } from '../helpers/interactions';
 import { getSessionEvents } from '../helpers/kafka';
 import { runSessionPricing } from '../helpers/airflow';
 test.describe('SessionAwarePricer E2E', () => {
  const STORE_TYPE = 'hotel';
@@ -23,6 +24,9 @@ test.describe('SessionAwarePricer E2E', () => {
    await page.waitForTimeout(1500);
    const productId2 = await humanLikeViewProduct(page, STORE_TYPE);
    await runSessionPricing(STORE_TYPE);
    const secondPrice = await getPriceFromDOM(page);
    expect(await verifySessionConsistency(page, sessionId)).toBeTruthy();
@@ -40,11 +44,13 @@ test.describe('SessionAwarePricer E2E', () => {
    await rapidViewProductViaFlow(page, 8, 100, STORE_TYPE);
    expect(await verifySessionConsistency(page, sessionId)).toBeTruthy();
-    await page.waitForTimeout(2500);
+    await page.waitForTimeout(1000);
    const events = await getSessionEvents(backendUrl, sessionId);
    expect(events.length).toBeGreaterThanOrEqual(8);
    await runSessionPricing(STORE_TYPE);
    await page.goto(`/products/${productId}`);
    await page.waitForLoadState('networkidle');
    const agentPrice = await getPriceFromDOM(page);
@@ -59,14 +65,12 @@ test.describe('SessionAwarePricer E2E', () => {
    const productId = await viewProductViaFlow(page, STORE_TYPE);
    const baselinePrice = await getPriceFromDOM(page);
    const startTime = Date.now();
    await rapidViewProductViaFlow(page, 10, 80, STORE_TYPE);
    const duration = (Date.now() - startTime) / 1000;
-    const eventsPerSec = 10 / duration;
+    const events = await getSessionEvents(backendUrl, sessionId);
-    expect(eventsPerSec).toBeGreaterThan(2.0);
+    expect(events.length).toBeGreaterThanOrEqual(10);
-    await page.waitForTimeout(2000);
+    await runSessionPricing(STORE_TYPE);
    await page.goto(`/products/${productId}`);
    await page.waitForLoadState('networkidle');
@@ -105,8 +109,11 @@ test.describe('SessionAwarePricer E2E', () => {
    await rapidViewProductViaFlow(page, 2, 150, STORE_TYPE);
-    await page.waitForTimeout(1500);
+    await page.waitForTimeout(1000);
    await humanLikeViewProduct(page, STORE_TYPE);
    await runSessionPricing(STORE_TYPE);
    const finalPrice = await getPriceFromDOM(page);
    expect(Math.abs(finalPrice - baselinePrice) / baselinePrice).toBeLessThan(0.3);
--- a/tests/e2e/scenarios/surge-pricing.spec.ts
+++ b/tests/e2e/scenarios/surge-pricing.spec.ts
@@ -7,6 +7,7 @@ import {
  verifySessionConsistency,
 } from '../helpers/interactions';
 import { waitForInteractionEvent, countProductViews } from '../helpers/kafka';
 import { runSurgePricing } from '../helpers/airflow';
 test.describe('SimpleSurgePricer E2E', () => {
  const STORE_TYPE = 'hotel';
@@ -29,7 +30,7 @@ test.describe('SimpleSurgePricer E2E', () => {
    await rapidViewProductViaFlow(page, 5, 200, STORE_TYPE);
-    await page.waitForTimeout(2000);
+    await page.waitForTimeout(1000);
    const evt = await waitForInteractionEvent(backendUrl, sessionId, 'view_item_page');
    expect(evt).not.toBeNull();
@@ -37,6 +38,8 @@ test.describe('SimpleSurgePricer E2E', () => {
    const viewCount = await countProductViews(backendUrl, productId);
    expect(viewCount).toBeGreaterThanOrEqual(5);
    await runSurgePricing(STORE_TYPE, 3, 1);
    await page.goto(`/products/${productId}`);
    await page.waitForLoadState('networkidle');
    const surgedPrice = await getPriceFromDOM(page);
@@ -72,7 +75,9 @@ test.describe('SimpleSurgePricer E2E', () => {
    await rapidViewProductViaFlow(page, 5, 150, STORE_TYPE);
-    await page.waitForTimeout(1500);
+    await page.waitForTimeout(1000);
    await runSurgePricing(STORE_TYPE, 3, 1);
    await page.goto(`/products/${productId}`);
    await page.waitForLoadState('networkidle');
@@ -81,6 +86,8 @@ test.describe('SimpleSurgePricer E2E', () => {
    await page.waitForTimeout(12000);
    await runSurgePricing(STORE_TYPE, 3, 1);
    await page.goto(`/products/${productId}`);
    await page.waitForLoadState('networkidle');
    const decayedPrice = await getPriceFromDOM(page);
--- a/web/src/app/api/pricing/route.ts
+++ b/web/src/app/api/pricing/route.ts
@@ -30,6 +30,8 @@ export async function GET(req: NextRequest) {
    const providerUrl = process.env.PRICING_PROVIDER_URL || 'http://localhost:5001';
    try {
        const queryParams = new URLSearchParams();
        // THIS is our entry point into dynamic pricing: we reference the context of the session and experiment and ask for a price to assign to the trajectory which is expressed.
        // The whole pipeline gets triggered from here.
        if (sessionId) queryParams.append('sessionId', sessionId);
        if (experimentId) queryParams.append('experimentId', experimentId);
@@ -55,25 +57,26 @@ export async function GET(req: NextRequest) {
        price = Math.round(randomBase * 100) / 100;
    }
-    // log price to kafka for elasticity computation
+    // log price to kafka asynchronously (non-blocking)
    if (sessionId) {
        const backendUrl = process.env.BACKEND_URL || 'http://localhost:5000';
-        try {
+        // fire and forget - don't await to avoid blocking response
-            await fetch(`${backendUrl}/api/kafka/price-log`, {
+        fetch(`${backendUrl}/api/kafka/price-log`, {
-                method: 'POST',
+            method: 'POST',
-                headers: { 'Content-Type': 'application/json' },
+            headers: { 'Content-Type': 'application/json' },
-                body: JSON.stringify({
+            body: JSON.stringify({
-                    productId,
+                productId,
-                    price,
+                price,
-                    sessionId,
+                sessionId,
-                    experimentId: experimentId || undefined,
+                experimentId: experimentId || undefined,
-                    storeMode,
+                storeMode,
-                    ts: timestamp,
+                ts: timestamp,
-                }),
+            }),
-            });
+        }).catch(err => {
-        } catch (err) {
+            if (process.env.NODE_ENV === 'development') {
-            console.error('[price-log-error]', err);
+                console.error('[price-log-error]', err);
-        }
+            }
        });
    }
    if (process.env.NODE_ENV === 'development') {
--- a/web/src/app/cart/page.tsx
+++ b/web/src/app/cart/page.tsx
@@ -32,7 +32,8 @@ export default function CartPage() {
                    {itemCount > 0 && (
                        <button
                            onClick={clearCart}
-                            className="text-sm text-red-600 hover:underline"
+                            className="text-sm hover:underline"
                            style={{ color: 'var(--accent-warning)' }}
                        >
                            Clear cart
                        </button>
@@ -42,7 +43,7 @@ export default function CartPage() {
                {itemCount === 0 ? (
                    <div className="text-center py-12">
                        <p className="text-gray-500 mb-4">Your cart is empty</p>
-                        <a href="/" className="text-blue-600 hover:underline">Browse our selection</a>
+                        <a href="/" className="hover:underline" style={{ color: 'var(--text-accent)' }}>Browse our selection</a>
                    </div>
                ) : (
                    <>
@@ -54,15 +55,11 @@ export default function CartPage() {
                                >
                                    <div className="flex-1">
                                        <div className="flex items-center gap-2 mb-1">
                                            <span className="px-2 py-0.5 text-xs font-medium rounded bg-blue-100 text-blue-800">
                                                {item.type}
                                            </span>
                                            <h3 className="font-semibold">{item.name}</h3>
                                        </div>
                                        {item.type === 'hotel' && (
                                            <div className="text-sm text-gray-600">
                                                <p>{String(item.metadata.roomType)}</p>
                                                <p>{String(item.metadata.checkIn)} - {String(item.metadata.checkOut)}</p>
                                                <p>{String(item.metadata.nights)} night{Number(item.metadata.nights) > 1 ? 's' : ''}</p>
                                            </div>
@@ -81,7 +78,8 @@ export default function CartPage() {
                                        <p className="text-xl font-bold mb-2">${item.price}</p>
                                        <button
                                            onClick={() => handleRemove(item.id, item.type)}
-                                            className="text-sm text-red-600 hover:underline"
+                                            className="text-sm hover:underline"
                                            style={{ color: 'var(--accent-warning)' }}
                                        >
                                            Remove
                                        </button>
@@ -100,7 +98,7 @@ export default function CartPage() {
                                    dispatchInteraction('checkout_start', undefined, { total, itemCount });
                                    window.location.href = '/checkout';
                                }}
-                                className="w-full py-3 bg-blue-600 hover:bg-blue-700 text-white rounded-lg font-medium transition-colors"
+                                className="btn-primary w-full"
                            >
                                Proceed to Checkout
                            </button>
--- a/web/src/app/globals.css
+++ b/web/src/app/globals.css
@@ -8,6 +8,9 @@
  --bg-secondary: #f5f5f5;
  --text-primary: #333333;
  --text-secondary: #666666;
  --accent-primary: #007aff;
  --accent-primary-hover: #0051d5;
  --accent-primary-light: #e6f2ff;
  --spacing-sm: 8px;
  --spacing-md: 16px;
  --spacing-lg: 32px;
--- a/web/src/app/layout.tsx
+++ b/web/src/app/layout.tsx
@@ -15,8 +15,8 @@ const geistMono = Geist_Mono({
 });
 export const metadata: Metadata = {
-  title: "Create Next App",
+  title: "Travel Booking Platform",
-  description: "Generated by create next app",
+  description: "Book flights and hotels with dynamic pricing",
 };
 export default function RootLayout({
--- a/web/src/app/page.tsx
+++ b/web/src/app/page.tsx
@@ -1,65 +1,5 @@
-import Image from "next/image";
+import { redirect } from 'next/navigation';
 export default function Home() {
-  return (
+  redirect('/hotel');
    <div className="flex min-h-screen items-center justify-center bg-zinc-50 font-sans dark:bg-black">
      <main className="flex min-h-screen w-full max-w-3xl flex-col items-center justify-between py-32 px-16 bg-white dark:bg-black sm:items-start">
        <Image
          className="dark:invert"
          src="/next.svg"
          alt="Next.js logo"
          width={100}
          height={20}
          priority
        />
        <div className="flex flex-col items-center gap-6 text-center sm:items-start sm:text-left">
          <h1 className="max-w-xs text-3xl font-semibold leading-10 tracking-tight text-black dark:text-zinc-50">
            To get started, edit the page.tsx file.
          </h1>
          <p className="max-w-md text-lg leading-8 text-zinc-600 dark:text-zinc-400">
            Looking for a starting point or more instructions? Head over to{" "}
            <a
              href="https://vercel.com/templates?framework=next.js&utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
              className="font-medium text-zinc-950 dark:text-zinc-50"
            >
              Templates
            </a>{" "}
            or the{" "}
            <a
              href="https://nextjs.org/learn?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
              className="font-medium text-zinc-950 dark:text-zinc-50"
            >
              Learning
            </a>{" "}
            center.
          </p>
        </div>
        <div className="flex flex-col gap-4 text-base font-medium sm:flex-row">
          <a
            className="flex h-12 w-full items-center justify-center gap-2 rounded-full bg-foreground px-5 text-background transition-colors hover:bg-[#383838] dark:hover:bg-[#ccc] md:w-[158px]"
            href="https://vercel.com/new?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
            target="_blank"
            rel="noopener noreferrer"
          >
            <Image
              className="dark:invert"
              src="/vercel.svg"
              alt="Vercel logomark"
              width={16}
              height={16}
            />
            Deploy Now
          </a>
          <a
            className="flex h-12 w-full items-center justify-center rounded-full border border-solid border-black/[.08] px-5 transition-colors hover:border-transparent hover:bg-black/[.04] dark:border-white/[.145] dark:hover:bg-[#1a1a1a] md:w-[158px]"
            href="https://nextjs.org/docs?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
            target="_blank"
            rel="noopener noreferrer"
          >
            Documentation
          </a>
        </div>
      </main>
    </div>
  );
 }
--- a/web/src/components/feats/hotel/HotelCard.tsx
+++ b/web/src/components/feats/hotel/HotelCard.tsx
@@ -2,6 +2,7 @@
 import type { EventName } from '@/lib/events';
 import type { Hotel } from '@/lib/hotel-utils';
 import { getHotelImageUrl } from '@/lib/hotel-utils';
 import { useHoverTracking } from '@/hooks/useHoverTracking';
 import PriceDisplay from '@/components/ui/PriceDisplay';
@@ -47,8 +48,6 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) {
        window.location.href = `/hotel/products/${hotel.id}`;
    };
    const imageUrl = `https://images.unsplash.com/photo-1551882547-ff40c63fe5fa?w=400&h=300&fit=crop`;
    return (
        <div
            className="hotel-card cursor-pointer"
@@ -56,7 +55,7 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) {
        >
            <div className="hotel-image relative overflow-hidden">
                <img
-                    src={imageUrl}
+                    src={getHotelImageUrl(hotel.id, { w: 400, h: 300 })}
                    alt={hotel.name}
                    className="w-full h-full object-cover"
                    onError={(e) => {
--- a/web/src/components/feats/hotel/HotelDetails.tsx
+++ b/web/src/components/feats/hotel/HotelDetails.tsx
@@ -2,6 +2,7 @@
 import { useState, useEffect } from 'react';
 import type { Hotel } from '@/lib/hotel-utils';
 import { getHotelImageUrl } from '@/lib/hotel-utils';
 import PriceDisplay from '@/components/ui/PriceDisplay';
 interface HotelDetailsProps {
@@ -43,13 +44,11 @@ const PriceTotalDisplay = ({ productId, nights }: { productId: string; nights: n
 };
 export default function HotelDetails({ product, onAddToCart, addedToCart }: HotelDetailsProps) {
  const imageUrl = `https://images.unsplash.com/photo-1566073771259-6a8506099945?w=800&h=600&fit=crop`;
  return (
    <div className="w-full flex flex-col lg:flex-row gap-12 py-8">
      <div className="w-full lg:w-1/2 rounded-lg aspect-[4/3] overflow-hidden shrink-0">
        <img
-          src={imageUrl}
+          src={getHotelImageUrl(product.id, { w: 800, h: 600 })}
          alt={product.name}
          className="w-full h-full object-cover"
          onError={(e) => {
--- a/web/src/components/ui/Navigation.tsx
+++ b/web/src/components/ui/Navigation.tsx
@@ -20,7 +20,7 @@ const NavLink = ({ href, children }: { href: string; children: React.ReactNode }
      href={href}
      className={`px-4 py-2 rounded-md transition-colors ${
        isActive
-          ? 'bg-[var(--accent-primary)] font-semibold'
+          ? 'bg-[var(--accent-primary)] text-white font-semibold'
          : 'hover:bg-[var(--accent-primary-light)] text-[var(--text-primary)]'
      }`}
    >
--- a/web/src/lib/airline-utils.ts
+++ b/web/src/lib/airline-utils.ts
@@ -31,7 +31,7 @@ export interface Flight {
  availability: number;
 }
-const EPOCH = new Date(0);
+import { dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils';
 export const transformProduct = (p: AirlineProduct): Flight => {
  const { id, flight_type, date_index, metadata, availability } = p;
@@ -52,24 +52,4 @@ export const transformProduct = (p: AirlineProduct): Flight => {
  };
 };
-// convert date string to days from today
+export { dateToDaysFromToday, dateToIndex, todayIndex };
 export const dateToDaysFromToday = (dateStr: string): number => {
  const target = new Date(dateStr);
  target.setHours(0, 0, 0, 0);
  const today = new Date();
  today.setHours(0, 0, 0, 0);
  return Math.floor((target.getTime() - today.getTime()) / 86400000);
 };
 // convert date string to date_index (days since epoch)
 export const dateToIndex = (dateStr: string): number => {
  const d = new Date(dateStr);
  return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000);
 };
 // get current date_index
 export const todayIndex = (): number => {
  const now = new Date();
  now.setHours(0, 0, 0, 0);
  return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000);
 };
--- a/web/src/lib/date-utils.ts
+++ b/web/src/lib/date-utils.ts
@@ -0,0 +1,23 @@
 /** Reference instant (1970-01-01T00:00:00Z) that day indices are counted from. */
 const EPOCH = new Date(0);
 /** Milliseconds in one 24-hour day. */
 const MS_PER_DAY = 86400000;
 /** ISO date-only form (`YYYY-MM-DD`); `Date` parses this form as UTC midnight, not local midnight. */
 const ISO_DATE_ONLY = /^\d{4}-\d{2}-\d{2}$/;
 /**
  * Day index (whole days since EPOCH) of the calendar date that a local wall
  * clock shows at instant `d`.
  *
  * The local year/month/day are re-anchored at UTC midnight before dividing, so
  * the result does not shift with the runtime's UTC offset or DST rules.
  * Yields NaN for an invalid Date.
  */
 const localCalendarIndex = (d: Date): number =>
   Math.floor((Date.UTC(d.getFullYear(), d.getMonth(), d.getDate()) - EPOCH.getTime()) / MS_PER_DAY);
 /**
  * Day index of the calendar date named by `dateStr`.
  *
  * Date-only strings are already UTC midnight of the date as written, so they
  * are indexed directly; any other form is resolved to an instant and indexed
  * by the local calendar date of that instant.
  */
 const calendarIndexOf = (dateStr: string): number => {
   const parsed = new Date(dateStr);
   return ISO_DATE_ONLY.test(dateStr)
     ? Math.floor((parsed.getTime() - EPOCH.getTime()) / MS_PER_DAY)
     : localCalendarIndex(parsed);
 };
 /**
  * Number of calendar days from today (local date) to the date in `dateStr`.
  *
  * Negative for past dates, 0 for today. Works on calendar-day indices rather
  * than a millisecond difference of local midnights, so a `YYYY-MM-DD` input is
  * not pushed to the previous day in zones west of UTC and a DST transition
  * between the two dates does not drop a day.
  *
  * @param dateStr - Any string accepted by the `Date` constructor.
  * @returns Whole days from today, or NaN when `dateStr` cannot be parsed.
  */
 export const dateToDaysFromToday = (dateStr: string): number =>
   calendarIndexOf(dateStr) - localCalendarIndex(new Date());
 /**
  * Converts a date string to a date_index: whole days between EPOCH and the
  * instant `dateStr` parses to.
  *
  * @param dateStr - Any string accepted by the `Date` constructor.
  * @returns Days since EPOCH, or NaN when `dateStr` cannot be parsed.
  */
 export const dateToIndex = (dateStr: string): number => {
   const d = new Date(dateStr);
   return Math.floor((d.getTime() - EPOCH.getTime()) / MS_PER_DAY);
 };
 /**
  * date_index of today's local calendar date.
  *
  * Equals `dateToIndex` of today's `YYYY-MM-DD` string in every time zone;
  * flooring local midnight directly would return the previous day's index
  * wherever local time is ahead of UTC.
  */
 export const todayIndex = (): number => localCalendarIndex(new Date());
 export { EPOCH, MS_PER_DAY };
--- a/web/src/lib/hotel-utils.ts
+++ b/web/src/lib/hotel-utils.ts
@@ -25,7 +25,7 @@ export interface Hotel {
  nights: number;
 }
-const EPOCH = new Date(0);
+import { EPOCH, MS_PER_DAY, dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils';
 export const transformProduct = (p: HotelProduct): Hotel => {
  const { id, room_type, date_index, metadata } = p;
@@ -37,14 +37,14 @@ export const transformProduct = (p: HotelProduct): Hotel => {
    // legacy: treat as offset from today
    const today = new Date();
    today.setHours(0, 0, 0, 0);
-    checkIn = new Date(today.getTime() + date_index * 86400000);
+    checkIn = new Date(today.getTime() + date_index * MS_PER_DAY);
  } else {
    // proper: days since epoch
-    checkIn = new Date(EPOCH.getTime() + date_index * 86400000);
+    checkIn = new Date(EPOCH.getTime() + date_index * MS_PER_DAY);
  }
  const nights = 1;
-  const checkOut = new Date(checkIn.getTime() + nights * 86400000);
+  const checkOut = new Date(checkIn.getTime() + nights * MS_PER_DAY);
  const formatOpts: Intl.DateTimeFormatOptions = {
    month: 'short',
@@ -65,24 +65,34 @@ export const transformProduct = (p: HotelProduct): Hotel => {
  };
 };
-// convert date string to days from today
+const hotelImagePool = [
-export const dateToDaysFromToday = (dateStr: string): number => {
+  'photo-1566073771259-6a8506099945',
-  const target = new Date(dateStr);
+  'photo-1551882547-ff40c63fe5fa',
-  target.setHours(0, 0, 0, 0);
+  'photo-1590490360182-c33d57733427',
-  const today = new Date();
+  'photo-1582719478250-c89cae4dc85b',
-  today.setHours(0, 0, 0, 0);
+  'photo-1596701062351-8c2c14d1fdd0',
-  return Math.floor((target.getTime() - today.getTime()) / 86400000);
+  'photo-1631049307264-da0ec9d70304',
  'photo-1578683010236-d716f9a3f461',
  'photo-1540518614846-7eded433c457',
  'photo-1505693416388-ac5ce068fe85',
  'photo-1522771739844-6a9f6d5f14af',
  'photo-1562438668-bcf0ca6578f0',
  'photo-1595576508898-0ad5c879a061',
 ];
 /**
  * Deterministic string hash used to pick a stable slot for a given key.
  *
  * Each UTF-16 code unit is folded in as `acc * 31 + unit` (written as a
  * shift-and-subtract) and truncated to a signed 32-bit integer after every
  * step; the absolute value of the final accumulator is returned, so the
  * result is never negative. An empty string hashes to 0.
  */
 const hashString = (s: string): number => {
   const codeUnits = Array.from({ length: s.length }, (_, idx) => s.charCodeAt(idx));
   // `| 0` applies the same ToInt32 truncation on every step.
   const signed = codeUnits.reduce((acc, unit) => ((acc << 5) - acc + unit) | 0, 0);
   return Math.abs(signed);
 };
-// convert date string to date_index (days since epoch)
+export const getHotelImageUrl = (hotelId: string, size: { w: number; h: number } = { w: 400, h: 300 }): string => {
-export const dateToIndex = (dateStr: string): number => {
+  const idx = hashString(hotelId) % hotelImagePool.length;
-  const d = new Date(dateStr);
+  const photoId = hotelImagePool[idx];
-  return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000);
+  return `https://images.unsplash.com/${photoId}?w=${size.w}&h=${size.h}&fit=crop`;
 };
-// get current date_index
+export { dateToDaysFromToday, dateToIndex, todayIndex };
 export const todayIndex = (): number => {
  const now = new Date();
  now.setHours(0, 0, 0, 0);
  return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000);
 };
		`@@ -0,0 +1,2 @@`
							`"""Case-specific simulations and experiments."""`
		`@@ -0,0 +1,2 @@`
							`"""Minimal thesis-aligned pricing simulation (self-contained)."""`