From 5444a4ea13de9cd72a80dec14c8bc44cb850652e Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Fri, 27 Feb 2026 12:45:46 +0100 Subject: [PATCH 1/2] catchup: rogue scripts --- .dockerignore | 17 + .env.sweep.example | 18 + .gitignore | 57 +- AGENTS.md | 1 + engine/sweeps/tpu_jax.yaml | 93 + engine/sweeps/tpu_pod.yaml | 64 + engine/wandb_checkpoint.py | 130 + .../airflow/dags/session_pricing_pipeline.py | 269 ++ experiments/ml/encoder/__init__.py | 1 + experiments/ml/encoder/encoder.py | 210 ++ experiments/notebooks/data_export.ipynb | 957 +++++++ experiments/notebooks/states.ipynb | 1740 +++++++++++++ experiments/notebooks/step_breakdown.ipynb | 2320 +++++++++++++++++ experiments/procesing/tests/test_session.py | 165 ++ lib/separability.py | 128 + paper/src/chapters/slacberger.tex | 69 + paper/src/graphics/gcp.png | Bin 0 -> 19226 bytes paper/src/graphics/gcp.webp | Bin 0 -> 8484 bytes pyproject.toml | 12 + scripts/tpu_pod_run.sh | 32 + scripts/tpu_sync_repo.sh | 83 + scripts/tpu_vm_sweep_agent.py | 183 ++ scripts/tpu_vm_train.sh | 43 + scripts/wandb_agent_bootstrap.sh | 108 + sim/requirements.txt | 7 + sim/rl/behavior_loader/visualize_kl.py | 117 + sim/rl/thesis_core.py | 86 + 27 files changed, 6908 insertions(+), 2 deletions(-) create mode 100644 .dockerignore create mode 100644 .env.sweep.example create mode 120000 AGENTS.md create mode 100644 engine/sweeps/tpu_jax.yaml create mode 100644 engine/sweeps/tpu_pod.yaml create mode 100644 engine/wandb_checkpoint.py create mode 100644 experiments/airflow/dags/session_pricing_pipeline.py create mode 100644 experiments/ml/encoder/__init__.py create mode 100644 experiments/ml/encoder/encoder.py create mode 100644 experiments/notebooks/data_export.ipynb create mode 100644 experiments/notebooks/states.ipynb create mode 100644 experiments/notebooks/step_breakdown.ipynb create mode 100644 experiments/procesing/tests/test_session.py create mode 100644 lib/separability.py create mode 100644 paper/src/chapters/slacberger.tex 
create mode 100644 paper/src/graphics/gcp.png create mode 100644 paper/src/graphics/gcp.webp create mode 100644 pyproject.toml create mode 100755 scripts/tpu_pod_run.sh create mode 100644 scripts/tpu_sync_repo.sh create mode 100644 scripts/tpu_vm_sweep_agent.py create mode 100644 scripts/tpu_vm_train.sh create mode 100755 scripts/wandb_agent_bootstrap.sh create mode 100644 sim/requirements.txt create mode 100644 sim/rl/behavior_loader/visualize_kl.py create mode 100644 sim/rl/thesis_core.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..2bb1107 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,17 @@ +.git +.venv +.venv-tpu +**/__pycache__ +**/*.pyc +**/*.pyo +**/.pytest_cache +**/.mypy_cache +**/.ruff_cache +**/.ipynb_checkpoints +wandb +build +paper/build +paper/build-cais +node_modules +**/node_modules +*.egg-info diff --git a/.env.sweep.example b/.env.sweep.example new file mode 100644 index 0000000..1cfb168 --- /dev/null +++ b/.env.sweep.example @@ -0,0 +1,18 @@ +# Copy this file to .env.sweep and fill in values. + +# Required for wandb runs and sweep agent workers. +WANDB_API_KEY= +WANDB_ENTITY= +WANDB_PROJECT=phantom-pricing + +# Required for private repo bootstrap workers. +GITHUB_TOKEN= + +# Optional defaults for bootstrap mode. 
+# REPO_URL=https://github.com/org/repo.git +# BRANCH=main +# WORKDIR=$HOME/PHANTOM-agent +# SWEEP_ID=entity/project/id +# AGENT_COUNT=0 +# AGENT_LOOP=1 +# RETRY_SECONDS=20 diff --git a/.gitignore b/.gitignore index f18e3d4..8ae7e83 100644 --- a/.gitignore +++ b/.gitignore @@ -1,21 +1,50 @@ +# environment and secrets **/.env +.env.* +!.env.*.example **/.venv + +# python build/cache artifacts **/__pycache__ +phantom.egg-info/ +*.egg-info/ + +# notebook artifacts **/.ipynb_checkpoints/ **/.virtual_documents/ + +# editor/tool state +**/.pdf-view-restore +.nextstep +.ignore-gitlogue +.cloudflare + +# generated svg/graphics **/session_*.svg **/*graph.svg **/auto/*.el + +# misc generated *.old **/package-lock.json **/*.parquet **/_build/ +# paper build artifacts paper/src/bib/auto -**/_build/ paper/src/auto/* paper/src/bib/auto paper/template/* +paper/build-cais/ +paper/src/main.pdf +paper/src/main-blx.bib +paper/src/svg-inkscape/ +paper/src/mirrors/ +paper/variations/ +paper/src/graphics/test_*.png +thesis-latest.pdf + +# experiment run artifacts and logs docs/goals/*.md PHANTOM.wiki/ experiments/airflow/logs/* @@ -23,11 +52,35 @@ experiments/airflow/logs/scheduler/ experiments/airflow/logs/dag_processor_manager/ experiments/collected_data/ experiments/agents/collected_data/ +tests/e2e/test-results/ +tests/e2e/node_modules/** + +# rl/sim run outputs sim/rl/behavior_loader/*.dot sim/rl/behavior_loader/*.png sim/rl/behavior_loader/*.svg sim/rl/behavior_loader/*.pdf -tests/e2e/node_modules/** +sim/rl/runs/ lab/case/thesis/runs*/ sim/case/thesis_simplified/runs*/ + +# model binaries +engine/models/*.zip +*.zip + +# wandb local state +wandb/ + +# data directory (large datasets) +data/ + +# ktem local app data +ktem_app_data/ + +# generated visualization pdfs +*_mdp_viz.pdf +phantom_env_comparison.png +sim/phantom_env_comparison.png + +# web clone PHANTOM_web/* diff --git a/AGENTS.md b/AGENTS.md new file mode 120000 index 0000000..681311e --- /dev/null +++ b/AGENTS.md @@ -0,0 
+1 @@ +CLAUDE.md \ No newline at end of file diff --git a/engine/sweeps/tpu_jax.yaml b/engine/sweeps/tpu_jax.yaml new file mode 100644 index 0000000..6b4e001 --- /dev/null +++ b/engine/sweeps/tpu_jax.yaml @@ -0,0 +1,93 @@ +method: bayes +metric: + name: sweep/score + goal: maximize +command: + - ${env} + - python + - -m + - engine.train +parameters: + # fixed: always use JAX backend so TPU chips are actually exercised + use_jax: + value: true + # all four algos have JAX implementations + algo: + values: [ppo, a2c, dqn, qtable] + total_timesteps: + values: [50000, 80000, 120000] + checkpoint_interval: + value: 200000 + seed: + values: [13, 42, 77] + n_products: + values: [8, 10, 12] + # COI framework parameters -- primary research variables + alpha: + distribution: uniform + min: 0.1 + max: 0.6 + lambda_coi: + distribution: uniform + min: 0.05 + max: 0.6 + robust_radius: + distribution: uniform + min: 0.0 + max: 0.3 + robust_points: + values: [3, 5, 7] + info_value: + distribution: uniform + min: 0.5 + max: 2.0 + revenue_weight: + values: [0.005, 0.01, 0.02] + # shared hyperparameters + learning_rate: + distribution: log_uniform_values + min: 1.0e-5 + max: 1.0e-3 + gamma: + values: [0.97, 0.99, 0.995] + # JAX parallelism -- key lever for TPU throughput + jax_num_envs: + values: [8, 16, 32] + jax_num_steps: + values: [64, 128, 256] + jax_num_minibatches: + values: [2, 4, 8] + jax_update_epochs: + values: [2, 4, 8] + # PPO/A2C specific + gae_lambda: + values: [0.9, 0.95, 0.98] + clip_range: + values: [0.1, 0.2, 0.3] + ent_coef: + values: [0.0, 0.005, 0.01] + # DQN specific + buffer_size: + values: [20000, 50000, 100000] + batch_size: + values: [128, 256, 512] + learning_starts: + values: [500, 1000, 3000] + exploration_fraction: + values: [0.1, 0.2, 0.3] + exploration_final_eps: + values: [0.01, 0.03, 0.05] + # QTable specific + q_lr: + values: [0.03, 0.05, 0.1, 0.2] + eps_end: + values: [0.02, 0.05, 0.1] + eps_decay: + values: [0.999, 0.9995, 0.9999] + # action space 
+ action_levels: + values: [7, 9, 11] + action_scale_low: + values: [0.75, 0.8, 0.85] + action_scale_high: + values: [1.15, 1.2, 1.25] diff --git a/engine/sweeps/tpu_pod.yaml b/engine/sweeps/tpu_pod.yaml new file mode 100644 index 0000000..35d8ded --- /dev/null +++ b/engine/sweeps/tpu_pod.yaml @@ -0,0 +1,64 @@ +method: bayes +metric: + name: sweep/score + goal: maximize +command: + - ${env} + - python + - -m + - engine.train +parameters: + use_jax: + value: true + # pmap requires all workers to compile the same computation graph shape, + # so structural params are fixed -- only research/scalar params are swept + algo: + values: [ppo, a2c] + jax_num_envs: + value: 32 + jax_num_steps: + value: 128 + jax_num_minibatches: + value: 4 + jax_update_epochs: + value: 4 + total_timesteps: + value: 100000 + checkpoint_interval: + value: 200000 + n_products: + value: 10 + action_levels: + value: 9 + # research parameters -- primary sweep targets + alpha: + distribution: uniform + min: 0.1 + max: 0.6 + lambda_coi: + distribution: uniform + min: 0.05 + max: 0.6 + robust_radius: + distribution: uniform + min: 0.0 + max: 0.3 + info_value: + distribution: uniform + min: 0.5 + max: 2.0 + revenue_weight: + values: [0.005, 0.01, 0.02] + # training hyperparameters + learning_rate: + distribution: log_uniform_values + min: 1.0e-5 + max: 1.0e-3 + gamma: + values: [0.97, 0.99, 0.995] + gae_lambda: + values: [0.9, 0.95, 0.98] + clip_range: + values: [0.1, 0.2, 0.3] + ent_coef: + values: [0.0, 0.005, 0.01] diff --git a/engine/wandb_checkpoint.py b/engine/wandb_checkpoint.py new file mode 100644 index 0000000..4deea92 --- /dev/null +++ b/engine/wandb_checkpoint.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +import hashlib +import json +import re +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Any, Mapping + +try: + import wandb + from wandb.errors import CommError + + HAS_WANDB = True +except ImportError: + HAS_WANDB = False + wandb = None 
# type: ignore[assignment] + CommError = RuntimeError # type: ignore[assignment] + + +def _safe_value(value: Any) -> Any: + if isinstance(value, (str, int, float, bool)) or value is None: + return value + if isinstance(value, (list, tuple)): + return [_safe_value(v) for v in value] + if isinstance(value, dict): + return {str(k): _safe_value(value[k]) for k in sorted(value)} + return str(value) + + +def _safe_scope(scope: str | None) -> str: + raw = "manual" if scope in (None, "") else str(scope) + cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "-", raw).strip("-") + return cleaned or "manual" + + +def checkpoint_artifact_name( + cfg: Mapping[str, Any], *, backend: str, sweep_id: str | None = None +) -> str: + payload = {k: _safe_value(cfg[k]) for k in sorted(cfg)} + scope = _safe_scope(sweep_id) + canonical = json.dumps( + {"backend": backend, "scope": scope, "cfg": payload}, + sort_keys=True, + separators=(",", ":"), + ) + digest = hashlib.sha1(canonical.encode("utf-8")).hexdigest()[:14] + return f"phantom-{backend}-ckpt-{scope}-{digest}"[:128] + + +def _is_missing_artifact_error(exc: Exception) -> bool: + if isinstance(exc, CommError): + msg = str(exc).lower() + return "not found" in msg or "does not exist" in msg + return False + + +def download_latest_checkpoint( + artifact_name: str, *, file_name: str +) -> tuple[Path, dict[str, Any]] | None: + if not HAS_WANDB or wandb.run is None: + return None + try: + artifact = wandb.run.use_artifact(f"{artifact_name}:latest") + except Exception as exc: + if _is_missing_artifact_error(exc): + return None + raise + directory = Path(artifact.download()) + checkpoint_path = directory / file_name + if not checkpoint_path.exists(): + return None + metadata = dict(getattr(artifact, "metadata", {}) or {}) + return checkpoint_path, metadata + + +def _aliases_from_metadata(metadata: dict[str, Any] | None) -> list[str]: + aliases = ["latest"] + if metadata is None: + return aliases + if "step" in metadata: + try: + 
aliases.append(f"step-{int(metadata['step'])}") + except (TypeError, ValueError): + pass + return aliases + + +def log_checkpoint_bytes( + artifact_name: str, + *, + file_name: str, + payload: bytes, + metadata: dict[str, Any] | None = None, +) -> bool: + if not HAS_WANDB or wandb.run is None: + return False + with TemporaryDirectory(prefix="phantom-ckpt-") as tmpdir: + path = Path(tmpdir) / file_name + path.write_bytes(payload) + artifact = wandb.Artifact( + name=artifact_name, + type="checkpoint", + metadata=metadata or {}, + ) + artifact.add_file(path.as_posix(), name=file_name) + wandb.log_artifact(artifact, aliases=_aliases_from_metadata(metadata)) + return True + + +def log_checkpoint_file( + artifact_name: str, + *, + file_path: str | Path, + artifact_file_name: str, + metadata: dict[str, Any] | None = None, +) -> bool: + if not HAS_WANDB or wandb.run is None: + return False + src = Path(file_path) + if not src.exists(): + return False + artifact = wandb.Artifact( + name=artifact_name, + type="checkpoint", + metadata=metadata or {}, + ) + artifact.add_file(src.as_posix(), name=artifact_file_name) + wandb.log_artifact(artifact, aliases=_aliases_from_metadata(metadata)) + return True diff --git a/experiments/airflow/dags/session_pricing_pipeline.py b/experiments/airflow/dags/session_pricing_pipeline.py new file mode 100644 index 0000000..ab8db77 --- /dev/null +++ b/experiments/airflow/dags/session_pricing_pipeline.py @@ -0,0 +1,269 @@ +""" +Session-Aware Pricing DAG +THIS implements the core pricing computation (policy layer). + +Flow: τ → θ̂ → D → p* + 1. Fetch recent sessions from Kafka (last 10 active) + 2. Extract features per session (τ → θ̂) + 3. Map features to demand proxy (θ̂ → D) + 4. Compute optimal prices (D → p*) + 5. 
Write to Redis session:{sessionId}:prices + +Scheduled: every 1 minute when enabled +""" +from airflow import DAG +from airflow.operators.python import PythonOperator +from airflow.utils.dates import days_ago +from datetime import timedelta +import pandas as pd +import numpy as np +import logging +import sys +import pickle + +sys.path.insert(0, '/opt/airflow') + +from procesing.context import PipelineContext +from procesing.providers import SupabaseProvider, BackendAPIProvider +from procesing.steps.session import ExtractSessionFeaturesStep +from procesing.pricers.simple import SimpleSurgePricer, session_features_to_demand +from procesing.pricing import StateSpace +from lib.model_registry import ModelRegistry + +DEFAULT_ARGS = { + 'owner': 'phantom-research', + 'depends_on_past': False, + 'email_on_failure': False, + 'email_on_retry': False, + 'retries': 1, + 'retry_delay': timedelta(seconds=30), +} + + +class CompositeProvider(SupabaseProvider, BackendAPIProvider): + def __init__(self): + SupabaseProvider.__init__(self) + BackendAPIProvider.__init__(self) + + +def _get_context(store_mode: str = 'hotel') -> PipelineContext: + return PipelineContext(provider=CompositeProvider(), store_mode=store_mode) + + +def fetch_recent_sessions(**kwargs): + """ + Task: Fetch last N active sessions from Kafka. + Returns: DataFrame of interaction events for recent sessions. 
+ """ + dag_conf = kwargs.get('dag_run').conf if kwargs.get('dag_run') else {} + store_mode = dag_conf.get('store_mode', 'hotel') + session_limit = dag_conf.get('session_limit', 10) + + ctx = _get_context(store_mode) + provider = ctx.provider + + # fetch all recent interactions from Kafka + try: + interactions_df = provider.fetch_kafka_topic("user-interactions") + except Exception as e: + logging.error(f"Failed to fetch interactions: {e}") + kwargs['ti'].xcom_push(key='sessions_data', value=pickle.dumps(pd.DataFrame())) + return 0 + + if interactions_df.empty or 'sessionId' not in interactions_df.columns: + kwargs['ti'].xcom_push(key='sessions_data', value=pickle.dumps(pd.DataFrame())) + return 0 + + # identify last N active sessions (most recent by event count) + recent_sessions = interactions_df['sessionId'].value_counts().head(session_limit).index.tolist() + + # filter to only those sessions + filtered_df = interactions_df[interactions_df['sessionId'].isin(recent_sessions)].copy() + + kwargs['ti'].xcom_push(key='sessions_data', value=pickle.dumps(filtered_df)) + kwargs['ti'].xcom_push(key='session_ids', value=recent_sessions) + + logging.info(f"Fetched {len(filtered_df)} events for {len(recent_sessions)} sessions") + return len(recent_sessions) + + +def extract_session_features(**kwargs): + """ + Task: Extract behavioral features from session trajectories. + THIS implements τ → θ̂ transformation. 
+ """ + ti = kwargs['ti'] + sessions_df = pickle.loads(ti.xcom_pull(key='sessions_data')) + + if sessions_df.empty: + ti.xcom_push(key='session_features', value=pickle.dumps(pd.DataFrame())) + return 0 + + dag_conf = kwargs.get('dag_run').conf if kwargs.get('dag_run') else {} + ctx = _get_context(dag_conf.get('store_mode', 'hotel')) + + # extract features using vectorized pipeline + feature_extractor = ExtractSessionFeaturesStep(ctx) + features_df = feature_extractor.transform(sessions_df) + + ti.xcom_push(key='session_features', value=pickle.dumps(features_df)) + + logging.info(f"Extracted {len(features_df.columns)} features for {len(features_df)} sessions") + logging.info(f"Feature columns: {list(features_df.columns)}") + logging.info(f"Sample features (first session):\n{features_df.iloc[0].to_dict()}") + + return len(features_df) + + +def compute_session_prices(**kwargs): + """ + Task: Compute optimal prices for each session. + THIS implements θ̂ → D → p* transformation. + """ + ti = kwargs['ti'] + features_df = pickle.loads(ti.xcom_pull(key='session_features')) + + if features_df.empty: + ti.xcom_push(key='price_results', value=pickle.dumps({})) + return 0 + + dag_conf = kwargs.get('dag_run').conf if kwargs.get('dag_run') else {} + store_mode = dag_conf.get('store_mode', 'hotel') + ctx = _get_context(store_mode) + + # fetch product catalog for base prices + products_df = ctx.provider.fetch_products(store_mode) + if products_df.empty: + logging.error("No products found in catalog") + ti.xcom_push(key='price_results', value=pickle.dumps({})) + return 0 + + products_df['base_price'] = products_df['metadata'].apply( + lambda m: m.get('base_price', 100.0) if isinstance(m, dict) else 100.0 + ) + + # initialize pricing model + pricer = SimpleSurgePricer( + high_threshold=dag_conf.get('high_threshold', 10), + low_threshold=dag_conf.get('low_threshold', 2), + surge_multiplier=dag_conf.get('surge_multiplier', 1.15), + 
discount_multiplier=dag_conf.get('discount_multiplier', 0.95) + ) + pricer.fit(products_df) + + # compute prices per session + price_results = {} + n_products = len(products_df) + + logging.info(f"Starting price computation for {len(features_df)} sessions, {n_products} products") + logging.info(f"Pricer config: high_thresh={pricer.high_threshold}, low_thresh={pricer.low_threshold}, surge_mult={pricer.surge_multiplier}") + + for idx, session_row in features_df.iterrows(): + session_id = session_row.get('sessionId') + if not session_id: + continue + + # map features to demand proxy (θ̂ → D) + session_features_single = pd.DataFrame([session_row]) + demand_proxy = session_features_to_demand(session_features_single) + + logging.info(f"[Session {session_id}] Features → Demand: {demand_proxy:.2f}") + logging.info(f"[Session {session_id}] Key features: velocity={session_row.get('interaction_velocity', 0):.2f}, cart_ratio={session_row.get('cart_to_view_ratio', 0):.2f}, item_views={session_row.get('item_views', 0)}") + + # build state space + state_space = StateSpace( + demand=np.full(n_products, demand_proxy), # broadcast session demand to all products + prices=products_df['base_price'].values, + session_features=session_features_single + ) + + # compute optimal prices (D → p*) + optimal_prices = pricer.predict(state_space) + + base_avg = products_df['base_price'].mean() + optimal_avg = optimal_prices.mean() + price_change_pct = ((optimal_avg - base_avg) / base_avg) * 100 + + logging.info(f"[Session {session_id}] Price adjustment: base_avg={base_avg:.2f}, optimal_avg={optimal_avg:.2f}, change={price_change_pct:+.1f}%") + + # store as dict {productId: price} + price_map = { + str(products_df.iloc[i]['id']): float(optimal_prices[i]) + for i in range(n_products) + } + + price_results[session_id] = price_map + + ti.xcom_push(key='price_results', value=pickle.dumps(price_results)) + + logging.info(f"Computed prices for {len(price_results)} sessions, {n_products} products each") 
+ return len(price_results) + + +def publish_to_registry(**kwargs): + """ + Task: Write session prices to Redis registry. + THIS is the write path: prices → session:{sessionId}:prices + """ + ti = kwargs['ti'] + price_results = pickle.loads(ti.xcom_pull(key='price_results')) + + if not price_results: + logging.warning("No prices to publish") + return 0 + + registry = ModelRegistry() + ttl = kwargs.get('dag_run').conf.get('ttl', 1800) if kwargs.get('dag_run') and kwargs.get('dag_run').conf else 1800 + + published_count = 0 + for session_id, price_map in price_results.items(): + registry.set_session_prices(session_id, price_map, ttl=ttl) + published_count += 1 + + logging.info(f"Published prices for {published_count} sessions to registry (TTL={ttl}s)") + + return { + 'sessions_published': published_count, + 'products_per_session': len(next(iter(price_results.values()))) if price_results else 0, + 'status': 'success' + } + + +# DAG definition +with DAG( + 'session_pricing_pipeline', + default_args=DEFAULT_ARGS, + description='Session-aware pricing: extract features → compute prices → publish to registry', + schedule_interval='*/1 * * * *', # every 1 minute + start_date=days_ago(1), + catchup=False, + max_active_runs=1, + tags=['pricing', 'session-aware', 'research', 'real-time'], +) as dag: + + t_fetch_sessions = PythonOperator( + task_id='fetch_recent_sessions', + python_callable=fetch_recent_sessions, + provide_context=True, + ) + + t_extract_features = PythonOperator( + task_id='extract_session_features', + python_callable=extract_session_features, + provide_context=True, + ) + + t_compute_prices = PythonOperator( + task_id='compute_session_prices', + python_callable=compute_session_prices, + provide_context=True, + ) + + t_publish = PythonOperator( + task_id='publish_to_registry', + python_callable=publish_to_registry, + provide_context=True, + ) + + # linear dependency: fetch → extract → compute → publish + t_fetch_sessions >> t_extract_features >> 
t_compute_prices >> t_publish diff --git a/experiments/ml/encoder/__init__.py b/experiments/ml/encoder/__init__.py new file mode 100644 index 0000000..0b18d1a --- /dev/null +++ b/experiments/ml/encoder/__init__.py @@ -0,0 +1 @@ +from .encoder import Window, extract_windows, build_windows, WindowDataset, PrototypeClassifier, train, loocv diff --git a/experiments/ml/encoder/encoder.py b/experiments/ml/encoder/encoder.py new file mode 100644 index 0000000..2d9f3c2 --- /dev/null +++ b/experiments/ml/encoder/encoder.py @@ -0,0 +1,210 @@ +"""Contrastive encoder via trajectory windowing. Classification by prototype distance.""" +import sys +sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/sim/rl/behavior_loader") +sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml") + +from sim.rl.behavior_loader.loader import JointLoader, PayloadModel +from arch import TrajectoryEncoder, featurize_trajectory, nt_xent_loss +from typing import List, Dict, Tuple +from dataclasses import dataclass +from datetime import datetime +import numpy as np, torch, torch.nn.functional as F, random, optuna +from torch.utils.data import Dataset, DataLoader +from torch.optim import Adam +from torch.utils.tensorboard import SummaryWriter + +RUNS = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml/runs" +AGENT_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" +HUMAN_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" + + +@dataclass +class Window: + events: List[PayloadModel] + traj_id: str + label: int # 0=human, 1=agent + + +def extract_windows(events: List[PayloadModel], traj_id: str, label: int, + sizes: List[int] = [5, 10, 15], stride: int = 2) -> List[Window]: + """Multi-scale overlapping windows from trajectory""" + n = len(events) + wins = [Window(events[i:i+s], traj_id, label) for s in sizes if n >= s for i in range(0, n-s+1, stride)] + if n >= 3: wins.append(Window(events, 
traj_id, label)) # full traj + return wins + + +def build_windows(data: Dict[str, List], sizes=[5,10,15], stride=2) -> List[Window]: + return [w for tid, evts in data.items() + for w in extract_windows(evts, tid, 0 if tid.startswith('human_') else 1, sizes, stride)] + + +class WindowDataset(Dataset): + """Yields (anchor, positive) pairs from same class""" + def __init__(self, windows: List[Window], dim: int = 64): + self.wins, self.dim = windows, dim + self.by_label = {0: [i for i,w in enumerate(windows) if w.label==0], + 1: [i for i,w in enumerate(windows) if w.label==1]} + self.by_traj = {} + for i, w in enumerate(windows): self.by_traj.setdefault(w.traj_id, []).append(i) + + def __len__(self): return len(self.wins) + + def _feat(self, evts): return featurize_trajectory(evts, None, self.dim) + + def _aug(self, evts): # subsample 70-100% + if len(evts) < 4: return evts + k = max(3, int(len(evts) * random.uniform(0.7, 1.0))) + start = random.randint(0, len(evts) - k) + return evts[start:start+k] + + def __getitem__(self, idx): + w = self.wins[idx] + pool = [i for i in self.by_label[w.label] if self.wins[i].traj_id != w.traj_id] + pos_idx = random.choice(pool) if pool else idx + a = torch.tensor(self._feat(self._aug(w.events)), dtype=torch.float32) + p = torch.tensor(self._feat(self._aug(self.wins[pos_idx].events)), dtype=torch.float32) + return a, p, w.label + + +class PrototypeClassifier: + """Classify by distance to class centroids""" + def __init__(self, encoder: TrajectoryEncoder, device = 'cuda', dim=64): + self.enc, self.dev, self.dim = encoder, device, dim + self.centroids = {0: None, 1: None} + + def fit(self, windows: List[Window]): + self.enc.eval() + embs = {0: [], 1: []} + with torch.no_grad(): + for w in windows: + x = torch.tensor(featurize_trajectory(w.events, None, self.dim), dtype=torch.float32) + z = self.enc(x.unsqueeze(0).unsqueeze(1).to(self.dev)) + embs[w.label].append(z) + self.centroids = {k: torch.cat(v).mean(0, keepdim=True) if v else None 
for k, v in embs.items()} + return self + + def predict(self, events: List[PayloadModel]) -> Tuple[int, float, Dict]: + """Returns (pred, confidence, debug). Confidence via softmax over -distances.""" + self.enc.eval() + with torch.no_grad(): + x = torch.tensor(featurize_trajectory(events, None, self.dim), dtype=torch.float32) + z = self.enc(x.unsqueeze(0).unsqueeze(1).to(self.dev)) + dists = {k: torch.norm(z - c, dim=1).item() for k, c in self.centroids.items() if c is not None} + if not dists: return 0, 0.0, {'d': {}, 'p': [0.5, 0.5]} + pred = min(dists, key=dists.get) + d0, d1 = dists.get(0, 1e6), dists.get(1, 1e6) # softmax(-d) gives higher prob to closer centroid + probs = F.softmax(torch.tensor([[-d0, -d1]]), dim=1).squeeze() + return pred, probs[pred].item(), {'d': dists, 'p': probs.tolist()} + + +def train(epochs=200, lr=5e-4, batch=16, dim=64, emb=32, temp=0.5, + sizes=[5,10,15], stride=2, name=None, verbose=True): + data = JointLoader(HUMAN_DIR, AGENT_DIR).get_data() + wins = build_windows(data, sizes, stride) + if verbose: print(f"Windows: {len(wins)} ({sum(w.label==0 for w in wins)}h/{sum(w.label==1 for w in wins)}a)") + + dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + enc = TrajectoryEncoder(dim, emb).to(dev) + opt = Adam(enc.parameters(), lr=lr) + loader = DataLoader(WindowDataset(wins, dim), batch_size=batch, shuffle=True, drop_last=True) + + name = name or f"enc_{dim}_{emb}_{datetime.now():%Y%m%d_%H%M%S}" + writer = SummaryWriter(f"{RUNS}/encoder/{name}") + + for ep in range(epochs): + enc.train() + total, n = 0.0, 0 + for a, p, _ in loader: + loss = nt_xent_loss(enc(a.unsqueeze(1).to(dev)), enc(p.unsqueeze(1).to(dev)), temp) + opt.zero_grad(); loss.backward(); opt.step() + total += loss.item(); n += 1 + avg = total / max(n, 1) + writer.add_scalar('loss-ntxent', avg, ep) + if verbose and (ep+1) % 20 == 0: print(f"Epoch {ep+1}: {avg:.4f}") + + writer.close() + return enc, wins, dev + + +def loocv(epochs=100, lr=5e-4, dim=64, 
emb=32, temp=0.5, sizes=[5,10,15], stride=2, verbose=True): + """Leave-one-trajectory-out CV""" + data = JointLoader(HUMAN_DIR, AGENT_DIR).get_data() + dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + results = [] + + for test_id in data: + train_data = {k: v for k, v in data.items() if k != test_id} + if not any(k.startswith('human_') for k in train_data) or not any(k.startswith('agent_') for k in train_data): + continue + + wins = build_windows(train_data, sizes, stride) + enc = TrajectoryEncoder(dim, emb).to(dev) + opt = Adam(enc.parameters(), lr=lr) + loader = DataLoader(WindowDataset(wins, dim), batch_size=min(16, len(wins)//2 or 1), + shuffle=True, drop_last=len(wins)>2) + + for _ in range(epochs): + enc.train() + for a, p, _ in loader: + loss = nt_xent_loss(enc(a.unsqueeze(1).to(dev)), enc(p.unsqueeze(1).to(dev)), temp) + opt.zero_grad(); loss.backward(); opt.step() + + clf = PrototypeClassifier(enc, dev, dim).fit(wins) + pred, conf, dbg = clf.predict(data[test_id]) + actual = 0 if test_id.startswith('human_') else 1 + results.append((pred, actual, conf)) + if verbose: print(f"{test_id[:18]}: pred={pred} conf={conf:.2f} actual={actual} {'OK' if pred==actual else 'MISS'}") + + if results: + acc = sum(p==a for p,a,_ in results) / len(results) + if verbose: print(f"\nAccuracy: {acc:.1%} ({sum(p==a for p,a,_ in results)}/{len(results)})") + return acc, results + return 0.0, [] + + +def hparam_tune(n_trials=50, epochs=60, n_jobs=2, verbose=True): + """Optuna hyperparameter search maximizing LOOCV accuracy""" + def objective(trial): + lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True) + dim = trial.suggest_categorical('dim', [32, 64, 128, 256]) + emb = trial.suggest_categorical('emb', [16, 32, 64, 128]) + temp = trial.suggest_float('temp', 0.05, 1.0) + stride = trial.suggest_int('stride', 1, 4) + sizes = [trial.suggest_int(f's{i}', 3, 20) for i in range(3)] + sizes = sorted(set(sizes)) # unique sorted + acc, _ = loocv(epochs, lr, dim, emb, 
temp, sizes, stride, verbose=False) + return acc + + study = optuna.create_study(direction='maximize', study_name='encoder_hparam', + sampler=optuna.samplers.TPESampler(seed=42)) + study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs, show_progress_bar=verbose) + + best = study.best_params + if verbose: + print(f"\nBest accuracy: {study.best_value:.1%}") + print(f"Best params: {best}") + return best, study + + +if __name__ == "__main__": + import argparse + p = argparse.ArgumentParser() + p.add_argument('--mode', choices=['train', 'eval', 'hparam'], default='train') + p.add_argument('--epochs', type=int, default=200) + p.add_argument('--lr', type=float, default=5e-4) + p.add_argument('--dim', type=int, default=128) + p.add_argument('--emb', type=int, default=64) + p.add_argument('--temp', type=float, default=0.1) + p.add_argument('--sizes', type=str, default='5,10,15') + p.add_argument('--stride', type=int, default=2) + p.add_argument('--n_trials', type=int, default=50) + args = p.parse_args() + sizes = [int(x) for x in args.sizes.split(',')] + + if args.mode == 'train': + enc, wins, dev = train(args.epochs, args.lr, 16, args.dim, args.emb, args.temp, sizes, args.stride) + elif args.mode == 'hparam': + best, study = hparam_tune(args.n_trials, min(args.epochs, 60)) + else: + loocv(args.epochs, args.lr, args.dim, args.emb, args.temp, sizes, args.stride) diff --git a/experiments/notebooks/data_export.ipynb b/experiments/notebooks/data_export.ipynb new file mode 100644 index 0000000..7cd9366 --- /dev/null +++ b/experiments/notebooks/data_export.ipynb @@ -0,0 +1,957 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 10, + "id": "62eafcd9-5462-4063-8873-0e7fb9add907", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from kafka import KafkaConsumer\n", + "import pandas as pd\n", + "import json\n", + "import numpy 
as np\n", + "import os\n", + "from dotenv import load_dotenv\n", + "import matplotlib.pyplot as plt\n", + "from IPython.display import display, SVG, Image\n", + "load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "4af65cb4-e8cf-4877-b2db-13ac19f3838f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 73 entries, 0 to 72\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sessionId 73 non-null object \n", + " 1 eventName 73 non-null object \n", + " 2 page 73 non-null object \n", + " 3 productId 67 non-null object \n", + " 4 storeMode 73 non-null object \n", + " 5 userAgent 73 non-null object \n", + " 6 ts 73 non-null object \n", + " 7 metadata_referrer 6 non-null object \n", + " 8 metadata_roomType 45 non-null object \n", + " 9 metadata_price 45 non-null float64\n", + " 10 metadata_nights 45 non-null float64\n", + " 11 metadata_elementText 22 non-null object \n", + " 12 metadata_dwellTime 22 non-null float64\n", + "dtypes: float64(3), object(10)\n", + "memory usage: 7.5+ KB\n" + ] + } + ], + "source": [ + "KAFKA_PORT=os.getenv(\"KAFKA_PORT\", 9092)\n", + "topic = \"user-interactions\"\n", + "consumer = KafkaConsumer(\n", + " topic, \n", + " enable_auto_commit=True,\n", + " value_deserializer=lambda x: json.loads(x.decode('utf-8')),\n", + " auto_offset_reset='earliest', \n", + " bootstrap_servers=['localhost:9092'])\n", + "messages=consumer.poll(timeout_ms=1000,max_records=10000)\n", + "df = []\n", + "for m in messages.values():\n", + " for i in m:\n", + " df.append(i.value)\n", + "df = pd.DataFrame(df)\n", + "# explode metadata col json\n", + "df = df.join(pd.json_normalize(df.pop(\"metadata\"), sep=\".\").add_prefix(\"metadata_\"))\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f6819a1c-32ab-49c7-845b-5df7bf60f561", + "metadata": {}, + "outputs": [ 
+ { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sessionIdeventNamepageproductIdstoreModeuserAgenttsmetadata_referrermetadata_roomTypemetadata_pricemetadata_nightsmetadata_elementTextmetadata_dwellTime
0d176d7c9-4027-4702-9e31-2a71395cdda0page_view/productsNonehotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:23:46.270ZNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1f0317a5d-e424-44e9-b784-c8f7291ffe31page_view/NonehotelMozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...2025-11-14T13:26:00.291ZNaNNaNNaNNaNNaN
2f0317a5d-e424-44e9-b784-c8f7291ffe31page_view/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...2025-11-14T13:26:07.769ZNaNNaNNaNNaNNaN
3f0317a5d-e424-44e9-b784-c8f7291ffe31view_item_page/productshtl-0hotelMozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...2025-11-14T13:26:15.010ZNaNPremium Room269.01.0NaNNaN
4238dc588-a7ab-4c0e-bccd-6abca5076c66page_view/productsNonehotelMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...2025-11-14T13:27:15.457ZNaNNaNNaNNaNNaN
5238dc588-a7ab-4c0e-bccd-6abca5076c66view_item_page/productshtl-0hotelMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...2025-11-14T13:27:15.591ZNaNPremium Room264.02.0NaNNaNNaNNaNNaNNaNNaN
432214d9fad-9b00-40c3-bd0e-7739b6acd654click1762448192425DIVNaNNaNNaNNaNNaN/NaN1623.0493.0NaNNaNNaNNaNNaNNaN
6238dc588-a7ab-4c0e-bccd-6abca5076c66view_item_page/productshtl-0hotelMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...2025-11-14T13:27:21.483ZNaNPremium Room264.02.0NaNNaN
7238dc588-a7ab-4c0e-bccd-6abca5076c66hover_over_title/productshtl-0hotelMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...2025-11-14T13:27:22.646ZNaNNaNNaNNaNGrand Plaza Hotel1200.0
8238dc588-a7ab-4c0e-bccd-6abca5076c66view_item_page/productshtl-0hotelMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...2025-11-14T13:27:25.889ZNaNPremium Room264.02.0NaNNaN
35013fc334-4045-4d5a-8739-dd0a8766a63bpage_view/productsNonehotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:53:59.993ZNaNNaNNaNNaNNaN
36013fc334-4045-4d5a-8739-dd0a8766a63bview_item_page/productshtl-0hotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:54:10.705ZNaNPremium Room223.03.0NaNNaN
37013fc334-4045-4d5a-8739-dd0a8766a63bhover_over_title/productshtl-0hotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:54:11.771ZNaNNaN416.0397.0NaNNaNNaNNaNNaNNaNGrand Plaza Hotel1200.0
38013fc334-4045-4d5a-8739-dd0a8766a63bview_item_page/productshtl-1hotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:54:29.772ZNaNStandard Room267.05.0NaNNaN
39013fc334-4045-4d5a-8739-dd0a8766a63bhover_over_title/productshtl-1hotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:54:30.833ZNaNNaNNaNNaNSeaside Resort1200.0
\n", + "
" + ], + "text/plain": [ + " sessionId eventName page \\\n", + "0 d176d7c9-4027-4702-9e31-2a71395cdda0 page_view /products \n", + "1 f0317a5d-e424-44e9-b784-c8f7291ffe31 page_view / \n", + "2 f0317a5d-e424-44e9-b784-c8f7291ffe31 page_view /products \n", + "3 f0317a5d-e424-44e9-b784-c8f7291ffe31 view_item_page /products \n", + "4 238dc588-a7ab-4c0e-bccd-6abca5076c66 page_view /products \n", + "5 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n", + "6 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n", + "7 238dc588-a7ab-4c0e-bccd-6abca5076c66 hover_over_title /products \n", + "8 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n", + "35 013fc334-4045-4d5a-8739-dd0a8766a63b page_view /products \n", + "36 013fc334-4045-4d5a-8739-dd0a8766a63b view_item_page /products \n", + "37 013fc334-4045-4d5a-8739-dd0a8766a63b hover_over_title /products \n", + "38 013fc334-4045-4d5a-8739-dd0a8766a63b view_item_page /products \n", + "39 013fc334-4045-4d5a-8739-dd0a8766a63b hover_over_title /products \n", + "\n", + " productId storeMode userAgent \\\n", + "0 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "1 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n", + "2 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n", + "3 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n", + "4 None hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", + "5 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", + "6 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", + "7 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", + "8 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", + "35 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "36 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "37 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... 
\n", + "38 htl-1 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "39 htl-1 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "\n", + " ts metadata_referrer metadata_roomType \\\n", + "0 2025-11-14T13:23:46.270Z NaN \n", + "1 2025-11-14T13:26:00.291Z NaN \n", + "2 2025-11-14T13:26:07.769Z NaN \n", + "3 2025-11-14T13:26:15.010Z NaN Premium Room \n", + "4 2025-11-14T13:27:15.457Z NaN \n", + "5 2025-11-14T13:27:15.591Z NaN Premium Room \n", + "6 2025-11-14T13:27:21.483Z NaN Premium Room \n", + "7 2025-11-14T13:27:22.646Z NaN NaN \n", + "8 2025-11-14T13:27:25.889Z NaN Premium Room \n", + "35 2025-11-14T13:53:59.993Z NaN \n", + "36 2025-11-14T13:54:10.705Z NaN Premium Room \n", + "37 2025-11-14T13:54:11.771Z NaN NaN \n", + "38 2025-11-14T13:54:29.772Z NaN Standard Room \n", + "39 2025-11-14T13:54:30.833Z NaN NaN \n", + "\n", + " metadata_price metadata_nights metadata_elementText metadata_dwellTime \n", + "0 NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN \n", + "3 269.0 1.0 NaN NaN \n", + "4 NaN NaN NaN NaN \n", + "5 264.0 2.0 NaN NaN \n", + "6 264.0 2.0 NaN NaN \n", + "7 NaN NaN Grand Plaza Hotel 1200.0 \n", + "8 264.0 2.0 NaN NaN \n", + "35 NaN NaN NaN NaN \n", + "36 223.0 3.0 NaN NaN \n", + "37 NaN NaN Grand Plaza Hotel 1200.0 \n", + "38 267.0 5.0 NaN NaN \n", + "39 NaN NaN Seaside Resort 1200.0 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('sessionId').head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "380eca5f-8304-4fb2-be32-e8bcfd312085", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['013fc334-4045-4d5a-8739-dd0a8766a63b',\n", + " '238dc588-a7ab-4c0e-bccd-6abca5076c66',\n", + " 'd176d7c9-4027-4702-9e31-2a71395cdda0',\n", + " 'f0317a5d-e424-44e9-b784-c8f7291ffe31']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sessions = 
list(set(df['sessionId'])); sessions # 238dc588-a7ab-4c0e-bccd-6abca5076c66" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f4ae6f81-dcb8-44be-aee7-30dbc3a6bae1", + "metadata": {}, + "outputs": [], + "source": [ + "# map sessions to experiments" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "050d90a4-20a9-47f5-b998-c31178a54cb3", + "metadata": {}, + "outputs": [], + "source": [ + "def build_transition_prob_matrix(df: pd.DataFrame):\n", + " df = df.dropna(subset=['eventName'])\n", + " events = df['eventName'].tolist()\n", + " labels = pd.Index(events).unique().tolist()\n", + " idx = {e:i for i,e in enumerate(labels)}\n", + " M = np.zeros((len(labels), len(labels)), dtype=float)\n", + " for a, b in zip(events, events[1:]):\n", + " M[idx[a], idx[b]] += 1\n", + " row_sums = M.sum(axis=1, keepdims=True)\n", + " with np.errstate(divide='ignore', invalid='ignore'):\n", + " P = np.divide(M, row_sums, where=row_sums>0) # row-normalized\n", + " return P, labels" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e68f9004-82f5-4826-aece-e3dc6e15a18f", + "metadata": {}, + "outputs": [], + "source": [ + "# https://medium.com/data-science/time-series-data-markov-transition-matrices-7060771e362b\n", + "from graphviz import Digraph\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "def _as_prob_df(matrix, labels=None):\n", + " \"\"\"Return a square DataFrame with index=columns=labels.\"\"\"\n", + " if isinstance(matrix, pd.DataFrame):\n", + " # Ensure square and aligned\n", + " assert (matrix.index == matrix.columns).all(), \"Index/columns must match.\"\n", + " return matrix\n", + " matrix = np.asarray(matrix, dtype=float)\n", + " assert matrix.shape[0] == matrix.shape[1], \"Matrix must be square.\"\n", + " if labels is None:\n", + " raise ValueError(\"labels are required when matrix is not a DataFrame\")\n", + " assert len(labels) == matrix.shape[0], \"labels length must match matrix size.\"\n", + " 
return pd.DataFrame(matrix, index=list(labels), columns=list(labels))\n", + "\n", + "def _df_to_edgelist(P: pd.DataFrame, threshold=0.0, round_digits=2):\n", + " \"\"\"Build weighted edges > threshold.\"\"\"\n", + " edges = []\n", + " for src in P.index:\n", + " for dst in P.columns:\n", + " w = float(P.loc[src, dst])\n", + " if w > threshold:\n", + " edges.append((str(src), str(dst), f\"{w:.{round_digits}f}\"))\n", + " return edges\n", + "\n", + "def render_graph(fname, matrix, ls_index=None, threshold=0.0, fmt=\"svg\", view=False):\n", + " \"\"\"\n", + " fname: output file stem (no extension)\n", + " matrix: NumPy array or pandas DataFrame of transition PROBABILITIES\n", + " ls_index: ordered labels (required if matrix is not a DataFrame)\n", + " threshold: hide edges with weight <= threshold\n", + " fmt: 'svg'|'png'|'pdf' etc.\n", + " view: open after rendering\n", + " \"\"\"\n", + " P = _as_prob_df(matrix, labels=ls_index)\n", + " edges = _df_to_edgelist(P, threshold=threshold)\n", + "\n", + " g = Digraph(format=fmt)\n", + " g.attr(rankdir=\"LR\", size=\"30\")\n", + " g.attr(\"node\", shape=\"circle\")\n", + "\n", + " # ensure isolated nodes appear\n", + " for node in P.index:\n", + " g.node(str(node), width=\"1\", height=\"1\")\n", + "\n", + " for src, dst, label in edges:\n", + " g.edge(src, dst, label=label)\n", + "\n", + " g.render(fname, view=view, cleanup=True)\n", + " return g\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "e255a2c1-6454-4e5e-89f6-ef8ac51ab6cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "013fc334-4045-4d5a-8739-dd0a8766a63b\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "page_view\n", + "\n", + "page_view\n", + "\n", + "\n", + "\n", + "view_item_page\n", + "\n", + "view_item_page\n", + "\n", + "\n", + "\n", + "page_view->view_item_page\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", 
+ "\n", + "view_item_page->view_item_page\n", + "\n", + "\n", + "0.68\n", + "\n", + "\n", + "\n", + "hover_over_title\n", + "\n", + "hover_over_title\n", + "\n", + "\n", + "\n", + "view_item_page->hover_over_title\n", + "\n", + "\n", + "0.29\n", + "\n", + "\n", + "\n", + "hover_over_paragraph\n", + "\n", + "hover_over_paragraph\n", + "\n", + "\n", + "\n", + "view_item_page->hover_over_paragraph\n", + "\n", + "\n", + "0.04\n", + "\n", + "\n", + "\n", + "hover_over_title->view_item_page\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.00000000e+000 1.00000000e+000 0.00000000e+000 0.00000000e+000]\n", + " [0.00000000e+000 6.78571429e-001 2.85714286e-001 3.57142857e-002]\n", + " [0.00000000e+000 1.00000000e+000 0.00000000e+000 0.00000000e+000]\n", + " [2.05833592e-312 2.29175545e-312 4.94065646e-324 6.92110218e-310]]\n", + "238dc588-a7ab-4c0e-bccd-6abca5076c66\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "page_view\n", + "\n", + "page_view\n", + "\n", + "\n", + "\n", + "view_item_page\n", + "\n", + "view_item_page\n", + "\n", + "\n", + "\n", + "page_view->view_item_page\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", + "\n", + "view_item_page->view_item_page\n", + "\n", + "\n", + "0.19\n", + "\n", + "\n", + "\n", + "hover_over_title\n", + "\n", + "hover_over_title\n", + "\n", + "\n", + "\n", + "view_item_page->hover_over_title\n", + "\n", + "\n", + "0.38\n", + "\n", + "\n", + "\n", + "hover_over_paragraph\n", + "\n", + "hover_over_paragraph\n", + "\n", + "\n", 
+ "\n", + "view_item_page->hover_over_paragraph\n", + "\n", + "\n", + "0.44\n", + "\n", + "\n", + "\n", + "hover_over_title->view_item_page\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", + "\n", + "hover_over_paragraph->page_view\n", + "\n", + "\n", + "0.14\n", + "\n", + "\n", + "\n", + "hover_over_paragraph->view_item_page\n", + "\n", + "\n", + "0.86\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 1. 0. 0. ]\n", + " [0. 0.1875 0.375 0.4375 ]\n", + " [0. 1. 0. 0. ]\n", + " [0.14285714 0.85714286 0. 0. ]]\n", + "d176d7c9-4027-4702-9e31-2a71395cdda0\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "page_view\n", + "\n", + "page_view\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.]]\n", + "f0317a5d-e424-44e9-b784-c8f7291ffe31\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "page_view\n", + "\n", + "page_view\n", + "\n", + "\n", + "\n", + "page_view->page_view\n", + "\n", + "\n", + "0.50\n", + "\n", + "\n", + "\n", + "view_item_page\n", + "\n", + "view_item_page\n", + "\n", + "\n", + "\n", + "page_view->view_item_page\n", + "\n", + "\n", + "0.50\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[5.0e-001 5.0e-001]\n", + " [9.9e-324 1.5e-323]]\n" + ] + } + ], + "source": [ + "def explore_session(session_id: str):\n", + " subset = df[df['sessionId'] == session_id]\n", + " print(session_id)\n", + " P, labels = build_transition_prob_matrix(subset)\n", + " g = render_graph(f\"session_{session_id}\", P, 
ls_index=labels, threshold=0.01, fmt=\"svg\", view=False)\n", + " display(g)\n", + " return P\n", + "for session in sessions:\n", + " print(explore_session(session))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (PHANTOM)", + "language": "python", + "name": "phantom" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/experiments/notebooks/states.ipynb b/experiments/notebooks/states.ipynb new file mode 100644 index 0000000..8948ae2 --- /dev/null +++ b/experiments/notebooks/states.ipynb @@ -0,0 +1,1740 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "5abf66eb-f9ab-4680-a4f8-2a59d0989644", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "project_root = \"/home/velocitatem/Documents/Projects/PHANTOM/experiments\"\n", + "from pathlib import Path\n", + "if str(Path.cwd().parent if 'notebooks' in str(Path.cwd()) else Path.cwd()) not in sys.path:\n", + " sys.path.insert(0, str(project_root))" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "67e2b839-83a2-4e78-b96e-dfa6d3ad72f0", + "metadata": {}, + "outputs": [], + "source": [ + "from procesing.steps import (\n", + " ExtractSessionFeaturesStep,\n", + " _extract_features_for_session,\n", + " FetchInteractionsStep,\n", + ")\n", + "from procesing.context import PipelineContext\n", + "from procesing.providers import SupabaseProvider, BackendAPIProvider" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3d8993fa-803d-43b9-8b27-a54d89b6c4d7", + "metadata": {}, + "outputs": [], + "source": [ + "class Provider(SupabaseProvider, BackendAPIProvider):\n", + " def __init__(self, backend_url: str):\n", + " SupabaseProvider.__init__(self)\n", + " 
BackendAPIProvider.__init__(self, backend_url=backend_url)\n", + "# example run\n", + "context = PipelineContext(\n", + " provider=Provider(backend_url=\"http://localhost:5000\"),\n", + " store_mode='hotel',\n", + " window_size='5min',\n", + "\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f2a52096-4c3c-4168-bf5b-5567c7aade41", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sessionIdexperimentIdeventNamepageproductIdstoreModeuserAgenttsmetadata_referrermetadata_elementTextmetadata_dateIndexmetadata_dwellTimemetadata_typemetadata_roomTypemetadata_pricemetadata_nightsmetadata_totalmetadata_itemCountdateIndex
0d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonepage_view/NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:20:13.061ZNaNNaNNaNNaNNaNNaNNaNNaNNaN<NA>
1d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonehover_over_title/hotel/productsd018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:17.425ZNaNJunior Suite1.01200.0NaNNaNNaNNaNNaNNaN1
2d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonehover_over_paragraph/hotel/productsd018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:19.496ZNaNprice1.01202.0NaNNaNNaNNaNNaNNaN1
3d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonepage_view/hotel/products/d018efc1-25e9-4284-b276-80386e...NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:21.922Zhttp://localhost:3000/hotel/products?dateIndex...NaNNaNNaNNaNNaNNaNNaNNaNNaN<NA>
4d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonelearn_more_about_item/hotel/products/d018efc1-25e9-4284-b276-80386e...d018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:22.674ZNaNNaN1.0NaNhotelJunior SuiteNaNNaNNaNNaN1
\n", + "
" + ], + "text/plain": [ + " sessionId experimentId eventName \\\n", + "0 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None page_view \n", + "1 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None hover_over_title \n", + "2 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None hover_over_paragraph \n", + "3 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None page_view \n", + "4 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None learn_more_about_item \n", + "\n", + " page \\\n", + "0 / \n", + "1 /hotel/products \n", + "2 /hotel/products \n", + "3 /hotel/products/d018efc1-25e9-4284-b276-80386e... \n", + "4 /hotel/products/d018efc1-25e9-4284-b276-80386e... \n", + "\n", + " productId storeMode \\\n", + "0 None hotel \n", + "1 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "2 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "3 None hotel \n", + "4 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "\n", + " userAgent \\\n", + "0 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "1 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "2 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "3 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "4 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "\n", + " ts \\\n", + "0 2025-11-25T20:20:13.061Z \n", + "1 2025-11-25T20:21:17.425Z \n", + "2 2025-11-25T20:21:19.496Z \n", + "3 2025-11-25T20:21:21.922Z \n", + "4 2025-11-25T20:21:22.674Z \n", + "\n", + " metadata_referrer metadata_elementText \\\n", + "0 NaN \n", + "1 NaN Junior Suite \n", + "2 NaN price \n", + "3 http://localhost:3000/hotel/products?dateIndex... 
NaN \n", + "4 NaN NaN \n", + "\n", + " metadata_dateIndex metadata_dwellTime metadata_type metadata_roomType \\\n", + "0 NaN NaN NaN NaN \n", + "1 1.0 1200.0 NaN NaN \n", + "2 1.0 1202.0 NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 1.0 NaN hotel Junior Suite \n", + "\n", + " metadata_price metadata_nights metadata_total metadata_itemCount \\\n", + "0 NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN \n", + "\n", + " dateIndex \n", + "0 \n", + "1 1 \n", + "2 1 \n", + "3 \n", + "4 1 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df=FetchInteractionsStep(context).transform(None)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "72160b99-8f5f-4d9e-8a54-99116fe9d202", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['d423ce8a-77aa-4c9a-94d4-d1adddcc3472',\n", + " 'fba26fde-4c50-4545-9734-ff415ac2d791',\n", + " 'e48ae739-dff8-4e56-b9b9-efff9de55a48',\n", + " '3d0fed38-45fd-4d44-8511-d157adacb238',\n", + " 'c404dbe5-116f-42c0-b199-503516dbbe91'], dtype=object)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['sessionId'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "173d968d-fd66-4e16-97d3-da18fbbdc0f2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sessionId\n", + "d423ce8a-77aa-4c9a-94d4-d1adddcc3472 50\n", + "c404dbe5-116f-42c0-b199-503516dbbe91 18\n", + "e48ae739-dff8-4e56-b9b9-efff9de55a48 8\n", + "fba26fde-4c50-4545-9734-ff415ac2d791 2\n", + "3d0fed38-45fd-4d44-8511-d157adacb238 2\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['sessionId'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": 
"924bf0a9-6143-42f5-b779-780caf902ae8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sessionIdexperimentIdeventNamepageproductIdstoreModeuserAgenttsmetadata_referrermetadata_elementTextmetadata_dateIndexmetadata_dwellTimemetadata_typemetadata_roomTypemetadata_pricemetadata_nightsmetadata_totalmetadata_itemCountdateIndex
30d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35hover_over_title/hotel/productsd018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T21:05:43.942ZNaNJunior Suite1.01252.0NaNNaNNaNNaNNaNNaN1
31d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/products/d018efc1-25e9-4284-b276-80386e...NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T21:05:45.407Zhttp://localhost:3000/hotel/products?dateIndex...NaNNaNNaNNaNNaNNaNNaNNaNNaN<NA>
32d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35learn_more_about_item/hotel/products/d018efc1-25e9-4284-b276-80386e...d018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T21:05:45.515ZNaNNaN1.0NaNhotelJunior SuiteNaNNaNNaNNaN1
33d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T21:05:50.176ZNaNNaNNaNNaNNaNNaNNaNNaNNaN<NA>
34d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/products/2ddabbfc-4127-48fc-86dc-ebc4c6...NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T21:05:54.666Zhttp://localhost:3000/hotel/products?dateIndex...NaNNaNNaNNaNNaNNaNNaNNaNNaN<NA>
\n", + "
" + ], + "text/plain": [ + " sessionId \\\n", + "30 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "31 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "32 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "33 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "34 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "\n", + " experimentId eventName \\\n", + "30 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 hover_over_title \n", + "31 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "32 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 learn_more_about_item \n", + "33 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "34 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "\n", + " page \\\n", + "30 /hotel/products \n", + "31 /hotel/products/d018efc1-25e9-4284-b276-80386e... \n", + "32 /hotel/products/d018efc1-25e9-4284-b276-80386e... \n", + "33 /hotel/products \n", + "34 /hotel/products/2ddabbfc-4127-48fc-86dc-ebc4c6... \n", + "\n", + " productId storeMode \\\n", + "30 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "31 None hotel \n", + "32 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "33 None hotel \n", + "34 None hotel \n", + "\n", + " userAgent \\\n", + "30 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "31 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "32 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "33 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "34 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "\n", + " ts \\\n", + "30 2025-11-25T21:05:43.942Z \n", + "31 2025-11-25T21:05:45.407Z \n", + "32 2025-11-25T21:05:45.515Z \n", + "33 2025-11-25T21:05:50.176Z \n", + "34 2025-11-25T21:05:54.666Z \n", + "\n", + " metadata_referrer metadata_elementText \\\n", + "30 NaN Junior Suite \n", + "31 http://localhost:3000/hotel/products?dateIndex... NaN \n", + "32 NaN NaN \n", + "33 NaN \n", + "34 http://localhost:3000/hotel/products?dateIndex... 
NaN \n", + "\n", + " metadata_dateIndex metadata_dwellTime metadata_type metadata_roomType \\\n", + "30 1.0 1252.0 NaN NaN \n", + "31 NaN NaN NaN NaN \n", + "32 1.0 NaN hotel Junior Suite \n", + "33 NaN NaN NaN NaN \n", + "34 NaN NaN NaN NaN \n", + "\n", + " metadata_price metadata_nights metadata_total metadata_itemCount \\\n", + "30 NaN NaN NaN NaN \n", + "31 NaN NaN NaN NaN \n", + "32 NaN NaN NaN NaN \n", + "33 NaN NaN NaN NaN \n", + "34 NaN NaN NaN NaN \n", + "\n", + " dateIndex \n", + "30 1 \n", + "31 \n", + "32 1 \n", + "33 \n", + "34 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "session=\"d423ce8a-77aa-4c9a-94d4-d1adddcc3472\"\n", + "df=df[df['sessionId'] == session]\n", + "df=df.dropna(subset=[\"experimentId\"])\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3db0b073-e141-4ea4-bbf7-7123aeb9057f", + "metadata": {}, + "outputs": [], + "source": [ + "from procesing.steps import ExtractSessionFeaturesStep\n", + "feats = ExtractSessionFeaturesStep(context).transform(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c6527d2c-b68a-42e7-8558-cc957a371260", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sessionIdexperimentIdeventNamepageproductIdstoreModeuserAgenttsmetadata_referrermetadata_elementText...searchescart_addshoversunique_products_viewedproduct_view_depthsession_duration_secinteraction_velocityavg_time_between_eventsstd_time_between_eventscart_to_view_ratio
0d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35hover_over_title/hotel/productsd018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:05:43.942000+00:00NaNJunior Suite...0.00.01.01.01.00.0000.0000000.0000000.0000000.0
1d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/products/d018efc1-25e9-4284-b276-80386e...NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:05:45.407000+00:00http://localhost:3000/hotel/products?dateIndex...NaN...0.00.01.01.01.01.46581.9112631.465000NaN0.0
2d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35learn_more_about_item/hotel/products/d018efc1-25e9-4284-b276-80386e...d018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:05:45.515000+00:00NaNNaN...0.00.01.01.02.01.573114.4310240.7865000.9595440.0
3d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:05:50.176000+00:00NaN...0.00.01.01.02.06.23438.4985562.0780002.3375800.0
4d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/products/2ddabbfc-4127-48fc-86dc-ebc4c6...NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:05:54.666000+00:00http://localhost:3000/hotel/products?dateIndex...NaN...0.00.01.01.02.010.72427.9746362.6810002.2577180.0
5d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35learn_more_about_item/hotel/products/2ddabbfc-4127-48fc-86dc-ebc4c6...2ddabbfc-4127-48fc-86dc-ebc4c677efa2hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:05:54.794000+00:00NaNNaN...0.00.01.02.02.010.85233.1736092.1704002.2641840.0
6d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:05:57.670000+00:00NaN...0.00.01.02.02.013.72830.5944062.2880002.0455320.0
7d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/products/2cd7f756-fc65-4ba0-ab01-74521c...NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:03.130000+00:00http://localhost:3000/hotel/products?dateIndex...NaN...0.00.01.02.02.019.18825.0156352.7411432.2190550.0
8d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35learn_more_about_item/hotel/products/2cd7f756-fc65-4ba0-ab01-74521c...2cd7f756-fc65-4ba0-ab01-74521c1fff43hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:03.253000+00:00NaNNaN...0.00.01.03.02.019.31127.9633372.4138752.2533490.0
9d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35add_item_to_cart/hotel/products/2cd7f756-fc65-4ba0-ab01-74521c...2cd7f756-fc65-4ba0-ab01-74521c1fff43hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:06.815000+00:00NaNNaN...0.01.01.03.02.022.87326.2318022.5414442.1422760.0
10d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:08.600000+00:00NaN...0.01.01.03.02.024.65826.7661612.4658002.0338730.0
11d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35checkout_start/cartNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:11.586000+00:00NaNNaN...0.01.01.03.02.027.64426.0454352.5130911.9358660.0
12d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35hover_over_title/hotel/products7f71fbe2-343c-4a46-94ea-07cbd903a86chotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:28.050000+00:00NaNExecutive Suite...0.01.02.04.02.044.10817.6838673.6756674.4301100.0
13d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35hover_over_paragraph/hotel/products7f71fbe2-343c-4a46-94ea-07cbd903a86chotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:31.806000+00:00NaNprice...0.01.03.04.02.047.86417.5497243.6818464.2415660.0
14d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/products/7f71fbe2-343c-4a46-94ea-07cbd9...NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:33.847000+00:00http://localhost:3000/hotel/products?dateIndex...NaN...0.01.03.04.02.049.90518.0342653.5646434.0986930.0
15d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35learn_more_about_item/hotel/products/7f71fbe2-343c-4a46-94ea-07cbd9...7f71fbe2-343c-4a46-94ea-07cbd903a86chotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:34.029000+00:00NaNNaN...0.01.03.04.03.050.08719.1666503.3391334.0450160.0
16d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:37.255000+00:00NaN...0.01.03.04.03.053.31319.1322943.3320633.9079590.0
17d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35hover_over_title/hotel/products7f71fbe2-343c-4a46-94ea-07cbd903a86chotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:43.694000+00:00NaNExecutive Suite...0.01.04.04.04.059.75218.0747093.5148243.8581680.0
18d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/products/7f71fbe2-343c-4a46-94ea-07cbd9...NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:44.387000+00:00http://localhost:3000/hotel/products?dateIndex...NaN...0.01.04.04.04.060.44518.8601213.3580563.8016070.0
19d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35learn_more_about_item/hotel/products/7f71fbe2-343c-4a46-94ea-07cbd9...7f71fbe2-343c-4a46-94ea-07cbd903a86chotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:44.492000+00:00NaNNaN...0.01.04.04.05.060.55019.8183323.1868423.7691220.0
20d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35add_item_to_cart/hotel/products/7f71fbe2-343c-4a46-94ea-07cbd9...7f71fbe2-343c-4a46-94ea-07cbd903a86chotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:46.279000+00:00NaNNaN...0.02.04.04.06.062.33720.2127153.1168503.6819240.0
21d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35checkout_start/cartNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25 21:06:48.275000+00:00NaNNaN...0.02.04.04.06.064.33320.5182413.0634763.5970210.0
22d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-28 18:26:19.145000+00:00NaN...0.02.04.04.06.064.33321.45088811347.05468253208.0352460.0
23d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35hover_over_title/hotel/productsd018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-28 18:27:27.527000+00:00NaNJunior Suite...0.02.05.04.06.0132.71510.85031810856.67760952037.8677520.0
24d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-28 18:30:23.429000+00:00NaN...0.02.05.04.06.0308.6174.86039310411.64529250940.7151760.0
25d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-28 18:34:12.526000+00:00NaN...0.02.05.04.06.0537.7142.90117110004.34336049909.7249870.0
26d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-28 18:34:42.426000+00:00NaN...0.02.05.04.06.0567.6142.8540529620.71092348940.4530260.0
27d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-28 18:39:39.397000+00:00NaN...0.02.05.04.06.0864.5851.9431299275.38722248023.5963550.0
28d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-28 18:40:49.203000+00:00NaN...0.02.05.04.06.0934.3911.8621758946.61646447157.9800320.0
29d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/admin/experimentsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-29 17:27:43.775000+00:00NaN...0.02.05.04.06.0934.3911.92638811466.20113848255.0709410.0
\n", + "

30 rows × 32 columns

\n", + "
" + ], + "text/plain": [ + " sessionId \\\n", + "0 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "1 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "2 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "3 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "4 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "5 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "6 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "7 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "8 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "9 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "10 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "11 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "12 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "13 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "14 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "15 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "16 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "17 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "18 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "19 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "20 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "21 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "22 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "23 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "24 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "25 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "26 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "27 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "28 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "29 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "\n", + " experimentId eventName \\\n", + "0 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 hover_over_title \n", + "1 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "2 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 learn_more_about_item \n", + "3 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "4 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "5 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 learn_more_about_item \n", + "6 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "7 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view 
\n", + "8 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 learn_more_about_item \n", + "9 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 add_item_to_cart \n", + "10 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "11 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 checkout_start \n", + "12 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 hover_over_title \n", + "13 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 hover_over_paragraph \n", + "14 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "15 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 learn_more_about_item \n", + "16 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "17 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 hover_over_title \n", + "18 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "19 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 learn_more_about_item \n", + "20 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 add_item_to_cart \n", + "21 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 checkout_start \n", + "22 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "23 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 hover_over_title \n", + "24 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "25 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "26 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "27 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "28 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "29 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view \n", + "\n", + " page \\\n", + "0 /hotel/products \n", + "1 /hotel/products/d018efc1-25e9-4284-b276-80386e... \n", + "2 /hotel/products/d018efc1-25e9-4284-b276-80386e... \n", + "3 /hotel/products \n", + "4 /hotel/products/2ddabbfc-4127-48fc-86dc-ebc4c6... \n", + "5 /hotel/products/2ddabbfc-4127-48fc-86dc-ebc4c6... \n", + "6 /hotel/products \n", + "7 /hotel/products/2cd7f756-fc65-4ba0-ab01-74521c... \n", + "8 /hotel/products/2cd7f756-fc65-4ba0-ab01-74521c... \n", + "9 /hotel/products/2cd7f756-fc65-4ba0-ab01-74521c... 
\n", + "10 /hotel/products \n", + "11 /cart \n", + "12 /hotel/products \n", + "13 /hotel/products \n", + "14 /hotel/products/7f71fbe2-343c-4a46-94ea-07cbd9... \n", + "15 /hotel/products/7f71fbe2-343c-4a46-94ea-07cbd9... \n", + "16 /hotel/products \n", + "17 /hotel/products \n", + "18 /hotel/products/7f71fbe2-343c-4a46-94ea-07cbd9... \n", + "19 /hotel/products/7f71fbe2-343c-4a46-94ea-07cbd9... \n", + "20 /hotel/products/7f71fbe2-343c-4a46-94ea-07cbd9... \n", + "21 /cart \n", + "22 / \n", + "23 /hotel/products \n", + "24 /hotel/products \n", + "25 /hotel/products \n", + "26 /hotel/products \n", + "27 /hotel/products \n", + "28 /hotel/products \n", + "29 /admin/experiments \n", + "\n", + " productId storeMode \\\n", + "0 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "1 None hotel \n", + "2 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "3 None hotel \n", + "4 None hotel \n", + "5 2ddabbfc-4127-48fc-86dc-ebc4c677efa2 hotel \n", + "6 None hotel \n", + "7 None hotel \n", + "8 2cd7f756-fc65-4ba0-ab01-74521c1fff43 hotel \n", + "9 2cd7f756-fc65-4ba0-ab01-74521c1fff43 hotel \n", + "10 None hotel \n", + "11 None hotel \n", + "12 7f71fbe2-343c-4a46-94ea-07cbd903a86c hotel \n", + "13 7f71fbe2-343c-4a46-94ea-07cbd903a86c hotel \n", + "14 None hotel \n", + "15 7f71fbe2-343c-4a46-94ea-07cbd903a86c hotel \n", + "16 None hotel \n", + "17 7f71fbe2-343c-4a46-94ea-07cbd903a86c hotel \n", + "18 None hotel \n", + "19 7f71fbe2-343c-4a46-94ea-07cbd903a86c hotel \n", + "20 7f71fbe2-343c-4a46-94ea-07cbd903a86c hotel \n", + "21 None hotel \n", + "22 None hotel \n", + "23 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "24 None hotel \n", + "25 None hotel \n", + "26 None hotel \n", + "27 None hotel \n", + "28 None hotel \n", + "29 None hotel \n", + "\n", + " userAgent \\\n", + "0 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "1 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "2 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... 
\n", + "3 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "4 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "5 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "6 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "7 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "8 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "9 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "10 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "11 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "12 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "13 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "14 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "15 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "16 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "17 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "18 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "19 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "20 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "21 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "22 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "23 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "24 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "25 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "26 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "27 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "28 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "29 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... 
\n", + "\n", + " ts \\\n", + "0 2025-11-25 21:05:43.942000+00:00 \n", + "1 2025-11-25 21:05:45.407000+00:00 \n", + "2 2025-11-25 21:05:45.515000+00:00 \n", + "3 2025-11-25 21:05:50.176000+00:00 \n", + "4 2025-11-25 21:05:54.666000+00:00 \n", + "5 2025-11-25 21:05:54.794000+00:00 \n", + "6 2025-11-25 21:05:57.670000+00:00 \n", + "7 2025-11-25 21:06:03.130000+00:00 \n", + "8 2025-11-25 21:06:03.253000+00:00 \n", + "9 2025-11-25 21:06:06.815000+00:00 \n", + "10 2025-11-25 21:06:08.600000+00:00 \n", + "11 2025-11-25 21:06:11.586000+00:00 \n", + "12 2025-11-25 21:06:28.050000+00:00 \n", + "13 2025-11-25 21:06:31.806000+00:00 \n", + "14 2025-11-25 21:06:33.847000+00:00 \n", + "15 2025-11-25 21:06:34.029000+00:00 \n", + "16 2025-11-25 21:06:37.255000+00:00 \n", + "17 2025-11-25 21:06:43.694000+00:00 \n", + "18 2025-11-25 21:06:44.387000+00:00 \n", + "19 2025-11-25 21:06:44.492000+00:00 \n", + "20 2025-11-25 21:06:46.279000+00:00 \n", + "21 2025-11-25 21:06:48.275000+00:00 \n", + "22 2025-11-28 18:26:19.145000+00:00 \n", + "23 2025-11-28 18:27:27.527000+00:00 \n", + "24 2025-11-28 18:30:23.429000+00:00 \n", + "25 2025-11-28 18:34:12.526000+00:00 \n", + "26 2025-11-28 18:34:42.426000+00:00 \n", + "27 2025-11-28 18:39:39.397000+00:00 \n", + "28 2025-11-28 18:40:49.203000+00:00 \n", + "29 2025-11-29 17:27:43.775000+00:00 \n", + "\n", + " metadata_referrer metadata_elementText \\\n", + "0 NaN Junior Suite \n", + "1 http://localhost:3000/hotel/products?dateIndex... NaN \n", + "2 NaN NaN \n", + "3 NaN \n", + "4 http://localhost:3000/hotel/products?dateIndex... NaN \n", + "5 NaN NaN \n", + "6 NaN \n", + "7 http://localhost:3000/hotel/products?dateIndex... NaN \n", + "8 NaN NaN \n", + "9 NaN NaN \n", + "10 NaN \n", + "11 NaN NaN \n", + "12 NaN Executive Suite \n", + "13 NaN price \n", + "14 http://localhost:3000/hotel/products?dateIndex... NaN \n", + "15 NaN NaN \n", + "16 NaN \n", + "17 NaN Executive Suite \n", + "18 http://localhost:3000/hotel/products?dateIndex... 
NaN \n", + "19 NaN NaN \n", + "20 NaN NaN \n", + "21 NaN NaN \n", + "22 NaN \n", + "23 NaN Junior Suite \n", + "24 NaN \n", + "25 NaN \n", + "26 NaN \n", + "27 NaN \n", + "28 NaN \n", + "29 NaN \n", + "\n", + " ... searches cart_adds hovers unique_products_viewed \\\n", + "0 ... 0.0 0.0 1.0 1.0 \n", + "1 ... 0.0 0.0 1.0 1.0 \n", + "2 ... 0.0 0.0 1.0 1.0 \n", + "3 ... 0.0 0.0 1.0 1.0 \n", + "4 ... 0.0 0.0 1.0 1.0 \n", + "5 ... 0.0 0.0 1.0 2.0 \n", + "6 ... 0.0 0.0 1.0 2.0 \n", + "7 ... 0.0 0.0 1.0 2.0 \n", + "8 ... 0.0 0.0 1.0 3.0 \n", + "9 ... 0.0 1.0 1.0 3.0 \n", + "10 ... 0.0 1.0 1.0 3.0 \n", + "11 ... 0.0 1.0 1.0 3.0 \n", + "12 ... 0.0 1.0 2.0 4.0 \n", + "13 ... 0.0 1.0 3.0 4.0 \n", + "14 ... 0.0 1.0 3.0 4.0 \n", + "15 ... 0.0 1.0 3.0 4.0 \n", + "16 ... 0.0 1.0 3.0 4.0 \n", + "17 ... 0.0 1.0 4.0 4.0 \n", + "18 ... 0.0 1.0 4.0 4.0 \n", + "19 ... 0.0 1.0 4.0 4.0 \n", + "20 ... 0.0 2.0 4.0 4.0 \n", + "21 ... 0.0 2.0 4.0 4.0 \n", + "22 ... 0.0 2.0 4.0 4.0 \n", + "23 ... 0.0 2.0 5.0 4.0 \n", + "24 ... 0.0 2.0 5.0 4.0 \n", + "25 ... 0.0 2.0 5.0 4.0 \n", + "26 ... 0.0 2.0 5.0 4.0 \n", + "27 ... 0.0 2.0 5.0 4.0 \n", + "28 ... 0.0 2.0 5.0 4.0 \n", + "29 ... 
0.0 2.0 5.0 4.0 \n", + "\n", + " product_view_depth session_duration_sec interaction_velocity \\\n", + "0 1.0 0.000 0.000000 \n", + "1 1.0 1.465 81.911263 \n", + "2 2.0 1.573 114.431024 \n", + "3 2.0 6.234 38.498556 \n", + "4 2.0 10.724 27.974636 \n", + "5 2.0 10.852 33.173609 \n", + "6 2.0 13.728 30.594406 \n", + "7 2.0 19.188 25.015635 \n", + "8 2.0 19.311 27.963337 \n", + "9 2.0 22.873 26.231802 \n", + "10 2.0 24.658 26.766161 \n", + "11 2.0 27.644 26.045435 \n", + "12 2.0 44.108 17.683867 \n", + "13 2.0 47.864 17.549724 \n", + "14 2.0 49.905 18.034265 \n", + "15 3.0 50.087 19.166650 \n", + "16 3.0 53.313 19.132294 \n", + "17 4.0 59.752 18.074709 \n", + "18 4.0 60.445 18.860121 \n", + "19 5.0 60.550 19.818332 \n", + "20 6.0 62.337 20.212715 \n", + "21 6.0 64.333 20.518241 \n", + "22 6.0 64.333 21.450888 \n", + "23 6.0 132.715 10.850318 \n", + "24 6.0 308.617 4.860393 \n", + "25 6.0 537.714 2.901171 \n", + "26 6.0 567.614 2.854052 \n", + "27 6.0 864.585 1.943129 \n", + "28 6.0 934.391 1.862175 \n", + "29 6.0 934.391 1.926388 \n", + "\n", + " avg_time_between_events std_time_between_events cart_to_view_ratio \n", + "0 0.000000 0.000000 0.0 \n", + "1 1.465000 NaN 0.0 \n", + "2 0.786500 0.959544 0.0 \n", + "3 2.078000 2.337580 0.0 \n", + "4 2.681000 2.257718 0.0 \n", + "5 2.170400 2.264184 0.0 \n", + "6 2.288000 2.045532 0.0 \n", + "7 2.741143 2.219055 0.0 \n", + "8 2.413875 2.253349 0.0 \n", + "9 2.541444 2.142276 0.0 \n", + "10 2.465800 2.033873 0.0 \n", + "11 2.513091 1.935866 0.0 \n", + "12 3.675667 4.430110 0.0 \n", + "13 3.681846 4.241566 0.0 \n", + "14 3.564643 4.098693 0.0 \n", + "15 3.339133 4.045016 0.0 \n", + "16 3.332063 3.907959 0.0 \n", + "17 3.514824 3.858168 0.0 \n", + "18 3.358056 3.801607 0.0 \n", + "19 3.186842 3.769122 0.0 \n", + "20 3.116850 3.681924 0.0 \n", + "21 3.063476 3.597021 0.0 \n", + "22 11347.054682 53208.035246 0.0 \n", + "23 10856.677609 52037.867752 0.0 \n", + "24 10411.645292 50940.715176 0.0 \n", + "25 10004.343360 49909.724987 
0.0 \n", + "26 9620.710923 48940.453026 0.0 \n", + "27 9275.387222 48023.596355 0.0 \n", + "28 8946.616464 47157.980032 0.0 \n", + "29 11466.201138 48255.070941 0.0 \n", + "\n", + "[30 rows x 32 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bdf92ad5-a15a-47d1-988e-f2d976f81416", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (PHANTOM)", + "language": "python", + "name": "phantom" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/experiments/notebooks/step_breakdown.ipynb b/experiments/notebooks/step_breakdown.ipynb new file mode 100644 index 0000000..a2dfa18 --- /dev/null +++ b/experiments/notebooks/step_breakdown.ipynb @@ -0,0 +1,2320 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "d6bc6a6d-2454-4222-a1ed-1b06bb7b95d1", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from pathlib import Path\n", + "project_root = \"/home/velocitatem/Documents/Projects/PHANTOM/experiments\"\n", + "if str(Path.cwd().parent if 'notebooks' in str(Path.cwd()) else Path.cwd()) not in sys.path:\n", + " sys.path.insert(0, str(project_root))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6926d0a1-02f2-47c1-b927-6b5bd28ae8cc", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a48f6ab9-46bf-4553-8489-f5ecb58f8d7c", + "metadata": {}, + "outputs": [], + "source": [ + "from procesing.steps import (\n", + " FetchInteractionsStep,\n", + " 
FetchPriceLogsStep,\n", + " FetchExperimentsStep,\n", + " JoinExperimentsStep,\n", + " CreatePriceBucketsStep,\n", + " AugmentEventNamesStep,\n", + " ChunkByTimeWindowStep,\n", + " ComputeDemandForChunksStep,\n", + " AggregatePriceLogsStep,\n", + " ComputeElasticityStep,\n", + " FitPricingFunctionStep,\n", + " PredictPricesStep,\n", + ")\n", + "from procesing.context import PipelineContext\n", + "from procesing.providers import SupabaseProvider, BackendAPIProvider" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "bb7ebdfb-432e-4cf6-b0ee-dc030107ebc5", + "metadata": {}, + "outputs": [], + "source": [ + "class Provider(SupabaseProvider, BackendAPIProvider):\n", + " def __init__(self, backend_url: str):\n", + " SupabaseProvider.__init__(self)\n", + " BackendAPIProvider.__init__(self, backend_url=backend_url)\n", + "# example run\n", + "context = PipelineContext(\n", + " provider=Provider(backend_url=\"http://localhost:5000\"),\n", + " store_mode='hotel',\n", + " window_size='15min',\n", + "\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "587b1fdc-30f4-4ee0-b603-7a54b8bed5eb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sessionIdexperimentIdeventNamepageproductIdstoreModeuserAgenttsmetadata_referrermetadata_elementTextmetadata_dateIndexmetadata_dwellTimemetadata_typemetadata_roomTypemetadata_pricemetadata_nightsmetadata_totalmetadata_itemCountdateIndex
0d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonepage_view/NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:20:13.061ZNaNNaNNaNNaNNaNNaNNaNNaNNaN<NA>
1d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonehover_over_title/hotel/productsd018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:17.425ZNaNJunior Suite1.01200.0NaNNaNNaNNaNNaNNaN1
2d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonehover_over_paragraph/hotel/productsd018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:19.496ZNaNprice1.01202.0NaNNaNNaNNaNNaNNaN1
3d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonepage_view/hotel/products/d018efc1-25e9-4284-b276-80386e...NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:21.922Zhttp://localhost:3000/hotel/products?dateIndex...NaNNaNNaNNaNNaNNaNNaNNaNNaN<NA>
4d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonelearn_more_about_item/hotel/products/d018efc1-25e9-4284-b276-80386e...d018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:22.674ZNaNNaN1.0NaNhotelJunior SuiteNaNNaNNaNNaN1
\n", + "
" + ], + "text/plain": [ + " sessionId experimentId eventName \\\n", + "0 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None page_view \n", + "1 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None hover_over_title \n", + "2 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None hover_over_paragraph \n", + "3 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None page_view \n", + "4 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None learn_more_about_item \n", + "\n", + " page \\\n", + "0 / \n", + "1 /hotel/products \n", + "2 /hotel/products \n", + "3 /hotel/products/d018efc1-25e9-4284-b276-80386e... \n", + "4 /hotel/products/d018efc1-25e9-4284-b276-80386e... \n", + "\n", + " productId storeMode \\\n", + "0 None hotel \n", + "1 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "2 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "3 None hotel \n", + "4 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "\n", + " userAgent \\\n", + "0 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "1 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "2 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "3 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "4 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "\n", + " ts \\\n", + "0 2025-11-25T20:20:13.061Z \n", + "1 2025-11-25T20:21:17.425Z \n", + "2 2025-11-25T20:21:19.496Z \n", + "3 2025-11-25T20:21:21.922Z \n", + "4 2025-11-25T20:21:22.674Z \n", + "\n", + " metadata_referrer metadata_elementText \\\n", + "0 NaN \n", + "1 NaN Junior Suite \n", + "2 NaN price \n", + "3 http://localhost:3000/hotel/products?dateIndex... 
NaN \n", + "4 NaN NaN \n", + "\n", + " metadata_dateIndex metadata_dwellTime metadata_type metadata_roomType \\\n", + "0 NaN NaN NaN NaN \n", + "1 1.0 1200.0 NaN NaN \n", + "2 1.0 1202.0 NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 1.0 NaN hotel Junior Suite \n", + "\n", + " metadata_price metadata_nights metadata_total metadata_itemCount \\\n", + "0 NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN \n", + "\n", + " dateIndex \n", + "0 \n", + "1 1 \n", + "2 1 \n", + "3 \n", + "4 1 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df=FetchInteractionsStep(context).transform(None)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "45022876-30d9-4607-a10f-932df9b4dbda", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sessionIdexperimentIdeventNamepageproductIdstoreModeuserAgenttsmetadata_referrermetadata_elementTextmetadata_dateIndexmetadata_dwellTimemetadata_typemetadata_roomTypemetadata_pricemetadata_nightsmetadata_totalmetadata_itemCountdateIndexprice_bucket
0d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonepage_view/NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:20:13.061ZNaNNaNNaNNaNNaNNaNNaNNaNNaN<NA>
1d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonehover_over_title/hotel/productsd018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:17.425ZNaNJunior Suite1.01200.0NaNNaNNaNNaNNaNNaN1
2d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonehover_over_paragraph/hotel/productsd018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:19.496ZNaNprice1.01202.0NaNNaNNaNNaNNaNNaN1
3d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonepage_view/hotel/products/d018efc1-25e9-4284-b276-80386e...NonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:21.922Zhttp://localhost:3000/hotel/products?dateIndex...NaNNaNNaNNaNNaNNaNNaNNaNNaN<NA>
4d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonelearn_more_about_item/hotel/products/d018efc1-25e9-4284-b276-80386e...d018efc1-25e9-4284-b276-80386e048b25hotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-11-25T20:21:22.674ZNaNNaN1.0NaNhotelJunior SuiteNaNNaNNaNNaN1
\n", + "
" + ], + "text/plain": [ + " sessionId experimentId eventName \\\n", + "0 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None page_view \n", + "1 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None hover_over_title \n", + "2 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None hover_over_paragraph \n", + "3 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None page_view \n", + "4 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None learn_more_about_item \n", + "\n", + " page \\\n", + "0 / \n", + "1 /hotel/products \n", + "2 /hotel/products \n", + "3 /hotel/products/d018efc1-25e9-4284-b276-80386e... \n", + "4 /hotel/products/d018efc1-25e9-4284-b276-80386e... \n", + "\n", + " productId storeMode \\\n", + "0 None hotel \n", + "1 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "2 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "3 None hotel \n", + "4 d018efc1-25e9-4284-b276-80386e048b25 hotel \n", + "\n", + " userAgent \\\n", + "0 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "1 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "2 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "3 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "4 Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "\n", + " ts \\\n", + "0 2025-11-25T20:20:13.061Z \n", + "1 2025-11-25T20:21:17.425Z \n", + "2 2025-11-25T20:21:19.496Z \n", + "3 2025-11-25T20:21:21.922Z \n", + "4 2025-11-25T20:21:22.674Z \n", + "\n", + " metadata_referrer metadata_elementText \\\n", + "0 NaN \n", + "1 NaN Junior Suite \n", + "2 NaN price \n", + "3 http://localhost:3000/hotel/products?dateIndex... 
NaN \n", + "4 NaN NaN \n", + "\n", + " metadata_dateIndex metadata_dwellTime metadata_type metadata_roomType \\\n", + "0 NaN NaN NaN NaN \n", + "1 1.0 1200.0 NaN NaN \n", + "2 1.0 1202.0 NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 1.0 NaN hotel Junior Suite \n", + "\n", + " metadata_price metadata_nights metadata_total metadata_itemCount \\\n", + "0 NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN \n", + "\n", + " dateIndex price_bucket \n", + "0 \n", + "1 1 \n", + "2 1 \n", + "3 \n", + "4 1 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = CreatePriceBucketsStep(context).transform(df)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "720b9631-4350-425a-ad29-ded745ce28f9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sessionIdexperimentIdeventNamepageproductIdstoreModeuserAgenttsmetadata_referrermetadata_elementText...metadata_dwellTimemetadata_typemetadata_roomTypemetadata_pricemetadata_nightsmetadata_totalmetadata_itemCountdateIndexprice_bucketmetadata_schema
78c404dbe5-116f-42c0-b199-503516dbbe91fd01774c-f629-4bcb-88b8-c818856af72apage_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-29T17:32:45.064ZNaN...NaNNaNNaNNaNNaNNaNNaN<NA>
79c404dbe5-116f-42c0-b199-503516dbbe91fd01774c-f629-4bcb-88b8-c818856af72apage_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-29T18:13:53.858ZNaN...NaNNaNNaNNaNNaNNaNNaN<NA>
80d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-12-04T11:13:15.884ZNaN...NaNNaNNaNNaNNaNNaNNaN<NA>
81d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-12-04T11:18:53.473ZNaN...NaNNaNNaNNaNNaNNaNNaN<NA>
82d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-12-04T11:19:05.094ZNaN...NaNNaNNaNNaNNaNNaNNaN<NA>
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " sessionId \\\n", + "78 c404dbe5-116f-42c0-b199-503516dbbe91 \n", + "79 c404dbe5-116f-42c0-b199-503516dbbe91 \n", + "80 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "81 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "82 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "\n", + " experimentId eventName page \\\n", + "78 fd01774c-f629-4bcb-88b8-c818856af72a page_view /hotel/products \n", + "79 fd01774c-f629-4bcb-88b8-c818856af72a page_view /hotel/products \n", + "80 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view /hotel/products \n", + "81 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view /hotel/products \n", + "82 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view /hotel/products \n", + "\n", + " productId storeMode userAgent \\\n", + "78 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "79 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "80 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "81 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "82 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "\n", + " ts metadata_referrer metadata_elementText ... \\\n", + "78 2025-11-29T17:32:45.064Z NaN ... \n", + "79 2025-11-29T18:13:53.858Z NaN ... \n", + "80 2025-12-04T11:13:15.884Z NaN ... \n", + "81 2025-12-04T11:18:53.473Z NaN ... \n", + "82 2025-12-04T11:19:05.094Z NaN ... 
\n", + "\n", + " metadata_dwellTime metadata_type metadata_roomType metadata_price \\\n", + "78 NaN NaN NaN NaN \n", + "79 NaN NaN NaN NaN \n", + "80 NaN NaN NaN NaN \n", + "81 NaN NaN NaN NaN \n", + "82 NaN NaN NaN NaN \n", + "\n", + " metadata_nights metadata_total metadata_itemCount dateIndex \\\n", + "78 NaN NaN NaN \n", + "79 NaN NaN NaN \n", + "80 NaN NaN NaN \n", + "81 NaN NaN NaN \n", + "82 NaN NaN NaN \n", + "\n", + " price_bucket metadata_schema \n", + "78 \n", + "79 \n", + "80 \n", + "81 \n", + "82 \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = AugmentEventNamesStep(context).transform(df)\n", + "df.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "9c6add1a-147a-4086-a437-3f47f17d69bb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
productIdpricesessionIdexperimentIdstoreModets
2132cd7f756-fc65-4ba0-ab01-74521c1fff43100.0d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonehotel2025-12-04T11:18:56.320Z
2142ddabbfc-4127-48fc-86dc-ebc4c677efa2100.0d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonehotel2025-12-04T11:19:05.434Z
2152cd7f756-fc65-4ba0-ab01-74521c1fff43100.0d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonehotel2025-12-04T11:19:05.338Z
2162cd7f756-fc65-4ba0-ab01-74521c1fff43100.0d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonehotel2025-12-04T11:19:05.597Z
2172ddabbfc-4127-48fc-86dc-ebc4c677efa2100.0d423ce8a-77aa-4c9a-94d4-d1adddcc3472Nonehotel2025-12-04T11:19:05.594Z
\n", + "
" + ], + "text/plain": [ + " productId price \\\n", + "213 2cd7f756-fc65-4ba0-ab01-74521c1fff43 100.0 \n", + "214 2ddabbfc-4127-48fc-86dc-ebc4c677efa2 100.0 \n", + "215 2cd7f756-fc65-4ba0-ab01-74521c1fff43 100.0 \n", + "216 2cd7f756-fc65-4ba0-ab01-74521c1fff43 100.0 \n", + "217 2ddabbfc-4127-48fc-86dc-ebc4c677efa2 100.0 \n", + "\n", + " sessionId experimentId storeMode \\\n", + "213 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None hotel \n", + "214 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None hotel \n", + "215 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None hotel \n", + "216 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None hotel \n", + "217 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 None hotel \n", + "\n", + " ts \n", + "213 2025-12-04T11:18:56.320Z \n", + "214 2025-12-04T11:19:05.434Z \n", + "215 2025-12-04T11:19:05.338Z \n", + "216 2025-12-04T11:19:05.597Z \n", + "217 2025-12-04T11:19:05.594Z " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "price_df=FetchPriceLogsStep(context).fit_transform(None)\n", + "price_df.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "28c3d101-8e82-46c3-aa06-765bd2799177", + "metadata": {}, + "outputs": [], + "source": [ + "df_chunks = ChunkByTimeWindowStep(context).transform(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "9f1e4005-a1ed-423e-ad55-cc00a18ac10b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df_chunks)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f427318c-c4d3-448a-9b02-f5f71ef15f5c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2025-12-04 11:15:00+0000', tz='UTC')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_chunks[-1]['window_start']" + ] + }, + { + 
"cell_type": "code", + "execution_count": 14, + "id": "fc87a9cc-30f0-460d-a13d-b61f7bf83fab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2025-12-04 11:30:00+0000', tz='UTC')" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_chunks[-1]['window_end']" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "2d850541-47f5-4cd5-8777-ca526645a39b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sessionIdexperimentIdeventNamepageproductIdstoreModeuserAgenttsmetadata_referrermetadata_elementText...metadata_dwellTimemetadata_typemetadata_roomTypemetadata_pricemetadata_nightsmetadata_totalmetadata_itemCountdateIndexprice_bucketmetadata_schema
81d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-12-04 11:18:53.473000+00:00NaN...NaNNaNNaNNaNNaNNaNNaN<NA>
82d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-12-04 11:19:05.094000+00:00NaN...NaNNaNNaNNaNNaNNaNNaN<NA>
\n", + "

2 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " sessionId \\\n", + "81 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "82 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "\n", + " experimentId eventName page \\\n", + "81 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view /hotel/products \n", + "82 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view /hotel/products \n", + "\n", + " productId storeMode userAgent \\\n", + "81 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "82 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "\n", + " ts metadata_referrer metadata_elementText \\\n", + "81 2025-12-04 11:18:53.473000+00:00 NaN \n", + "82 2025-12-04 11:19:05.094000+00:00 NaN \n", + "\n", + " ... metadata_dwellTime metadata_type metadata_roomType metadata_price \\\n", + "81 ... NaN NaN NaN NaN \n", + "82 ... NaN NaN NaN NaN \n", + "\n", + " metadata_nights metadata_total metadata_itemCount dateIndex \\\n", + "81 NaN NaN NaN \n", + "82 NaN NaN NaN \n", + "\n", + " price_bucket metadata_schema \n", + "81 \n", + "82 \n", + "\n", + "[2 rows x 21 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_chunks[-1]['data'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "eda09da8-324b-4af9-971c-d366d8b870d8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "demand = ComputeDemandForChunksStep(context).transform(df_chunks)\n", + "len(demand)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "bd2d40ce-6f51-42a8-8849-cc1a4479f4d9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
productIddemand_score
0bec37f41-7756-47ae-9219-f5854290f4e70
15e666c06-023a-415b-9976-be0956bbc4050
2d018efc1-25e9-4284-b276-80386e048b250
32cd7f756-fc65-4ba0-ab01-74521c1fff430
451266ddb-5b07-47b7-89ee-5b5cae94bb110
.........
790d1c9a3a-bc37-4417-a59f-de4b994944cb0
80fc64bd74-4dfa-4f78-802a-39d6aa4c39fe0
81d85d4c52-baa0-435f-81ac-b0c27a5251b30
8293bc00e5-8cfe-42af-8322-49bc274076880
8318cc01db-55cc-42a5-aab5-e3ec448548d80
\n", + "

84 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " productId demand_score\n", + "0 bec37f41-7756-47ae-9219-f5854290f4e7 0\n", + "1 5e666c06-023a-415b-9976-be0956bbc405 0\n", + "2 d018efc1-25e9-4284-b276-80386e048b25 0\n", + "3 2cd7f756-fc65-4ba0-ab01-74521c1fff43 0\n", + "4 51266ddb-5b07-47b7-89ee-5b5cae94bb11 0\n", + ".. ... ...\n", + "79 0d1c9a3a-bc37-4417-a59f-de4b994944cb 0\n", + "80 fc64bd74-4dfa-4f78-802a-39d6aa4c39fe 0\n", + "81 d85d4c52-baa0-435f-81ac-b0c27a5251b3 0\n", + "82 93bc00e5-8cfe-42af-8322-49bc27407688 0\n", + "83 18cc01db-55cc-42a5-aab5-e3ec448548d8 0\n", + "\n", + "[84 rows x 2 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "demand[-1]['demand_vector']" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "20293a35-c0a8-416e-bfb9-178883f4ca5f", + "metadata": {}, + "outputs": [], + "source": [ + "price_df_agg = AggregatePriceLogsStep(context).transform(price_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "bf378587-f5ef-431b-a1c2-663124b5e42b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
productIdprice
02cd7f756-fc65-4ba0-ab01-74521c1fff43100.00
12ddabbfc-4127-48fc-86dc-ebc4c677efa2100.00
251266ddb-5b07-47b7-89ee-5b5cae94bb11100.00
3d018efc1-25e9-4284-b276-80386e048b25100.00
4aaae8177-0803-4421-8702-f3ffeeeadcd9389.04
57f71fbe2-343c-4a46-94ea-07cbd903a86c327.94
6d6affcb8-6616-47f8-af14-2ec8583f0781391.43
70fbcf915-ecf1-4ec3-9b00-8bbc314e2a81900.97
8eceedfb3-ec52-4453-9aab-88dd9a6b6ca3640.54
\n", + "
" + ], + "text/plain": [ + " productId price\n", + "0 2cd7f756-fc65-4ba0-ab01-74521c1fff43 100.00\n", + "1 2ddabbfc-4127-48fc-86dc-ebc4c677efa2 100.00\n", + "2 51266ddb-5b07-47b7-89ee-5b5cae94bb11 100.00\n", + "3 d018efc1-25e9-4284-b276-80386e048b25 100.00\n", + "4 aaae8177-0803-4421-8702-f3ffeeeadcd9 389.04\n", + "5 7f71fbe2-343c-4a46-94ea-07cbd903a86c 327.94\n", + "6 d6affcb8-6616-47f8-af14-2ec8583f0781 391.43\n", + "7 0fbcf915-ecf1-4ec3-9b00-8bbc314e2a81 900.97\n", + "8 eceedfb3-ec52-4453-9aab-88dd9a6b6ca3 640.54" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "price_df_agg[-1]['price_vector']" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "d05f003a-9537-49f6-a814-118e80cd8748", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
productIdelasticitystd_errorn_obs
0d018efc1-25e9-4284-b276-80386e048b25-0.2220540.44710211
12cd7f756-fc65-4ba0-ab01-74521c1fff43-0.0728570.51013011
251266ddb-5b07-47b7-89ee-5b5cae94bb11-0.2910830.34687911
32ddabbfc-4127-48fc-86dc-ebc4c677efa2-10.2239680.00000011
47f71fbe2-343c-4a46-94ea-07cbd903a86c0.0000000.0000009
...............
790d1c9a3a-bc37-4417-a59f-de4b994944cb0.0000000.0000000
80fc64bd74-4dfa-4f78-802a-39d6aa4c39fe0.0000000.0000000
81d85d4c52-baa0-435f-81ac-b0c27a5251b30.0000000.0000000
8293bc00e5-8cfe-42af-8322-49bc274076880.0000000.0000000
8318cc01db-55cc-42a5-aab5-e3ec448548d80.0000000.0000000
\n", + "

84 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " productId elasticity std_error n_obs\n", + "0 d018efc1-25e9-4284-b276-80386e048b25 -0.222054 0.447102 11\n", + "1 2cd7f756-fc65-4ba0-ab01-74521c1fff43 -0.072857 0.510130 11\n", + "2 51266ddb-5b07-47b7-89ee-5b5cae94bb11 -0.291083 0.346879 11\n", + "3 2ddabbfc-4127-48fc-86dc-ebc4c677efa2 -10.223968 0.000000 11\n", + "4 7f71fbe2-343c-4a46-94ea-07cbd903a86c 0.000000 0.000000 9\n", + ".. ... ... ... ...\n", + "79 0d1c9a3a-bc37-4417-a59f-de4b994944cb 0.000000 0.000000 0\n", + "80 fc64bd74-4dfa-4f78-802a-39d6aa4c39fe 0.000000 0.000000 0\n", + "81 d85d4c52-baa0-435f-81ac-b0c27a5251b3 0.000000 0.000000 0\n", + "82 93bc00e5-8cfe-42af-8322-49bc27407688 0.000000 0.000000 0\n", + "83 18cc01db-55cc-42a5-aab5-e3ec448548d8 0.000000 0.000000 0\n", + "\n", + "[84 rows x 4 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "elasticity = ComputeElasticityStep(context).transform((demand, price_df_agg))\n", + "elasticity" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "926cd9ea-8f6b-43b5-95a5-8fdf5309e1bd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
elasticitystd_errorn_obs
productId
d018efc1-25e9-4284-b276-80386e048b25-0.2220540.44710211
2cd7f756-fc65-4ba0-ab01-74521c1fff43-0.0728570.51013011
51266ddb-5b07-47b7-89ee-5b5cae94bb11-0.2910830.34687911
2ddabbfc-4127-48fc-86dc-ebc4c677efa2-10.2239680.00000011
7f71fbe2-343c-4a46-94ea-07cbd903a86c0.0000000.0000009
............
0d1c9a3a-bc37-4417-a59f-de4b994944cb0.0000000.0000000
fc64bd74-4dfa-4f78-802a-39d6aa4c39fe0.0000000.0000000
d85d4c52-baa0-435f-81ac-b0c27a5251b30.0000000.0000000
93bc00e5-8cfe-42af-8322-49bc274076880.0000000.0000000
18cc01db-55cc-42a5-aab5-e3ec448548d80.0000000.0000000
\n", + "

84 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " elasticity std_error n_obs\n", + "productId \n", + "d018efc1-25e9-4284-b276-80386e048b25 -0.222054 0.447102 11\n", + "2cd7f756-fc65-4ba0-ab01-74521c1fff43 -0.072857 0.510130 11\n", + "51266ddb-5b07-47b7-89ee-5b5cae94bb11 -0.291083 0.346879 11\n", + "2ddabbfc-4127-48fc-86dc-ebc4c677efa2 -10.223968 0.000000 11\n", + "7f71fbe2-343c-4a46-94ea-07cbd903a86c 0.000000 0.000000 9\n", + "... ... ... ...\n", + "0d1c9a3a-bc37-4417-a59f-de4b994944cb 0.000000 0.000000 0\n", + "fc64bd74-4dfa-4f78-802a-39d6aa4c39fe 0.000000 0.000000 0\n", + "d85d4c52-baa0-435f-81ac-b0c27a5251b3 0.000000 0.000000 0\n", + "93bc00e5-8cfe-42af-8322-49bc27407688 0.000000 0.000000 0\n", + "18cc01db-55cc-42a5-aab5-e3ec448548d8 0.000000 0.000000 0\n", + "\n", + "[84 rows x 3 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "elasticity.set_index('productId')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "bfa8a023-24da-4b3a-bafb-b174e54bf3b0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 84 entries, 0 to 83\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 productId 84 non-null object \n", + " 1 elasticity 84 non-null float64\n", + " 2 std_error 84 non-null float64\n", + " 3 n_obs 84 non-null int64 \n", + "dtypes: float64(2), int64(1), object(1)\n", + "memory usage: 2.8+ KB\n" + ] + } + ], + "source": [ + "elasticity.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "5a8823da-da7a-41e4-8166-399538c80a73", + "metadata": {}, + "outputs": [], + "source": [ + "df['productId'] = df['productId'].astype(str)\n", + "elasticity['productId'] = elasticity['productId'].astype(str)\n", + "dff=df.join(elasticity.set_index('productId'), how=\"left\", on=\"productId\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": 24, + "id": "e5d1639b-768b-4217-8821-ee09bb3e60c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sessionIdexperimentIdeventNamepageproductIdstoreModeuserAgenttsmetadata_referrermetadata_elementText...metadata_pricemetadata_nightsmetadata_totalmetadata_itemCountdateIndexprice_bucketmetadata_schemaelasticitystd_errorn_obs
78c404dbe5-116f-42c0-b199-503516dbbe91fd01774c-f629-4bcb-88b8-c818856af72apage_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-29T17:32:45.064ZNaN...NaNNaNNaNNaN<NA>NaNNaNNaN
79c404dbe5-116f-42c0-b199-503516dbbe91fd01774c-f629-4bcb-88b8-c818856af72apage_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-29T18:13:53.858ZNaN...NaNNaNNaNNaN<NA>NaNNaNNaN
80d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-12-04T11:13:15.884ZNaN...NaNNaNNaNNaN<NA>NaNNaNNaN
81d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-12-04T11:18:53.473ZNaN...NaNNaNNaNNaN<NA>NaNNaNNaN
82d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-12-04T11:19:05.094ZNaN...NaNNaNNaNNaN<NA>NaNNaNNaN
\n", + "

5 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " sessionId \\\n", + "78 c404dbe5-116f-42c0-b199-503516dbbe91 \n", + "79 c404dbe5-116f-42c0-b199-503516dbbe91 \n", + "80 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "81 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "82 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "\n", + " experimentId eventName page \\\n", + "78 fd01774c-f629-4bcb-88b8-c818856af72a page_view /hotel/products \n", + "79 fd01774c-f629-4bcb-88b8-c818856af72a page_view /hotel/products \n", + "80 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view /hotel/products \n", + "81 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view /hotel/products \n", + "82 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view /hotel/products \n", + "\n", + " productId storeMode userAgent \\\n", + "78 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "79 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "80 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "81 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "82 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "\n", + " ts metadata_referrer metadata_elementText ... \\\n", + "78 2025-11-29T17:32:45.064Z NaN ... \n", + "79 2025-11-29T18:13:53.858Z NaN ... \n", + "80 2025-12-04T11:13:15.884Z NaN ... \n", + "81 2025-12-04T11:18:53.473Z NaN ... \n", + "82 2025-12-04T11:19:05.094Z NaN ... 
\n", + "\n", + " metadata_price metadata_nights metadata_total metadata_itemCount \\\n", + "78 NaN NaN NaN NaN \n", + "79 NaN NaN NaN NaN \n", + "80 NaN NaN NaN NaN \n", + "81 NaN NaN NaN NaN \n", + "82 NaN NaN NaN NaN \n", + "\n", + " dateIndex price_bucket metadata_schema elasticity std_error n_obs \n", + "78 NaN NaN NaN \n", + "79 NaN NaN NaN \n", + "80 NaN NaN NaN \n", + "81 NaN NaN NaN \n", + "82 NaN NaN NaN \n", + "\n", + "[5 rows x 24 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dff.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "38d4c603-35ed-4de4-847a-48e74044d6d3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsubject_namexp_human_onlyxp_market_modexp_task_idtask
053aefd07-f66a-4d7f-ba8b-7ea1fc562d35DanielFalsehotel517b8078-cf4c-4a1f-b943-75281c69a5b3{'task_name': 'Cheapest Room', 'task_def_of_do...
1d10f5ab3-a7b7-4e97-8d94-ab06f1537c0aFull AgentFalsehotel517b8078-cf4c-4a1f-b943-75281c69a5b3{'task_name': 'Cheapest Room', 'task_def_of_do...
2fd01774c-f629-4bcb-88b8-c818856af72aDaniel 1Truehotel920c3deb-18c6-4586-bbc4-4ce4d1ae6f2d{'task_name': 'Cheapest Room w/ View', 'task_d...
\n", + "
" + ], + "text/plain": [ + " id subject_name xp_human_only \\\n", + "0 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 Daniel False \n", + "1 d10f5ab3-a7b7-4e97-8d94-ab06f1537c0a Full Agent False \n", + "2 fd01774c-f629-4bcb-88b8-c818856af72a Daniel 1 True \n", + "\n", + " xp_market_mode xp_task_id \\\n", + "0 hotel 517b8078-cf4c-4a1f-b943-75281c69a5b3 \n", + "1 hotel 517b8078-cf4c-4a1f-b943-75281c69a5b3 \n", + "2 hotel 920c3deb-18c6-4586-bbc4-4ce4d1ae6f2d \n", + "\n", + " task \n", + "0 {'task_name': 'Cheapest Room', 'task_def_of_do... \n", + "1 {'task_name': 'Cheapest Room', 'task_def_of_do... \n", + "2 {'task_name': 'Cheapest Room w/ View', 'task_d... " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "experiments = FetchExperimentsStep(context).transform(dff)\n", + "experiments.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9fd9f831-8a9e-40c2-89a5-de81fb4f77f3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "92f354ee-6550-48c6-87fd-46b0217e57ed", + "metadata": {}, + "outputs": [], + "source": [ + "dff_exp = JoinExperimentsStep(context).transform((dff,experiments))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "d2823849-6ff6-46d2-abc8-9e363b0f66dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sessionIdexperimentIdeventNamepageproductIdstoreModeuserAgenttsmetadata_referrermetadata_elementText...elasticitystd_errorn_obsexp_subjectexp_human_onlyexp_market_modeexp_task_idtask_nametask_def_of_donetask_description
76c404dbe5-116f-42c0-b199-503516dbbe91fd01774c-f629-4bcb-88b8-c818856af72apage_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-29T17:32:45.064ZNaN...NaNNaNNaNDaniel 1Truehotel920c3deb-18c6-4586-bbc4-4ce4d1ae6f2dCheapest Room w/ ViewUser added to cart a the cheapest room of all ...Find the cheapest room with a nice view in the...
77c404dbe5-116f-42c0-b199-503516dbbe91fd01774c-f629-4bcb-88b8-c818856af72apage_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-29T18:13:53.858ZNaN...NaNNaNNaNDaniel 1Truehotel920c3deb-18c6-4586-bbc4-4ce4d1ae6f2dCheapest Room w/ ViewUser added to cart a the cheapest room of all ...Find the cheapest room with a nice view in the...
78d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-12-04T11:13:15.884ZNaN...NaNNaNNaNDanielFalsehotel517b8078-cf4c-4a1f-b943-75281c69a5b3Cheapest RoomA room was added and purchased.Find the cheapest hotel room in multiple steps...
79d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-12-04T11:18:53.473ZNaN...NaNNaNNaNDanielFalsehotel517b8078-cf4c-4a1f-b943-75281c69a5b3Cheapest RoomA room was added and purchased.Find the cheapest hotel room in multiple steps...
80d423ce8a-77aa-4c9a-94d4-d1adddcc347253aefd07-f66a-4d7f-ba8b-7ea1fc562d35page_view/hotel/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck...2025-12-04T11:19:05.094ZNaN...NaNNaNNaNDanielFalsehotel517b8078-cf4c-4a1f-b943-75281c69a5b3Cheapest RoomA room was added and purchased.Find the cheapest hotel room in multiple steps...
\n", + "

5 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " sessionId \\\n", + "76 c404dbe5-116f-42c0-b199-503516dbbe91 \n", + "77 c404dbe5-116f-42c0-b199-503516dbbe91 \n", + "78 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "79 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "80 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 \n", + "\n", + " experimentId eventName page \\\n", + "76 fd01774c-f629-4bcb-88b8-c818856af72a page_view /hotel/products \n", + "77 fd01774c-f629-4bcb-88b8-c818856af72a page_view /hotel/products \n", + "78 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view /hotel/products \n", + "79 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view /hotel/products \n", + "80 53aefd07-f66a-4d7f-ba8b-7ea1fc562d35 page_view /hotel/products \n", + "\n", + " productId storeMode userAgent \\\n", + "76 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "77 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "78 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "79 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "80 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Geck... \n", + "\n", + " ts metadata_referrer metadata_elementText ... \\\n", + "76 2025-11-29T17:32:45.064Z NaN ... \n", + "77 2025-11-29T18:13:53.858Z NaN ... \n", + "78 2025-12-04T11:13:15.884Z NaN ... \n", + "79 2025-12-04T11:18:53.473Z NaN ... \n", + "80 2025-12-04T11:19:05.094Z NaN ... 
\n", + "\n", + " elasticity std_error n_obs exp_subject exp_human_only exp_market_mode \\\n", + "76 NaN NaN NaN Daniel 1 True hotel \n", + "77 NaN NaN NaN Daniel 1 True hotel \n", + "78 NaN NaN NaN Daniel False hotel \n", + "79 NaN NaN NaN Daniel False hotel \n", + "80 NaN NaN NaN Daniel False hotel \n", + "\n", + " exp_task_id task_name \\\n", + "76 920c3deb-18c6-4586-bbc4-4ce4d1ae6f2d Cheapest Room w/ View \n", + "77 920c3deb-18c6-4586-bbc4-4ce4d1ae6f2d Cheapest Room w/ View \n", + "78 517b8078-cf4c-4a1f-b943-75281c69a5b3 Cheapest Room \n", + "79 517b8078-cf4c-4a1f-b943-75281c69a5b3 Cheapest Room \n", + "80 517b8078-cf4c-4a1f-b943-75281c69a5b3 Cheapest Room \n", + "\n", + " task_def_of_done \\\n", + "76 User added to cart a the cheapest room of all ... \n", + "77 User added to cart a the cheapest room of all ... \n", + "78 A room was added and purchased. \n", + "79 A room was added and purchased. \n", + "80 A room was added and purchased. \n", + "\n", + " task_description \n", + "76 Find the cheapest room with a nice view in the... \n", + "77 Find the cheapest room with a nice view in the... \n", + "78 Find the cheapest hotel room in multiple steps... \n", + "79 Find the cheapest hotel room in multiple steps... \n", + "80 Find the cheapest hotel room in multiple steps... 
\n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dff_exp.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb61bb99-3597-4473-86e2-c90cc51a9c9a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (PHANTOM)", + "language": "python", + "name": "phantom" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/experiments/procesing/tests/test_session.py b/experiments/procesing/tests/test_session.py new file mode 100644 index 0000000..bb45d87 --- /dev/null +++ b/experiments/procesing/tests/test_session.py @@ -0,0 +1,165 @@ +import pytest +import pandas as pd +import numpy as np +from procesing.steps.session import ( + TemporalFeatureStep, + BehavioralFeatureStep, + ProductFeatureStep, + UserAgentFeatureStep, + ExtractSessionFeaturesStep, + JoinLabelsStep, + ValidateDataStep, +) + + +# TemporalFeatureStep tests +def test_temporal_empty(pipeline_context): + result = TemporalFeatureStep(pipeline_context).transform(pd.DataFrame()) + assert 'sessionId' in result.columns + assert result.empty + + +def test_temporal_basic(pipeline_context, session_interactions): + result = TemporalFeatureStep(pipeline_context).transform(session_interactions) + assert 'session_duration_sec' in result.columns + assert 'interaction_velocity' in result.columns + assert 'max_velocity_5min' in result.columns + assert result['total_interactions'].sum() == len(session_interactions) + + +def test_temporal_timeout(pipeline_context): + df = pd.DataFrame({ + 'sessionId': ['s1', 's1'], + 'ts': ['2025-01-01T10:00:00Z', '2025-01-01T11:00:00Z'], # 1 
hour gap + }) + result = TemporalFeatureStep(pipeline_context, timeout_sec=900).transform(df) + assert result.iloc[0]['session_duration_sec'] == 0 # gap exceeds timeout + + +# BehavioralFeatureStep tests +def test_behavioral_empty(pipeline_context): + result = BehavioralFeatureStep(pipeline_context).transform(pd.DataFrame()) + assert 'sessionId' in result.columns + + +def test_behavioral_counts(pipeline_context, session_interactions): + result = BehavioralFeatureStep(pipeline_context).transform(session_interactions) + assert 'page_views' in result.columns + assert 'item_views' in result.columns + assert 'hover_events' in result.columns + assert result['total_events'].sum() == len(session_interactions) + + +def test_behavioral_hover_prefix(pipeline_context): + df = pd.DataFrame({ + 'sessionId': ['s1', 's1'], + 'eventName': ['hover_over_custom', 'hover_over_button'], + 'page': ['/products', '/products'], + }) + result = BehavioralFeatureStep(pipeline_context).transform(df) + assert result.iloc[0]['hover_events'] == 2 + + +# ProductFeatureStep tests +def test_product_empty(pipeline_context): + result = ProductFeatureStep(pipeline_context).transform(pd.DataFrame()) + assert 'sessionId' in result.columns + + +def test_product_features(pipeline_context, session_interactions): + result = ProductFeatureStep(pipeline_context).transform(session_interactions) + assert 'unique_products_viewed' in result.columns + assert 'price_range' in result.columns + assert result['unique_products_viewed'].sum() > 0 + + +# UserAgentFeatureStep tests +def test_ua_empty(pipeline_context): + result = UserAgentFeatureStep(pipeline_context).transform(pd.DataFrame()) + assert 'sessionId' in result.columns + + +def test_ua_headless_detection(pipeline_context): + df = pd.DataFrame({ + 'sessionId': ['s1', 's2'], + 'userAgent': ['Mozilla/5.0 Chrome/120', 'HeadlessChrome/120'], + }) + result = UserAgentFeatureStep(pipeline_context).transform(df) + assert 'is_headless' in result.columns + headless = 
dict(zip(result['sessionId'], result['is_headless'])) + assert headless['s1'] == False + assert headless['s2'] == True + + +def test_ua_browser_family(pipeline_context): + df = pd.DataFrame({ + 'sessionId': ['s1', 's2', 's3'], + 'userAgent': ['Mozilla/5.0 Firefox/120', 'Safari/605.1.15', 'Unknown'], + }) + result = UserAgentFeatureStep(pipeline_context).transform(df) + browsers = dict(zip(result['sessionId'], result['browser_family'])) + assert browsers['s1'] == 'Firefox' + assert browsers['s2'] == 'Safari' + assert browsers['s3'] == 'Other' + + +def test_ua_automation_detection(pipeline_context): + df = pd.DataFrame({ + 'sessionId': ['s1', 's2'], + 'userAgent': ['Selenium WebDriver', 'Normal Chrome/120'], + }) + result = UserAgentFeatureStep(pipeline_context).transform(df) + auto = dict(zip(result['sessionId'], result['is_automation'])) + assert auto['s1'] == True + assert auto['s2'] == False + + +# ExtractSessionFeaturesStep tests +def test_extract_empty(pipeline_context): + result = ExtractSessionFeaturesStep(pipeline_context).transform(pd.DataFrame()) + assert result.empty + + +def test_extract_merges_all(pipeline_context, session_interactions): + result = ExtractSessionFeaturesStep(pipeline_context).transform(session_interactions) + expected = ['session_duration_sec', 'total_events', 'unique_products_viewed', 'is_headless'] + for col in expected: + assert col in result.columns + assert 'experimentId' in result.columns + + +# JoinLabelsStep tests +def test_join_labels_tuple_input(pipeline_context): + features = pd.DataFrame({'sessionId': ['s1'], 'experimentId': ['exp1'], 'total_events': [5]}) + experiments = pd.DataFrame({'id': ['exp1'], 'xp_human_only': [True]}) + result = JoinLabelsStep(pipeline_context).transform((features, experiments)) + assert 'is_agent' in result.columns + assert result.iloc[0]['is_agent'] == False + + +def test_join_labels_empty_experiments(pipeline_context): + features = pd.DataFrame({'sessionId': ['s1'], 'experimentId': ['exp1']}) + 
result = JoinLabelsStep(pipeline_context).transform((features, pd.DataFrame())) + assert pd.isna(result.iloc[0]['is_agent']) + + +# ValidateDataStep tests +def test_validate_empty(pipeline_context): + ValidateDataStep(pipeline_context).transform(pd.DataFrame()) + report = pipeline_context.get_cached('validation_report') + assert report['status'] == 'empty' + + +def test_validate_missing_cols(pipeline_context): + df = pd.DataFrame({'sessionId': ['s1'], 'ts': ['2025-01-01']}) + ValidateDataStep(pipeline_context).transform(df) + report = pipeline_context.get_cached('validation_report') + assert report['status'] == 'invalid' + assert 'eventName' in report['missing_cols'] + + +def test_validate_valid(pipeline_context, session_interactions): + ValidateDataStep(pipeline_context).transform(session_interactions) + report = pipeline_context.get_cached('validation_report') + assert report['status'] == 'valid' + assert report['sessions'] > 0 diff --git a/lib/separability.py b/lib/separability.py new file mode 100644 index 0000000..a93ddeb --- /dev/null +++ b/lib/separability.py @@ -0,0 +1,128 @@ +"""Utilities for loading separability artifacts and scoring interaction sessions.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Iterable, List, Sequence + +import joblib +import numpy as np + +from experiments.ml.arch import featurize_trajectory + + +DEFAULT_ARTIFACT_DIR = Path("data/separability") + + +@dataclass +class SeparabilityArtifacts: + scaler: object + classifier: object + states: List[str] + event_transitions: Dict[str, Dict[str, float]] + feature_dim: int + + +def _normalize_events(raw_events: Sequence[object]) -> List[object]: + events: List[object] = [] + for evt in raw_events: + if hasattr(evt, "value") and hasattr(evt.value, "payload"): + events.append(evt.value.payload) + else: + events.append(evt) + events.sort(key=lambda e: getattr(e, "ts", "")) + return events + + +def 
_event_transition_distribution(events: Sequence[object]) -> Dict[str, Dict[str, float]]: + counts: Dict[str, Dict[str, int]] = {} + for src_evt, dst_evt in zip(events, events[1:]): + src_name = getattr(src_evt, "eventName", "unknown") + dst_name = getattr(dst_evt, "eventName", "unknown") + counts.setdefault(src_name, {}) + counts[src_name][dst_name] = counts[src_name].get(dst_name, 0) + 1 + + distribution: Dict[str, Dict[str, float]] = {} + for src, dsts in counts.items(): + total = float(sum(dsts.values())) + distribution[src] = {dst: val / total for dst, val in dsts.items()} if total else {} + return distribution + + +def _kl_divergence(p: Dict[str, Dict[str, float]], q: Dict[str, Dict[str, float]]) -> float: + eps = 1e-10 + total = 0.0 + for src, dsts in p.items(): + for dst, prob in dsts.items(): + ref = q.get(src, {}).get(dst, 0.0) + total += (prob + eps) * np.log((prob + eps) / (ref + eps)) + return float(total) + + +def load_artifacts(artifact_dir: Path | str = DEFAULT_ARTIFACT_DIR) -> SeparabilityArtifacts: + artifact_dir = Path(artifact_dir) + scaler_path = artifact_dir / "scaler.joblib" + model_path = artifact_dir / "classifier.joblib" + metadata_path = artifact_dir / "metadata.json" + + if not (scaler_path.exists() and model_path.exists() and metadata_path.exists()): + raise FileNotFoundError( + f"Separability artifacts not found in {artifact_dir}. Run sim.strong_learner.train first." 
+ ) + + scaler = joblib.load(scaler_path) + classifier = joblib.load(model_path) + with open(metadata_path, "r", encoding="utf-8") as fin: + metadata = json.load(fin) + + return SeparabilityArtifacts( + scaler=scaler, + classifier=classifier, + states=list(metadata["reference_states"]), + event_transitions=metadata["event_transitions"], + feature_dim=int(metadata["feature_dim"]), + ) + + +def score_session( + raw_events: Sequence[object], + artifacts: SeparabilityArtifacts, +) -> dict: + events = _normalize_events(raw_events) + if not events: + return {"prob_agent": 0.0, "delta_h": 0.0, "delta_a": 0.0} + + reference_mdp = {"states": artifacts.states} + features = featurize_trajectory(events, mdp=reference_mdp, input_dim=artifacts.feature_dim) + scaled = artifacts.scaler.transform(features.reshape(1, -1)) + prob_agent = float(artifacts.classifier.predict_proba(scaled)[0, 1]) + + session_dist = _event_transition_distribution(events) + delta_h = _kl_divergence(session_dist, artifacts.event_transitions.get("human", {})) + delta_a = _kl_divergence(session_dist, artifacts.event_transitions.get("agent", {})) + + return { + "prob_agent": prob_agent, + "delta_h": delta_h, + "delta_a": delta_a, + } + + +def estimate_alpha(prob_agent: float, delta_h: float, delta_a: float, temperature: float = 1.0) -> float: + divergence_mass = delta_h + delta_a + if divergence_mass <= 1e-8: + return float(prob_agent) + + ratio = delta_a / divergence_mass + blended = 0.5 * prob_agent + 0.5 * ratio + if temperature <= 0: + return float(np.clip(blended, 0.0, 1.0)) + + scaled = 1.0 / (1.0 + np.exp(-temperature * (blended - 0.5))) + return float(np.clip(scaled, 0.0, 1.0)) + + +def score_sessions(raw_sessions: Iterable[Sequence[object]], artifacts: SeparabilityArtifacts) -> List[dict]: + return [score_session(events, artifacts) for events in raw_sessions] diff --git a/paper/src/chapters/slacberger.tex b/paper/src/chapters/slacberger.tex new file mode 100644 index 0000000..7728c91 --- /dev/null +++ 
b/paper/src/chapters/slacberger.tex @@ -0,0 +1,69 @@ + +\section{Problem Formulation: A Stackelberg Game Approach} +\label{sec:math_formulation} + +We formalize the interaction between the dynamic pricing system and non-human actors as a \textit{Stackelberg Game} (Leader-Follower) with incomplete information. This framework captures the hierarchical nature of the problem: the Platform (Leader) sets a pricing policy, and the Actors (Followers)---both Humans and Agents---observe these prices and react strategically. + +\subsection{The Players and Objectives} + +Let $t \in \{1, \dots, T\}$ denote discrete time steps. At each step, the system interactions are defined by the following entities: + +\paragraph{1. The Leader (The Platform)} +The e-commerce platform acts as the leader, choosing a pricing policy $\pi$ to maximize total expected revenue. At time $t$, given a state $s_t \in \mathcal{S}$ (representing inventory, time of day, and historical interactions), the platform sets a price $p_t \in [p_{\min}, p_{\max}]$. + +The platform's goal is to maximize the cumulative revenue from genuine human transactions while mitigating the distortion caused by agent interactions. + +\paragraph{2. The Followers (The Demand Mixture)} +The observed demand is not a monolithic signal but a mixture of two distinct populations with divergent objective functions. Let $u$ denote an incoming actor. The type of the actor $\theta \in \{H, A\}$ is a latent variable, where $H$ denotes a Human and $A$ denotes an Agent. + +\begin{itemize} + \item \textbf{The Human ($H$):} Acts as a \textit{myopic utility maximizer}. A human $i$ has a private valuation $v_i$ for the product. They execute a purchase decision $d_i \in \{0, 1\}$ based on the consumer surplus: + \begin{equation} + d_i(p_t) = \mathbb{I}(v_i - p_t \geq 0) + \end{equation} + where $\mathbb{I}(\cdot)$ is the indicator function. The aggregate human demand $q_H(p_t)$ follows a standard downward-sloping demand curve $D(p_t)$. 
+ + \item \textbf{The Agent ($A$):} Acts as an \textit{information maximizer} (reconnaissance). The agent does not intend to purchase at the displayed price $p_t$ unless an arbitrage condition is met. Instead, the agent generates interaction events (queries) to estimate the platform's pricing function $f(p)$. The agent's reward function $R_A$ is defined by Information Gain: + \begin{equation} + R_A(p_t) = H(\mathcal{P}) - H(\mathcal{P} \mid p_t) - c_{\mathrm{query}} + \end{equation} + where $H(\mathcal{P})$ is the entropy of the agent's belief regarding the price distribution, and $c_{\mathrm{query}}$ is the marginal cost of interaction (assumed $\approx 0$ for LLMs). +\end{itemize} + +\subsection{The Demand Contamination Model} + +% MAYBE alpha has to be \lambda which we also need to formally define still + +The core difficulty in this setting is that the platform observes only the aggregate interaction volume $\hat{q}_t$, which is a contaminated signal. Let $\alpha_t \in [0, 1]$ represent the proportion of traffic generated by agents at time $t$. The observed signal is: + +\begin{equation} + \hat{q}_t(p_t) = (1 - \alpha_t) \cdot q_H(p_t) + \alpha_t \cdot q_A(p_t) + \epsilon_t +\end{equation} + +where: +\begin{itemize} + \item $q_H(p_t)$ is the \textit{true signal} (conversion intent). + \item $q_A(p_t)$ is the \textit{adversarial noise} (reconnaissance queries). + \item $\epsilon_t$ is random market noise. +\end{itemize} + +Crucially, $q_A(p_t)$ is often inversely correlated with $q_H(p_t)$ in terms of utility; agents may flood the system with queries during high-volatility periods to map price boundaries, artificially inflating $\hat{q}_t$ without converting. + +\subsection{The Optimization Objective: Robust Revenue} + +Standard dynamic pricing algorithms (e.g., Thompson Sampling or UCB) assume $\alpha_t = 0$, estimating demand $\hat{D}(p) \approx \mathbb{E}[\hat{q} \mid p]$. 
In the presence of agents ($\alpha_t > 0$), this estimator becomes biased, leading to the \textit{Cost of Information} (COI) defined in Section 3.2. + +We propose a robust optimization objective. The platform seeks a pricing policy $\pi^*$ that maximizes worst-case revenue over a statistically plausible set $\mathcal{U} \subseteq [0, 1]$ of contamination rates $\alpha$: + +\begin{equation} + \pi^* = \argmax_{\pi} \sum_{t=1}^T \mathbb{E}_{s_t} \left[ \min_{\alpha \in \mathcal{U}} \left( p_t \cdot \hat{q}_t(p_t \mid \theta=H) \right) - \lambda \cdot \mathcal{L}_{\mathrm{detect}}(\hat{q}_t) \right] +\end{equation} + +Here: +\begin{itemize} + \item The first term, $p_t \cdot \hat{q}_t(p_t \mid \theta=H)$, represents the revenue generated strictly from the estimated human segment. + \item $\mathcal{L}_{\mathrm{detect}}$ is a penalty term for failing to separate distributions (the cost of confusion). + \item $\lambda$ is a hyperparameter balancing revenue exploitation vs. robust detection. +\end{itemize} + +This formulation effectively transforms the pricing problem into a \textit{Distributionally Robust Optimization (DRO)} problem, where the learner must guard against adversarial perturbations (Agent traffic) in the observed demand distribution. 
diff --git a/paper/src/graphics/gcp.png b/paper/src/graphics/gcp.png new file mode 100644 index 0000000000000000000000000000000000000000..12e95ae8940e382976648e3e28a9c157c31eddbc GIT binary patch literal 19226 zcmb@tWl$Z_(k?t32yOv_Ly+JQ9D)-p5F`Y5cX!v|fuO-da7b_q8+Qxt?oMFiu6H=+ zt+(pCRo}l06hjTOX07gC-Tm~_Yey(5e84~@Mg;%>Lq=Lc2>{^0KOq1zB6#VJ9X1Cq z6vom@@&MpP3jhJZ0B{T53fKhzS2h6nX8-{FDF8s=@T1|gAoveNBiRoUz|-@0R&!wj zc!$PCQp3f>(8ZMB*vS;U032)_JS=SNEF5eq?A-hu;14eo8yi0xTbBy=A6pZ$L(u`u%%2Y`mgIW>bhh35 z;Y~{sj4g&t`TycqI9DGG2*$4O{QPSqGrca%avXtHNXrsWbj%ky0EFO=&-S!Q zRV<_Cu#o4A%-WtX6{8qH2PlI3Ff0M&IJ#+hVI%)jk7%=PV}KMP$2Y~(9uBpt$%F}V z1b_cFe0d^G-#o_4?TSuMYtKzH1Y3v>1E9S9<(d#Pn~6T6DVKt{oePO_q2G^z2*Mv% ze-)Jd@YZk57J3wdP31feVCTdb1We0kcrY{kc(Gj#@b|$|fPesA2s4#9EAdSi)03kB zIb|^S^OMPU=QW)le)Gg+B5whh4gx>9^x!9!WNdqSSp2s*p+PUjTs{#4V&_WMg`}>6 zFLk?utG(HxkCJTBnYsGvWI~aHcQf)J6cVh}cK^C$KRGOx7S1n>U2?xSwr4=u{XS(^b68_8m-Ezl}ktTyxf z7sWnXKyzQ_h{PiXV87K6*xEh9J|UwZU3X)C>z#&TYK-67)BzI>-nIX}06`whWE0&l z{DhvczLR%~1jteKw9d#2-8x10U&Uc7#FGP*q*ZS7?8e9qFWY|G^^OaiQ0sB>IYYO~ z{r246h@B&W+UtPqo5=~H6Jx>64!|_LxO1-13yxwmPS>^V!l)!#65jGsI@!R^bB64AJ8CUDq+PLR36(6ZGvf=sr)D4BTY}wG!?_ zD-MWl0`N6<^J7y8Wex&KExX;H6CBR6TiLmHFJ&U$SLc3?&;KGeD+k(WHVUF8V!K`n z6fH*QvwAsBz8Ms_W_*)~1hfSU)cYD3cz%z!>!p@7I3M*hM#x6=UCsZ3D=LK^GG-!kMW@0X_vB zm+QL;4g>A#u2-8++!A=2PTg%<&<*^Y@&r~UN-vA0bLvk(aW{av6rEpQdAhk0@&@;l zzFYTd;bOFYc+0c6=WVw{VKK%NXN<_XpupvSt4>$!STxA$UF=SJ?UKdATRwu5#?eHJ zz4u;Mx-mcO9;3SeO&!2aQ9Vbn1qU6VlF+9CO;AkS-R1(_*tOQ%B|STOzqJw( zy|g-as|_8X1|I^gU}6ZHjOcy2#sCPFcHbrve8>#;TuHMQ-+Fpgs*zdaaTsg9^fn#a z_Zt@w+&9`-2-AO8;|raE_)Newn6HuBA`QHVx?ZzcB=+V_NsTv7k^W!{dwsT^BtP<6 zpkzu8(^xr%0}qxfLnggyALs}dQ0ev}KB(Wn(Y+Rf(Bt|{B-d79S%dPH-_z?Qm3Zg8 zd*csp`>jlhRR+YTDe?Q!g3kEyrpfXAaVkO{8=S?wm%e>~Wv#^RFaP9)s<;y_hJ z%HxymJh<{tsTG|}vB`Gd9-@;J8HpBGo`)nFj$V@P)Ah&}zR#fZFYXnG{2&4w645#O=J8+u4>B0SJ~xR$D}AQ4VwKQ60xF`1sKBjsR2szzv1a8G^oe;Y!E!^pri3 ztjBlE&U+a4U%gjkorSL?1EgDS%)sG9DAekT?x*&S8wF5dc*^db7Fveruzi`i&$53{P~@OxCiOzXSb 
zJh7e3dw2BE5nX$ATVZ=>xi6DuC(C9!wBl@bjJ7-^lVtU)FKH&Ft2u#dV@{ zGT_OD2@w%9-AQ*~&u#^c(o5i3#fZ3O3%>HF7R54|yr`258%eFFsS8)_MKtP|TG}ZV z>2MuNP$P=tExQ#YaKs)hE>*}M<(S17=?4NLuB5-YhEbLw#AB$&5+swyYJ{B&Kujp( zWEK@J#UrB-lxWo!@f@G^+>G^nN0N;fuVYd18vPG}uf4|^6UkBw*4(?xPl5(*wJfp# z|Ma)}i0f4vb#%W0*d2w0J&gbaHSP-)s&hksCgD#1KE;>i0k@Q%JHg#h)PsPP^-jR+`eIyts`fX z8r&hrdtL$s`=g5l_(HwvIiws(g`I(}9p*-(WBtXqt=P?Ra&UM#Byuj7mrry-pB-iq z(Mvr@y3PG|-L|j*Npyr)*MGW2 zIs0|*b4=mVO8O;siW@8`nGE&<5N#_=o5V`e{d}LZutku=yDRhwQLK*s#-#l!d-rb> z=tKe6E)(Z1lMQ(eB2e);g`lF0;e^1-F0Y#!Yu=|>*eTi=2?#XKytCuP*hXOQbU40;zuxXeI@mJ9 zUhY`$2Ih6p1KV8oYpiQ^R=0hzJ6OPM`k=5bGh%|c0^jB!fPXbwmlt=>BoZBHCwbh( zYQcp2)A2@%p0=w%2fF8xyn)@}_&fxawTCda0d2R@`rg(>Lg^Lry5u2uu=K`jyA=c= z1mPn2_e{;fnj3}KI~h>kl~sBboaGQ8##JCmFUSS$()GwY0NK957YzWS#wdJeHtDZ| z{$`1}fDvn#H=fR`434c6_RcHLEGF(Ah3v%)CO=ys?_vvzKuTT{i+TfDFt!5dWxE9P zKbha*$(?RJzG|7_WOjtezaJl>pGQRZ>4gbYa<&-F|Iu!Nx#5Yudru5d9kB=DIbs0i z3zg@XKIP^br#`%y_fYRg^BN4%;swWpEI^c**!0E@&-2q`pX9PE^F=5?y5mOLEPnL+ zzEx~?(+yd`X!TcNGfS0BY@au%2G4JqU5ZBMi3fkK4Bc?h zXFa$)LQ7$d(*xp0p2w@@^l3v3Y8D*pWEr2x36~EUnB4`CDV;(VF-605K-I1z9DRhl zbQ4HYAZ{Z;Z<5SXrD#S`lo5Id8`G zAR?DHUPE6oy!@4NuG7cpEuqveJM5kXV>k&mC&puO6Tk!p!pAI9BEt4me10+-E^NF& zVuTklbZ83=mCgKh-{^Guv=JCl%TkC#4|MD~G#ADz=zDF6@_u@YM@9F%dZjxfduoUu zfw)FLaWcV=AAKI>-g*es>>b)tgRL(tbnrc|=KB5BmCKfW*oe-(ffZ*Kv7pg)KTA~Pi@t+*ygI}P3E3~VPoJ$aKiUs7O z3bBHzR0GVkoGz#Iz)?f2FZ(O>cXKdrir}SZ8L&=N3ad?4#aX=-=zq-H-R-%(;Kae` za)BuE!kyNFSa46ZbD3^c0vOJGowerz9bo1EBaYtuz_N_C&7n`JV zaN0tL{Vy9qbT@43>CT^O{$JgWd2h9nF&GC1)${Q~CBk(QzJOTqgk@eeQK?j)r`wm1_F$n1b5T02;zZpOXeaQ|#K-zD+dzGVe;1(BB5y=He zJQ7B?51SU~xi{A3z{A_WOH@xcUqF6-gYNSxq>4XpYQP6Xh&VJhAlx_Ubr1m71bOCM1Q00?s6g-J zMK!Q-VPq}!s9V^2MJ9cwSa_;*Fn8eQ`QN3G&uxQKalsDVEG3bYYGRZ<|B%>noKv5M zKCkmyfTyz_-iWqAS7$6R70Z>8_0xjl9t7J{91KeKE@cDm+h}RmjiiYqG5aYfZFpf$ z3pIittKf6{NKRkSo(@@sof7>NZ0DSUbzHhnlrQ-R2Cyq^O1Akgy)FMALk>FF1d-D+ zIyW{wkR>=BxM!gp)#>U{9y~`D_3+tB-L>#FYu}vF-_=N6ZwsDQYMY5t_B2A}o?RW3 zn5rCm_AiT9b0|l-bK~Gc;_o35fZl;oMDo`5f35nl 
zui5o(q@8*0e6-pwSsPg(4y<+!p_kpui3oBIYy+(Fgdgyqw~X7|UC{5=J0}tP5n15J zLR7@@_BazF*}B_YNJYXqXpdY%EP)?eY%iSmYE30^7vFlf2%OB98fQ1fbp8c1e@aq= zt{de6EhvsV;eiW0fDm<+w!J=`AA(e;`{=`c(;$!J)#i`nAyRl|)`RoE4j4@rr-Nw_ zIdQ{Lk(qrl@VEN~L7gww>CohU?RR=L)|acZwXFiPvsya$!3$TS$c=wVfw*LxNTDk@ zm*_}J!M{-hXl8F7w=SD^2MV72_Sb07UWckzd$#e_`HK5^trk z0{UbJvo>*#(g92->Zg2L(h_=!C`ccXafds-rJrNhpwU&;%QUW!5GQmBXFUqEuU{!p@T97M6?8 zG%Xz0%gN6h4SKmAJkI66?gRDi=*0&8MxLT-|C7a@AD9s=dhiMm`Rghy zyU?@Fn;!kWUqqj6de=nu+kzo-oa}#cW`O}3u5Rjp;H?t0wIUR`RfC)icKAk~(g`fJ zsg67|Ww;<7d;~K|{UWSF)tg@f-JkP){j=?_=uwBFB1bFnMtEfBO$J4_PWf$8bZ)k7_3B|(t-WFwiBeWjYW zZJrc3Q}V#J0j3a{EB?9-1aUzeT(oH-tiu+d)2mV z*K1UG4})Y~wbz;KEe2xMlSN=Rr-CI$T+hJH($y$WKBn2a?EC9Un1_VB$BQ6Fo@i!a zX-0p_@dS`v<6!fApJlNhApagCO}-7f7+pwrT8I>oNw=DWiyMS5nVaYBCq#LR^j!Dm z=MvmRo7y50ol^syi_vt-zD-^aM@X`xQ7Zr_|i z>&VKnsaolw@UvxH8r=CqD&;tJFNakh0k*HmpX~)&)rpI)e~;{NF18SW%)Uff{W3t$ z%KCZ(ODZkmTL4fNy@>5s)fv$5`}8>wRs}Oh%+ZN!7TjyH6#}+{vMu!4U`WSofH+`t1hIKh zfzj4W(S+91D0lzqap=_quTwt^KL|&=yN7fd+NaB40A~Cd?mglC^K#tN>Z)4_ ztOk^~NvUhc8l5TxoQjYh#dLCaIQkQ z{ZEHPGw$fNiAJw){9c7NYZmZr>Z;tonpIa8=1`9!ppg5Fl*i)sBZd%G!kU|6b?$G# zV6&)|h^Rk-;sjY>Pa$GI^%&OW;zLZ;{;yp$QcFM% zw6sR^Hn~_2SYX@(3>yvCurvK(o~L2<`wtL7Ej6AUB_5pWR7duJY=&jCFKaQplZ$&@ zLO5b3tfZJ&wji-0sf#OOC~55`MxhH+>rp1VcMwJmsGq}`zVh;%}m<%55p zXuz!6QeOVnynqupc$Y_!*oy5dF;`Nw zku_8ZIY@3_AY4^XwIIh$#2%|ODXI(!B`1uTQWR7ZH^GPXgpy+&yC9ArZrB0V9DT-c zE~j%34Omq1M<2C?eg!rsx>SRtb`U6W2e((v4}l#WA%C{$h4DmEz$Bu!ELc+_hfpRIY(HmcR2lRS7+xbSo;QzvVhYhtEtkW3AqgYsNK7=aMfSnQ1Ml=zD|sQ7*h}GY={2Iik`=YsuMh zY~1TaU!hT+293D?1&lWI0P2Z14=OLsx{SG#7j$oqe(hbRg<)Khwzpa9d&gZDc$Ap& z#I{rX!p7{Efe%Q0q+yO382clS!F|=}OC5~etbekal+N$AF1u%o6=h5?Im7HqRS+7g zEyAwdoB(*a-Hjmo_%^j%;(%zyi+zCvXkQK4AVk)Zdz2d}7%6kRqeZ6lYJH9g*dfaK zhO69IICQgrKD4U3FZ0@u_s0;1{*LfZWGNJkhw2JQVq&ocj7X zm;bziH*-Y2EzM3Q;Xuf=NZGh@cE_&EL*4a)01@gBt!$zFC-o?1(m#zD0ESuMyT-@C zI#a`kd+1UN1WX?GZL4bpUmOg+zF|@9nfvzbj8n7NzA-d3bV#W6EB0{39|)U6-!T!; zQy?!rTYz?q1Vv*F-hJ>9)`z@&5HzLQjx;H4#!jttC87u#u1<LDOE 
z=>FQ=Tk_8qDgXQT@92470#^`>zfhqY;4#QX#@FI{4!a{oQVOp_U~y>46e zB~$@`ui4y&WV1diL7Udidor~iyM1VFf^Q|`&DSyhVA%CCNa!112?I%(cVk+;i*G&b zaZ!iJ?W=?dRxO#-%WvM|g)$zng`^;(Bfbp6$nxk5XL(fIlnh8tVM{v-AMzjhRu|Nc z)=R^%Ms0v7+M6bg`NU{3Pc9OK%m#^ivKZn4V=8vbOzx`9z1@s18Pkxdat(0QDMErRMITU+Xl8-1IR3D23mJZdzw)^pr z?%Ci)LI!K0C3#IguY-h?Q+D66VFDf^06YX85(`wkXd`A@Mmj0h8mpGdx%l1FBegJ` zJ@O&2t-Eu!8pb6m4}azk$hAEs71sjSIKfcDdsg^!XrlMwW6c7Vcx^_ys*+wyjL`@x zn#{_#UaOWpvuy8#c+EIvybUF`f^75F81t0tP~R)kJ4G(ysru-J*F`A^zhoi8@IoXX zZ;>FpHcK_xa}ng*{Sa@i!?qC~Bk|($E+qaZ~4U9iRUKXdIuHi$Ym}wl4+uW<`x}?~iLJ}K( z|8BXx_}PVnEJ0fl=-ud@A@?X_Yz%8m)!}>v8kK(Z;$&3LMOoSRf=r0<(>osuxeHm^ zAL;F%&j4exaQltmo!Wwe1)c+!A;qm5^g}!K`Du@m9xq{rSstW-RiR}qF~6=d!Mmz> z#Z7GgUq-*juSp0e#?E?XBf8FK5{qX9U#F;Yv#UFfpsT`;uv7)nN0kA8ETJb7{;z?J zkzvu->REyueCm$LiK?(IELGuOM6y*S_Yl!QzV8Mj0wCOSNSbkw*emHvEiLdu%X$hs z0Qok0z*(BfuaQgRgmd3E_hT%z=cHI|= zs5tslV5i2qO1x-5Zi`!bDEz)vO;*BR)7XS8NV^b;7}VYlPG-e3MS_0KSp<@NY{XeB z@iRU%`sBZ(q;mCrldZbbZCa3YtI8p5w}7>252pfN2?4dg=jINo$BQfmzPiV;k_$A@ ztLr%?pY6Gy_4?XqI83So54W_4&>Q8IDC)J0aNV*S^fSN9}?F99hmRwxPKnR=Pl znJ9(B2^*j&N0LDw_N^GO@R$89w_!?$I!*EdE|AKfejL5$H3V;yjp@Z|Kg#P;O&>qg zEjyv+*-eW{W#X@0DT+w@bmgW&;hvZzk|T(4szRPt4_`%8POK51UXX31yEO&}uIwE9 zndIf=nHZdmzKjWQ_#y+`R#D&L~hVk$Tm~i?o z$sTBT7RHEw>(ip+mwdthXs}6|zia7DG7BJ^TjjcX(&G`A1qx3C2C_Sj(CF79x5nVQ z7Z57nAWpP@t6>4w48)wNpiiZ>#tQu!@%GA5z36}c*X)H8ThaK>t|0e#SB?1%Gydwp- zHV!1x&{W1PW^525F12`crfW2i;$lyLta>Y2N4Ih47m0kWfU}GwlgcO1YV_gK^(F4O zuqDT_;aj?2u7h7copi-gBk)7n)s_~Q_Ca_}AMvFX$028;65>)J^S4(#dsHU%FqVgp z_`*i(VNZx}@AOmTRiLFH5xSm-n0RMtrpc`xxEI`kw~oSgJ2yVUxgLGB-m9xC1e^*~ z#-h{#uXmVnOO^jJfR(ZjE|zOwMcqJ_4kZ7c>?HxLZ}#5Cc;qMZGd=d2W0a;;$+J zj_*U&eUda-l@w#RxKxg6YN~3us*pUdRcLQ1A%*Q;SC^ZhdC|Z;pFyqw(60pHym09-vagNe-q81Biyv>F#j{ z-4h}}VXRj=F|GT9+WUPIM$K`@tc7 zBGaPFv){&ht9q8@JwC>48aI<=chs}*UtY>o*7>Uni`be4;v8rK6iW^(_yOhCb1FW{ z&S&2i8e}IugHL^dLo`IDM^=ZMsP7pZfax=0=GO?bTM;+0CP(po0aWh+=ZpBi<~T-26H} zKTZhw+JBn0(;mmc(zs;XektF=s_2GL6kW6=kJ%?m>7NwPtJT22Q3V43?Vg35)-=YY 
zo5aM%>~A~fijN?HBe<;StnZG(KQqLNP{Br6avd$>0?(|VXhcCuAG0{2pm07vA+&(Mk9T( zV?WO3xVUQYTK|-voqfp-Ogv=w9mU>4#2_lKkr6Qa3+01^HJWJ+47;cAOinFN&@Wkc z7q9Z(5;h&Ly@`*(IVLL{Fx$}tEa_O--oe=3>vv5K1!J>50>#FT!FSp`uRhs;=y*uU z3qT{7Y`$L8Yy1SKXnVp!J}tkhB90neLCHPA%zzeCYiX`pbFD8hM$b~f7|0$`jSG> zE0A7eZs&HL=*i4v6dSxPhgV;#K`g@#-zw{Dtv97dnhe{P#K3_18P$5xLC#AiwyG;3 zZE8JNVSKuxAK3hDb?C8wT_ZxKhoubkDn@=bKGCZt>$oR<$o=b4Qt*}Nb7cF^SgX#A zXP;3_G2K%NzC%kCZU)JUbQ&ho7*+lDHqyvFEX{hWA*=>(N3ev0Xp&tOnrov@ZNeIF zOmiwLx8Z|?5&)lA$yD2X4b6;A596ln%MP2Q!B>R01mGsEe7VQuq}X-n?G(=WSh88n zQ+jVXePcf=?Y9!mk^!PCb~Bw!j+Yxd^?H=+-_?{8DK0AdcTC2yyu9@KGTet3HuJk; zRvku?Y(bXec2KO!(Rq82!KEr-IIG;%z4ARldxr_5ncz&-&6)J4Ue0u4t1?9Q*5>WE zo^INXHP)ZH)^U<~wNOw`~Ir@}h)8KOrKHceS&D$kh{h^tLu7H*L{P z|MvH1IQGw0SL^XFVFn&O03Tpb(LagvY1_^*GtmO<>B8O+J;;!(oTJr4vMqB#q z0zV%A&7AXgnFu;UM;P37Xvo#dn1;G}en+jza+9a6#JK>P49dU#0UQAmPR{YxjN~~! zalW-NUW4S7X6I`avYHgjpqUpf7+i#&pe27qh8gEJyrbO+?d(3)SU zl_xvtjj=VJ?hVxByw3B=NGQKpkW@Q!gtFiTpc)9nTN@J;7N!p^H9E}ySwVPo zNPexkw*_{5c1qmewrbi??v3I5*A}}wS8g|g^Z~;DhY4B}AKS(Bl<4Z%?jBX|7)?9( z#e<%`q~&$^I5%;eN2i4XjB=NYkMBq?Z-e5T;kV+pP_KCdKAO}zFGerJuPI^CT}JUv zA|CxJzT+C$clnHvVfClGujMh=5*|*e3{Ep`U@#%1hEx>VDT^r3%2eE3x8NTLp)A1Gz?%KtId^>6T0?z1TWX94#_S_n*EpGH*Cg z88GXq)_t?H^=Rxd(6*VrG$6&qa_G2TqR$CRG zFH*aBQ#)_s?T;*z@maJs^=54Jyi#n)h1V@tYH@M#f(G$dACtSk2EXkBavUwgtioFs zAN@_Y;uz9CzgWEkt~NPalaSdz*BWBXtq{w>_K{|JxP)H`NI;U+!${_wyS z;NqTa0npMz0k-(A&vdA>oIl(cUc(DyO7mEK&o`!$|HkU{>d(j6d}}7g#eAyln#y!W zcgGd5NkPMG!(dD6t_#<*ca=nuIDn<1(2Cw7RhcNSHf)%qZIJz;j6_>hMZDnAO;C-K zK~LWsDnFpg+4bA)gFr2aW5c#&w}?KTWwkHVZZ+W;D$HHOs%5!V(_mc;?R?G9P{Ye+ z@N*xuVnK<)2!qTRp`R+^r!k%hQ2UGAXUma`*VF z%h8*0zIQ^UobC$#wur*l^Z1p)HIAI`%pIBJ)ykT)pP6^}$G8xp`oW}V@?jP0_~5y}3aI=8jD>~5Z}-O;2NKN1kz zbf>2DD{?ZmGk#I-QBphbCr$2t!C8gvUsU}N~O*Ud%wxHz(&nk>dk%r*U?HaYtFec!p1yjp9clL zyEd*V^%Y1}4K*2n9ZvjFJaJ0_uYu_qXxea2WdO9As3~;|x#luVOgsDpHL2&yjRT(r zJ+5C!`})uT0qr9`EQgbvWl!pYQB(D;tOee{+wZ_y#*lWwIIjCCm*7w}P4 z4f-e^hvw}^gO;Mddr4zZHR2wTuPQSq9k1X?9+KS{UHXS@=pz;FLQm_T?5bs#=s>(o 
z_MhL26Kr<|QzY9ji^C~2UWfymECK60XYb**!3XUIUH!h_ zQd;i@P_pn@W`%YdvcAvv3eapd9Uv%oz3AZWjhYMpKHz)9ws44b?LF6;s!U2M)yZQo zznM4VWh_etXz+b$+;N%*G%5@pt<3mePeVV_nCa*ee-+eG5P=>G+*)p(N(eMu;qqbOZTrYq& z7soOZ2*06V0N#K>#4bU-2F;jVQ9KutyU^$o`{bB~3jER)T*V(wnQ=y@;%j_Sd<7I( z+6Ovv$*=iia&;xn8@vnGKEgpj?{?Rg&tS>#Y%lhpvcXLugdb(9>mneps#ex>@!Yz7 z*1XC`AvA+8TeE}&+I*y-j6tabm0Xc}FJYZRP?!0HKWB)pbyu8J#yg|1imFs`SIv9%O>-@ie@& zQ)ut|Gp$V&>wzDYu@lUudjR$|z8CulKu`U$77(I@4Dgl=%ztK&9-N@(305VA8~xTZ z@3CfB_RS#^iqiTV^yQUT~(j zl!U{<4ItZoFM$2^JJ?gU<%4E{P%Qk}g?O}{&6v#(1;j#_2)jDTGHbT)Y7%->k*k1h;+_} zo#TG2eY<=wPpU(i=OPIH}!9lzX4{x%KbBeMWSpt71XTHtgQ^k)0fHpr%d~jazDx`*YMbX zDMm42LPGpT0@y{@d*YMKV6tMf8)^V>ga)56_}znTDU|B%g8^;c?Ef2;f_OEm3< zve*qJv>xK)oZ)w-!oA#Y9hwixU%%Yn~N-{*cu^tn^#s;JG zx+sOuuf%iFog-?z-|)^X%sg!=yQjms(T$%H*m8gp+iV$?>9VN=vZdf@1T$5#=_Hi_ zi15@uZzoGbahQl`&8X2Su8?Ge|11IMRGP>C2@043Jr+%=hmbuY7-Zo zUzIdvuXT%SkwZ4ZRrXR6L&){Lgyp~JBY@Ur2}vg^n$I+Z1=42}Xbj0p3;nRhJ^>pB zPClPbPr?{!A(QBB)081Dq!~6lXE#xSAVqFeU)@ul<;8a-8Ib&5eCMqRngPQ4ram23K7aJ#KRwuBsYp zJ$e+;YsLI6CJB+LoMPSOla>GnN*z3S5QAQCxt@Am0elv7qTrG8Q2n3a9}eB$o9 z<-LKytwuL~84ji)dhj%lA)walp%1r#b$8Hw_mE*cs>#H_y3%WK6oOs`XJF}*k@D8k z=2y$_#u<<^G|Nhd6{SA+@ff_JaBIhbHbwsVkzs8!_DknC<(xt@*>CXpBq08f*h9!x z3y42I?3M98w>RWjBBrDtC~CjR3ID0AobJEjAiJW(PHjU>YX4X9c5U3lSH7Pz-k{b+^T-S33>Rm z%kmLB(1Ma>zTmfA0RcY}UQJa<|LsGZXN(?B)D_`%a5(hk7{yRZ?ABP1b)QNYFVC=k zLbrFdl9CdK;w$t9-cIt=7=X)K9;?QV!RvQ1qN2K3_S}X%57?f@#5JSH^_ExfHlCWv z`#3fM)-MQO5(zjf6J^4VI5tiTLepBQ^>o^#h}btg zTs{@ZY7u`WRahlyN#nB8U9kfIQ3|{pc6a9ezjJFGLOpfKF4i&)Wjijaar9I5UX9u|G z46GBV*x69!7w|3%uk?q?)uM)VE~_bX<2d-?+uMKWHK_s5twG>PAkC?wApjQQ1Tq|^ zLC1UGa(Mj~1&-=lWm=7RrW+_ktO(u>X9=tMBk~{?P!rXjNiUy?<5($UMH-IvW|i5+ z#>3dOOs;X-<+S6))F;hmzUOdmnZE*WU@GnYDxCbq`GkQ_370VED-5_3jd2$$%$MdLNdJw$L>ZAndn}B^EN*eElntVCoN25PYS2A>Y20jnrVpP zdrb@g*vKkAK^V830i8=xxo9XrGDtJ?w)ekbLc3o(_G6oJde>t2qxFsHj~}pP=MNh+ zmFQXa{3N_ufG`tzv2OfO`bc@n;?Tmd>?p}&EvW^leoMAfZ>o3AM8o(2BfEC$a$&Jx z<5D)DOiC)j%h^h(n!o(F;UfoAgsm0eav;u@cYzX+A!w<}8P^K-(4*y6EGh{oyo2wu 
zZ*Z}P4!2!pF@P-Npu%;x2bBgsY#wQxlfA)n3EuLBX~4mCf3T-#-aQP@I|g>Ydb!K6 zAZ5>fCUc@8-~Z6d`Kr9wTSVrsF{EkDKvT_nG(yOe!`hh zhRHw6Sm2+S_$Nq9i*wdc6)|yt2)JGKAwDl`?i=Mda1m+ib#CARFn_k1Pb&i@|4yd7 z4uxUelxj@=0^GbSb*1^S3h=YPn-UDZnmBRV5FdC@$Y=*{CU=vQs?kKqr23u zQC(df8aFtdKm-y+oXQd&UkRK4y=jR4ioK&D7~e?uQ$gu8r9ea8t`(hgbW-uE#BSx5 z<(~$5>ew6J^b%mB={fYJ4r*3vKNGZlHUea#zR}fMBXWh^BvyxmmvCp>L)NlNr$~LL zt66xa);UsEAf?Mirm1DV64y!B9Cg@{j+gl{0^b za)0CaV`yfCjNRB~T5NG5dyFMpXPhjdiD-;%oJLKyY>^pCyA9Ss<^7<`)s3cyWUi;rzJ%iTG5f#zsCumW#&Q?su|fB2 zUhe9Z!J(n6S4O^%&H zR&Pqfhe_Vrnn#~^rPt!xt$e#z@e8N_jzChJV{h8+NtS#HZ`&d^z21IVbmIEFlYjhD zna!Q%+lX~Lfc|3Yo)<9wyTUewU(*ItTFpsZc;;DQHskCuye>u3qsZMg{)SZM#nGh`eyBym z*}Wp8GzfK-c17Oubk~#y%uD+@=z^ffvu*$BZ(d8a6qMGe#n=a16s9->bMin35i2y$b1;244ayuU0u5p>Tky9Obx|Q)P%GQ89iJ-+(T>@b~+*o zgIkYk>=`fQGrSMnAD|0@eZ?O}QdHDSeB1E;DvR^2l_1^WO?`zqYF9NAjLTUpcf-kA zSSxmF>+S;|U$^)Kb^6@L)m<90Q~pE`PI&jl4Erd3qb}j;G=4J~%HsH)(5f08IJQ_@ zvdOV+Z^fLIfa;bAkOB#1e%v+}6zQ|f@1I_UftSQQWh40=twgl6+eoFr8=AQs?>KEKWCh!wBAk}x`}Q!(-#9;f17m7A5ybxvKCya9u3}1 z*vxr8XZG=wdM1Y@Af(dIa+&wwy^vv@ql}fGYHK6zHfu&fRmcv1unVSH2$7K)_Luc9Z*5LAbn7 zNEY!5B)ZvkE0bKw2{XiGg)+!6w5<=g+{eR0dIV3W%ne7}CF@RyMvl+=rn|sCoPsFjfFT`O%+mKyE`m=%nnE>@ zDFdk5Xal)K^i0#xIj(tWzQa4pF*A_)~es{flf0O7o+!zAql@`2}nKW+72zR@9fiVxQlGVr?IB z{>PKKl8&&B;LB?|e6s(5)w=evU)=AYPUOj)QhgNH(=(fKF(E=A^V3=_D8@har0rn1NAbtNuI}EUTQ{An3Q5BDF3zfac40d2g?G%Bme-!_t zw7{9t8?jf=>V*@i?<2&P&}iF*mwZH%iJ8t;{pb*33Kr?T2{LsR7Yd~{{#WmAkbCP^ zCV1cN@Uj$S+_Lh@i_w6W34mDz$CEP63$`LHt}1IKbz43Yb(ty5Nf#u_h>sWvD8q^Z z{EktwM3=Dx!vT&|k3T?MIc36e7IfqW#^a`q#3h3SepU>`=)#czwIhO=DiUKh-FIgy#-Haa*Bo z@qNnVho0cMdss@T4?O9Js^~bJkVKawV%S*Q*xnx#)^OhkpvBm7PBJwOC+vNoM4EM1 z1;31Byi%Lc-4|x|swN>u_}+F@V(TQqO@=MBJ_5wc$tntpHVJ-)iDj>NGD?<`$>{o93Wt)p!mEjE2jD2Fg-xtn(=$SA(+0Uq>;Gx6gs}Hv Y2iTAC9KRYc{R#lEc22f6)`Y}=0fq6PYybcN literal 0 HcmV?d00001 diff --git a/paper/src/graphics/gcp.webp b/paper/src/graphics/gcp.webp new file mode 100644 index 0000000000000000000000000000000000000000..c51bd7e98e7e000fda8d0e34936d195d139c0472 GIT binary 
patch literal 8484 zcmV+bh&y8({nz+qU(!&9gCYKR3&aa<*+3&k3+w|G%nh%Jej;-Fx;KpUK?a zeI}pv%-r)oTr&mGs-&i}l(f$3=mxN`pc*=N@NN9+vuHYfmMziSsj3v}R40Te*) zn$f{TL?gI0@X&zW0?0JA9buspPYt+4t}Qs6loTp(>w-lSgha&J#3iKwhXVF`BfJE6 zziVP5w*n-C9%iCRDZpBo^zDJjt&Mllf)~JgCBr@U(4htkwE{};1|~#P>Ohih?TLgm zI)ufzRleZv?y>=&x=RXom#%!NZrq{r{kX%Bt+>h2_D_+NUL3CAhu90CUB4Q^-5utc zu3xRYo&qOjrf`>t+%2-$q3c&GsRT%pOxv<;+qP}nwr$(CZQHi(_r7iOc_x4s@c#n$ zu56h^8j=5L1(R%8mBv9W%aGA6WEmKwnecvLly6YaxCCj&w7yPSK}vFA=Re(0|1(5Z ztLum*q^ty0D5$A0>amEr78JBoQ2#~JiAb?OWXujI+Yzspj2!k?ky20?H7cmjj!;l< z1l2AWy~8xa$yoyfCaEyS48eHjB-}7k`ijOd*20aMB9upkG}~B>YsL zl5~(#=gFPag8C<-DsRn8Hxf~^PJTf>2x?rU8AU$2QIOJtt5QLY)yElb9vZ3rhccv^ z9_^}DQ1@SW6aNhTIYWJtpnkck7EHf`S^&l#<&=32(V5|5nScMrDfF#jyVU2Gfnkg> zk9JD^^rVy5MR3bN5mSXz?6^x-0z513K(4&fnm}Tefr2V^itTE6RRD-!1K z%D|vbWlpi{RxKUZusMQwA3b+6tTD>b zFX)20bn*)7wV-E(_bcAx-+SseJY45?oi=AMW-&%xM=Up$;~(DR z@xA1?2>G|a?a!1t8eSD(^F9JJj)yP8SWY?_>px~sui+(wZ~czTE{hP{&C}` z*&?L0orG5ptLS+UGW^ys#t1?EX`$m9mJ;MOnPQ-zwk6ohh*xLGj02x6sA1>a?K&+f zh^K$#iLhPjJMu^DYUCU!DF4HBT*DGaMpKp;D5!lhK|L37hR;Yl#-7EZbA3Mh!BT^9 zf6E>jB1Z&u(sk5=TCa}IJz zTw;iTk@?eYy641)U&Cx$?nAWZ0xZ2^9Fm!*VZEtGVbckFAoUeM0KDIP+)h|;+EG|_ zfrlklPs3d6ufWXPAHxJk-z|-G<~zQTe8A6%Y^;KdA4H0ouGKJ>F!AA6Fz3eWu+pMF z=s9K^_#6@VhA=(oVc+c6lSpMM&$~%Lyd(lKhaKMfpq9ox@fn`I;pBth^GDzgfu+pl zA-%`vWD=Qqo-|lM{7M7jr%!RiVV!esgz7)cy6qvXF|P@p8iD`lQHzD;lgI3n#>P!y z#0`R}*0~X@e=x_UJMcd*B?7YuqauxoTL47*;y@w~LD$6z+XGRx8* z#1|wWep~MB8B|`l(U_;dz;erm!OM)m4FZKdCRsc=3}jl7*oGHfDy*<&s{?WE*=KW8 zZ?C-M=$#rLiDBMAqfO0I5fWujb5|Pkl$`!gL0*t01z^eU=&OFi`EO5fo&*SAZ1l_rX||W4!%GoId2(I6KE}BKaCHw+`Zs3?TpHLVB&wD68!05tvIyS-V~n43TNI zx36&^Y~uUdm(z!weXmF~j?52(cu!`B=rwd`Ap2||UlAeodp+k%+L6S;I-oVjm=&gyTMTb{?M;LCCYYOYs z-C9l`a%sb}f*Hg|Go-kzJSlf+8JNF)o6*p%%rw zPn!G;aMXhPkuEzl#rKO~KQC_~CSIz`{!4N1BG}*atJK6i56%$7K(JF0e1eH5 z>N11CPl9v3OnpyDFaY9Dqsv*cn;ek& z%+elPr}da*(cebn5%`7PTB|9}&KAU9miGS(ce^lknmG=U_XtegLOt`y3BK7<|I%pn z%;joizDK`PAJ#e0o=U<^i*@z|e3ew5li;+G`N_*!^2jKs*Dj908P(>!&qH#k4M_-4 
zCL!D=A#5d~UDsINN=Ocbprj0LK?kHXghO%19WX+stDqdh|RGwX%Tr^YWH&wEF|j zlHVz6WdZHy-hK<~H?wK=h^zEeKUxvU|PN%4qMHKs#$T*Y2N_6M4d9Mc4 z$|5TNeH1y5FmZXDqE;5t_fsVO%Q9zX+INbkLpPu-r2RepxB8Wlb>^}Nd}z-pwk-=O zVLghxzt|7jwo9!nr2Rg>cJe7Q|D;!I%PF=k3n}m4C~`k5j7J+zv29sMsrRSI{=2qv zifzk6O8p3m{70Bz1n&1_ms(j!`+a^f_xJQlps2tMTQvXS{VBFB3+XxkMp1)(N9O$Z z?a}w_^|oapf%tr$>_iP0Zq~oSlpJo&mxTm!gOzt(M1~Ky?9qXkl)*m<;i`B*{2PgW zUp1_l9BlC?A*>Y5gXekT84No%g|==I!pkDLR%Bheh9lS67I2#Ow}P7vwBHp4$UTMZ z-QHoCa+NDDF_N1B9!o&k2AjbqXOOR+5Zdp|9uq}%?A~04@c3a z3Oa<&Slf!-o|`G`lwesfA*|f)2`L)-`*;g)6kAxni(6;WqNYW?5^yvHe}$g0wb;r< zMVaj*;TEfh=ne{Y8CPsK>^7x%B3Pm`acc#&SexK^6TV2C;t43aD8YR;DEMMo)t=uY zQDQUY@5RCl@8y4lBX2AZn;~pi@LjZ7^Cmn|T6QA05<5_^X5UAQ=mjD&mTNEg#EumF z7hQY=&HH$}&9r8Crmz=+9|9(GZWX~@yFDamwyzWPUf+?8c+8Vu_7(hM z+@#$EW_o!nwI*)=+afH@by%ErcAB*X4+NvuxdoQ_*lPWgKlzg{n?=5zo=bPcDtkEx3(~fNF8|cze_&u zMtj3g4@3w8VvewKNa*2WMIpqZ`7C2=bw9vgiy}}{w=(?y>_#uRk`WOQHNwi5eh?x5 zeahd9YeF9WS`>jw6Fg%l3SOp__=$;SWo!EOp70aC<+==K_Sd2aR5483vT8`^)@Rw5 za2w;We3TzeG`aX+RE642x3+n~M->+O^YJBm#Y(rU$_>R9N=6qFZ2D_a1nM5CtSESG zxuy0!8l)cF!fs~;bch9C(@N|Dg4?V(>}6y)?qw8#5@ndju;7g~k@19B{2ug8Ahxg5 zQesz)6>}d%CG@H(b`NeAgGqghBtnE-?0(#b%LD_#61wbE1%G9VM3^_f1YX3T;Df!| z626s>ilVkX?4HgbmC%XlXGLK(6q&FKgO|P~nS9Uu6cu)vxfQ8y9m~&E5M1&`CalHa znj(=Tv+ai*luGdWG8}UlqY;v=?I9p|T8d0ti3pY8dFv}!TN~*Fi(Hncrjf(9h8>6D zt{IUD$%6O59E$KfL&2^=3NL-xMLn#<79b{~QQ;P0d8m6Gl5b;CSj#$#F1&}8iwZbn zOHq22$Jmuv_^b?9_zojBZ$XaeipEx3#8UGLj<73+H?xzNYLTCH5XNAjNEBLPMmi+=q|f^Q58 z7=oL2n^(%5wcV11W>a|5>GCyt)EE5bm@1dHw&Oyn#r}>{SOSF~lQ*u>WEOL3`pge} zLBI{27#2LC@bLUIL##W{zK)mAfKbb~C$Vl5F~ME~FR&G1^`sgFMCBMfqM^}3^XIar zV82GTp);_20EHjPBLvr3yuy?}Ft8^K0)`;PSm z?9B)1?96g2bWj9uSvo!;8OGstp&}YS< zG;GPJf(~aSTU$3HpFGZl;DT&cAiOA5k8x9{yKRqU7_<6h`b|d(78J7G79%*o23KpZ z7Z5eZ1{W)Z&^*M*2~H@I2+d^lCX?~vQ)M-bBKp^7^qrDjWo>?48F$Ut)gATuP_EBq z6$Y|Vq4)B2qEF67JGRhj$mR($2eLt1kr=eIu)L8XmMYH%MbR={RG7usCurfQijb&B zg#8z6wh)M!%7*L2ss%P@!yWTAgi_f>Ucpf|Dz-(mF~B+0@FV3}HSvCevpp)NsPGOx z*E}nQRZ}Z2);bU3og5|j04?ScK_T!)))dfdmr2p!jRHzyx>z%-e(zijzGhegDa~Qc 
ztn@M|4*En^pL>EG-af>y!i}*$QI_IULP0viy_XKlw8(QyYx5f^U_oqIqg!q{nBj`H zR@|065AzF`mVuI#hfd&!$K+FUjxTo}XPGG$3U zI1S*0ump0&y-Pn|mzhSLo>@-A`2D2gwS$Dm05HT~X-@Y;AtBK!QS#IMU2dF!w(S4M{V;H6#?q zIf1NMc(4J>2X`XGj|NJ|xZs&AZ##(e3(_6-FeoY79V0yiy$nArnY4o|DI8t)i450? zBmJ*dIDzae4A(D3+P{)zYITOIdo}Vuu{6YwN3j)=y*)e_0=$ecy<5t7dl$S25avdh zZ9>la2Z#^jj-f;pNQ6=+5n^0zC?!_X8p_(45Ti#yp+S;AQph31k2WIqxtm-;!|O?yM;ai(oYqJVzZ26%x^d7(OqdE(~!a zkFpFugHZ1Rv2|{?M{E|fC1Do-jC87@zkmu!vHSGDVa;LfFkU5FEF3r}U}1tpCzr*v zlkU@jwU9pCS@w1O6?Vl!UKZ*_M9m?8DV(qbng#9ZVON?$B^+-@sF}yb7`_JzUD-@b z3e+FEZO!tm+qLIH7@sAZ6%N#s4I%8xGCsd_Zl_eQBBJ|26Om{cBPF37DENX4XR+?^`yPPsCjoX_=DK1<0cPX9O16<~ zhXHIX(0-}pu@*B@4Z85c zmuBsO8z+;|1lb8(mjfICucquIvh@dwT_d9v%}CA)wuX2;=hT6Q5>cGO9$3pMq!E!| z4>~_qFbR4V5v4=Hm60fQ3x=wo#PztTUS@h`&p*BwG z&#<-m5mCIt3$O+$v>>9E(6O*8FuWrX^@bW_J%8+wVT2_&|5 ztn({OCL*CQR0An~n30Pil$o*+Ikl&ZtGTRbMgl$@@9i>CGzYX)vwx!u-R!?YEBBGyw?uWH3G?<8JS11ALtJp|cozTqJ2gtc)L$a0&xodPt zc0|0|vLk8ih>^@{-W1t?piOWZHBbpPG8aFC;cJrg1EG|5INo$7@JKf+Y> zt5|VSnYHT)adVtT3{Us?aEK3yvpd66Ja$&7fs!nEYlfyOoMgmka{A-_GVbMoxJ?Y) zMAk18>kh!$BN@f9+wJ@Z;z2&5egl^ zBTyR3?SSDEtSS762aSRRbSoEyF@gf5Lfph9Esr`B&`Z@k9dTM z3}2e0Qy^Y9S+B!js!4vRYbAvI;j_+SCU&ljvoq80~ z$O^hb{MA97Yz80b?A*UUK*u2yMn~luEacKOaNiIDT%DS6RFNn;3hte8zm{M>Xp$B3OqDk zqj46)=`k`YRN$WRYNH3RxjjM<5A3dyy(W7mb~8au0p<%&#yi$vmN&#|1F&@#jgWbI zmfxAcX)>Os^)%M&5BTqakmGkraHtyd@I(G)vQpi%T(~Q%*GpDn#JI&>KgEkgt+!y zf;hJv$#7^vBuWEP7SW2`lEE#Y-GP)xYn)*&0v%n(RX$v6sVeLe@*YWpnNv5qve10% zEJ17mh1-|x`>Iu0d)@A&-cW^8uKXacN3d8?K5!VTjwHoiD(pY?TJe$}Y1)rf#|0s3 zq{3O(c#zk_`>b94?1+NW3M^nB{XUtJjK*uS>PiMEc^t(27uM$I)9chEmlZn`*3P*y z=2ppSw{6Yfg$a&Ml<~GrW7iP%3h5Y~8b_MR|D(dpRsQ1b z2fK43t6#Ufith(hn7QR*)tf!6KC9QxAl3C$n7PX5zx`nAwqnZ zWEobUTgn6#NrVO{Fbm!`d{(B)Ev>VJ9}uuub!kGQ&Jee*9E9o~DT~3mbrtBo0M8l` z5BRJjxh12}1c15vITU6Kg;`lXDp%eyE5(p@en&OTSv!+*Y#lYs@_Uf-=o6@6%v+>)hFmuZ1-=xdf>HpFEkU!9y)dlp+lY!u?Am%bvt@w*>%O_ry)hL*dgGh+TDNQf=U zm>GW-(#*fEnbaK8Y=2ER(MOxfTZ&8ve<4c=J#h;%L76=iFq0aP`h692n^o1LxYI-W zS!=n6q9^7$V8~YyZihN1&U{c7-&LA3! 
z+tNT*Ez*n@tRW4F74#y|eEA}EO|+5~Ttw8Ox~X9|iYM#}JK^nCK!}+&@g8ypE%&usO_aN;K2`F_Y5el^%faC8>?jq{n zEXhEXX&Gd)+nI6Nmh=(sq7`w<-zY%1Qg+s-^3V@3I`B zM=#t_FR$T3gdTg|VLEy1`$*xLW{B3>NH6Ks2OuYBcS%6;SHD<*w6UoDN4M1bi`@w~ zvx{I) z2(xmbw38N5yAkzrRnA=X!wG+(rmI3JZx@@lk344=O3$H1^!O_eSGeCNmLktUq4f1` z@(gBgVVxz6{pjx%baWZJ@O^GuBoQJfhvj0$=DmI^LGO`TjTKu!SsyuZ(9=4%I%LgO zy~@oFkOS*B99wg{S3RTeh+@~sLGqjcyX+5ThCNLeARp z)VVB8LO6>bdMabJ&tp)q_bjdM|H5G6AsCCgK%94ejk*tGcv-{Q%MFG&D=t~({M#~| z(uuXGn*ww18ra>NW)74#BwzfpymXVv+iQEgehc5GBv<_Xjg{r8|K@7TXF^Gu2Y zU+!TB{F_jec;>6n2LCQam5&*WQ+*Oeoo5ub)EoFRidwI*!EP;8R{g-oQxv>+^={wX S>$9%hrQ~ty(gOZpfaU;XvP@6_ literal 0 HcmV?d00001 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..70b1f11 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ +[build-system] +requires = ["setuptools>=45", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "phantom" +version = "0.1.0" +description = "Pricing Heuristics Against Non-human Transaction Orchestration Mechanisms" +requires-python = ">=3.8" + +[tool.setuptools.packages.find] +include = ["experiments*", "lib*"] diff --git a/scripts/tpu_pod_run.sh b/scripts/tpu_pod_run.sh new file mode 100755 index 0000000..8e1d722 --- /dev/null +++ b/scripts/tpu_pod_run.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env sh +# Executed on each TPU pod worker via `gcloud tpu-vm scp` + `gcloud tpu-vm ssh --worker=all`. +# Authenticates with Artifact Registry using the VM's service account metadata token, +# pulls the TPU trainer image, then runs the W&B sweep agent inside Docker. +# TPU chip devices (/dev/accel*) are exposed via --privileged + /dev volume mount. 
+# Required env vars: WANDB_API_KEY, SWEEP_ID +# Optional: AGENT_COUNT (default 1, 0 = run until sweep ends) +set -eu + +IMAGE="us-central1-docker.pkg.dev/phantom-trc/phantom/phantom-trainer:tpu-latest" +AGENT_COUNT="${AGENT_COUNT:-1}" + +# use VM service account — no manual key needed on the pod +TOKEN=$(curl -sf -H "Metadata-Flavor: Google" \ + "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" \ + | python3 -c 'import sys, json; print(json.load(sys.stdin)["access_token"])') + +echo "$TOKEN" | sudo docker login -u oauth2accesstoken \ + --password-stdin https://us-central1-docker.pkg.dev + +sudo docker pull "$IMAGE" + +# --privileged + /dev mount gives the container access to /dev/accel* (TPU chips) +# --network host lets JAX reach the other pod workers for distributed init +sudo docker run --rm \ + --privileged \ + --network host \ + --volume /dev:/dev \ + -e WANDB_API_KEY="$WANDB_API_KEY" \ + -e SWEEP_ID="$SWEEP_ID" \ + -e AGENT_COUNT="$AGENT_COUNT" \ + "$IMAGE" diff --git a/scripts/tpu_sync_repo.sh b/scripts/tpu_sync_repo.sh new file mode 100644 index 0000000..a26e241 --- /dev/null +++ b/scripts/tpu_sync_repo.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env sh +set -eu + +TPU_NAME="${TPU_NAME:?TPU_NAME is required}" +TPU_ZONE="${TPU_ZONE:-us-central2-b}" +TPU_PROJECT="${TPU_PROJECT:-phantom-trc}" +LOCAL_REPO_DIR="${LOCAL_REPO_DIR:-$(pwd)}" +REMOTE_REPO_DIR="${REMOTE_REPO_DIR:-/tmp/PHANTOM}" +ARCHIVE_PATH="${ARCHIVE_PATH:-/tmp/phantom-sync.tgz}" + +FILE_LIST="$(mktemp /tmp/phantom-sync-files.XXXXXX)" +CLEANUP_LIST=true + +cleanup() { + if [ "$CLEANUP_LIST" = "true" ]; then + rm -f "$FILE_LIST" + fi +} +trap cleanup EXIT + +if [ ! 
-d "$LOCAL_REPO_DIR" ]; then + echo "local repo directory not found: $LOCAL_REPO_DIR" + exit 1 +fi + +if git -C "$LOCAL_REPO_DIR" rev-parse --is-inside-work-tree >/dev/null 2>&1; then + git -C "$LOCAL_REPO_DIR" ls-files -co --exclude-standard > "$FILE_LIST" + python3 - "$FILE_LIST" <<'PY' +import sys +from pathlib import Path + +file_list = Path(sys.argv[1]) +skip_prefixes = ( + "wandb/", + ".venv/", + "venv/", + "node_modules/", + ".next/", + ".turbo/", + "__pycache__/", + ".mypy_cache/", + ".pytest_cache/", + ".ruff_cache/", + "paper/build/", + "tests/e2e/test-results/", +) + +rows = file_list.read_text().splitlines() +kept = [ + row + for row in rows + if row and not any(row == p.rstrip("/") or row.startswith(p) for p in skip_prefixes) +] +file_list.write_text("\n".join(kept) + ("\n" if kept else "")) +PY + tar -czf "$ARCHIVE_PATH" -C "$LOCAL_REPO_DIR" -T "$FILE_LIST" +else + tar \ + --exclude-vcs \ + --exclude=".venv" --exclude="*/.venv" \ + --exclude="venv" --exclude="*/venv" \ + --exclude="node_modules" --exclude="*/node_modules" \ + --exclude=".next" --exclude="*/.next" \ + --exclude=".turbo" --exclude="*/.turbo" \ + --exclude="__pycache__" --exclude="*/__pycache__" \ + --exclude=".mypy_cache" --exclude="*/.mypy_cache" \ + --exclude=".pytest_cache" --exclude="*/.pytest_cache" \ + --exclude=".ruff_cache" --exclude="*/.ruff_cache" \ + --exclude="wandb" --exclude="*/wandb" \ + --exclude="paper/build" \ + --exclude="tests/e2e/test-results" \ + -czf "$ARCHIVE_PATH" \ + -C "$LOCAL_REPO_DIR" . 
+fi + +gcloud compute tpus tpu-vm scp "$ARCHIVE_PATH" "$TPU_NAME:/tmp/phantom-sync.tgz" \ + --zone="$TPU_ZONE" --project="$TPU_PROJECT" --worker=all + +gcloud compute tpus tpu-vm ssh "$TPU_NAME" \ + --zone="$TPU_ZONE" --project="$TPU_PROJECT" --worker=all \ + --command="rm -rf '$REMOTE_REPO_DIR' && mkdir -p '$REMOTE_REPO_DIR' && tar -xzf /tmp/phantom-sync.tgz -C '$REMOTE_REPO_DIR' && rm -f /tmp/phantom-sync.tgz" + +rm -f "$ARCHIVE_PATH" diff --git a/scripts/tpu_vm_sweep_agent.py b/scripts/tpu_vm_sweep_agent.py new file mode 100644 index 0000000..f0d99b6 --- /dev/null +++ b/scripts/tpu_vm_sweep_agent.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +import re +import shlex +import subprocess +import time +from pathlib import Path + +import wandb + + +CLI_MAP: dict[str, str] = { + "algo": "--algo", + "total_timesteps": "--total-timesteps", + "alpha": "--alpha", + "N": "--N", + "n_products": "--n-products", + "lambda_coi": "--lambda-coi", + "info_value": "--info-value", + "robust_radius": "--robust-radius", + "robust_points": "--robust-points", + "learning_rate": "--learning-rate", + "gamma": "--gamma", + "gae_lambda": "--gae-lambda", + "clip_range": "--clip-range", + "ent_coef": "--ent-coef", + "revenue_weight": "--revenue-weight", + "max_steps": "--max-steps", + "margin_floor": "--margin-floor", + "margin_floor_patience": "--margin-floor-patience", + "arch": "--arch", + "activation": "--activation", + "jax_num_envs": "--jax-num-envs", + "jax_num_steps": "--jax-num-steps", + "jax_num_minibatches": "--jax-num-minibatches", + "jax_update_epochs": "--jax-update-epochs", + "jax_anneal_lr": "--jax-anneal-lr", + "checkpoint_interval": "--checkpoint-interval", + "action_levels": "--action-levels", + "action_scale_low": "--action-scale-low", + "action_scale_high": "--action-scale-high", +} + + +def _to_cli_args(cfg: dict) -> str: + parts: list[str] = ["--jax", "--no-wandb"] + for key, flag in 
CLI_MAP.items(): + if key not in cfg: + continue + value = cfg[key] + if value is None: + continue + if isinstance(value, bool): + if key == "jax_anneal_lr": + parts.extend([flag, "true" if value else "false"]) + elif value: + parts.append(flag) + continue + parts.extend([flag, str(value)]) + return " ".join(shlex.quote(p) for p in parts) + + +_SENTINEL = "PHANTOM_METRICS:" + + +def _extract_metrics(output: str) -> dict: + # fast path: look for the dedicated sentinel line emitted by run_local + for line in output.splitlines(): + if line.startswith(_SENTINEL): + try: + return json.loads(line[len(_SENTINEL) :]) + except Exception: + break + # fallback: scan for any JSON block containing eval/sweep keys; + # use greedy match to capture the largest possible block first + for block in re.findall(r"\{[^{}]*\}", output): + try: + obj = json.loads(block) + except Exception: + continue + if isinstance(obj, dict) and ("sweep/score" in obj or "eval/reward" in obj): + return obj + return {} + + +def main() -> None: + p = argparse.ArgumentParser( + description="Run W&B sweep where each trial uses full TPU pod" + ) + p.add_argument("--sweep-id", required=True) + p.add_argument("--tpu-name", required=True) + p.add_argument("--tpu-zone", default="us-central2-b") + p.add_argument("--tpu-project", default="phantom-trc") + p.add_argument("--tpu-repo-dir", default="/tmp/PHANTOM") + p.add_argument("--count", type=int, default=0) + p.add_argument("--workdir", default=str(Path(__file__).resolve().parents[1])) + args = p.parse_args() + + workdir = Path(args.workdir).resolve() + env = os.environ.copy() + + prepare_cmd = [ + "make", + "train.tpu.vm.prepare", + f"TPU_NAME={args.tpu_name}", + f"TPU_ZONE={args.tpu_zone}", + f"TPU_PROJECT={args.tpu_project}", + f"TPU_REPO_DIR={args.tpu_repo_dir}", + ] + prepare = subprocess.run( + prepare_cmd, + cwd=workdir, + env=env, + text=True, + capture_output=False, + check=False, + ) + if prepare.returncode != 0: + raise RuntimeError("Failed to prepare 
TPU workers for sweep") + + def run_trial() -> None: + run = None + try: + run = wandb.init() + cfg = dict(wandb.config) + cli_args = _to_cli_args(cfg) + env_trial = dict(env) + env_trial["LOCAL_TRAIN_ARGS"] = cli_args + + cmd = [ + "make", + "train.tpu.vm.run", + f"TPU_NAME={args.tpu_name}", + f"TPU_ZONE={args.tpu_zone}", + f"TPU_PROJECT={args.tpu_project}", + f"TPU_REPO_DIR={args.tpu_repo_dir}", + ] + + proc = subprocess.run( + cmd, + cwd=workdir, + env=env_trial, + text=True, + capture_output=True, + check=False, + ) + + if proc.stdout: + print(proc.stdout) + if proc.stderr: + print(proc.stderr) + + if proc.returncode != 0: + if run is not None: + run.summary["runner/exit_code"] = proc.returncode + raise RuntimeError(f"TPU trial failed with exit code {proc.returncode}") + + metrics = _extract_metrics(proc.stdout) + if metrics: + wandb.log(metrics) + for k, v in metrics.items(): + run.summary[k] = v + run.summary["runner/exit_code"] = 0 + except Exception: + time.sleep(2) + raise + finally: + if run is not None and wandb.run is not None: + wandb.finish() + + wandb.agent( + args.sweep_id, + function=run_trial, + count=args.count if args.count > 0 else None, + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/tpu_vm_train.sh b/scripts/tpu_vm_train.sh new file mode 100644 index 0000000..33c798e --- /dev/null +++ b/scripts/tpu_vm_train.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env sh +set -eu + +REPO_DIR="${REPO_DIR:-$HOME/PHANTOM}" +PYTHON_BIN="${PYTHON_BIN:-python3}" +TRAIN_ARGS="${TRAIN_ARGS:---algo ppo --jax --total-timesteps 200000 --jax-num-envs 32 --jax-num-steps 128 --jax-num-minibatches 4 --jax-update-epochs 4}" +EXTRA_PIP="${EXTRA_PIP:-flax optax distrax}" +INSTALL_FULL_REQUIREMENTS="${INSTALL_FULL_REQUIREMENTS:-0}" + +if [ ! 
-d "$REPO_DIR" ]; then + echo "repo directory not found: $REPO_DIR" + exit 1 +fi + +cd "$REPO_DIR" + +if [ -d "wandb" ]; then + rm -rf wandb +fi + +# keep install idempotent and avoid re-installing jax/libtpu each run +if [ "$INSTALL_FULL_REQUIREMENTS" = "1" ] && [ -f "requirements.txt" ]; then + $PYTHON_BIN -m pip install -r requirements.txt +fi +if ! $PYTHON_BIN -c 'import flax, optax, distrax' >/dev/null 2>&1; then + if [ -f "engine/jax/requirements.txt" ]; then + $PYTHON_BIN -m pip install -r engine/jax/requirements.txt + fi + $PYTHON_BIN -m pip install -U $EXTRA_PIP +fi + +if [ -n "${WANDB_API_KEY:-}" ]; then + if ! $PYTHON_BIN -c 'import wandb; import inspect; assert hasattr(wandb, "init") and callable(wandb.init)' >/dev/null 2>&1; then + $PYTHON_BIN -m pip install -U wandb + fi +fi + +if [ -n "${WANDB_API_KEY:-}" ]; then + export WANDB_API_KEY + exec $PYTHON_BIN -m engine.train $TRAIN_ARGS +fi + +exec $PYTHON_BIN -m engine.train $TRAIN_ARGS --no-wandb diff --git a/scripts/wandb_agent_bootstrap.sh b/scripts/wandb_agent_bootstrap.sh new file mode 100755 index 0000000..effa1b5 --- /dev/null +++ b/scripts/wandb_agent_bootstrap.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env bash +set -euo pipefail + +need_env() { + local name="$1" + if [ -z "${!name:-}" ]; then + echo "$name is required" + exit 1 + fi +} + +need_cmd() { + local c="$1" + command -v "$c" >/dev/null 2>&1 || { + echo "Missing command: $c" + exit 1 + } +} + +need_cmd git +need_cmd python3 + +need_env WANDB_API_KEY +need_env GITHUB_TOKEN +need_env REPO_URL +need_env SWEEP_ID + +BRANCH="${BRANCH:-main}" +WORKDIR="${WORKDIR:-$HOME/PHANTOM-agent}" +AGENT_COUNT="${AGENT_COUNT:-0}" +AGENT_LOOP="${AGENT_LOOP:-1}" +RETRY_SECONDS="${RETRY_SECONDS:-20}" +PYTHON_BIN="${PYTHON_BIN:-python3}" + +mkdir -p "$(dirname "$WORKDIR")" + +ASKPASS_FILE="$(mktemp)" +cat >"$ASKPASS_FILE" <<'EOF' +#!/usr/bin/env sh +case "$1" in + *Username*) echo "x-access-token" ;; + *Password*) echo "$GITHUB_TOKEN" ;; + *) echo "" ;; +esac +EOF 
+chmod 700 "$ASKPASS_FILE" + +cleanup() { + rm -f "$ASKPASS_FILE" +} +trap cleanup EXIT + +git_auth() { + GIT_TERMINAL_PROMPT=0 GIT_ASKPASS="$ASKPASS_FILE" git "$@" +} + +sync_repo() { + if [ ! -d "$WORKDIR/.git" ]; then + rm -rf "$WORKDIR" + git_auth clone --single-branch --branch "$BRANCH" "$REPO_URL" "$WORKDIR" + return + fi + + git -C "$WORKDIR" remote set-url origin "$REPO_URL" + git_auth -C "$WORKDIR" fetch origin "$BRANCH" --prune + git -C "$WORKDIR" checkout -B "$BRANCH" "origin/$BRANCH" + git -C "$WORKDIR" reset --hard "origin/$BRANCH" +} + +install_deps() { + "$PYTHON_BIN" -m venv "$WORKDIR/.venv" + "$WORKDIR/.venv/bin/pip" install --upgrade pip + "$WORKDIR/.venv/bin/pip" install -r "$WORKDIR/requirements.txt" +} + +run_agent() { + local cmd=("$WORKDIR/.venv/bin/python" -m engine.train --sweep-agent --sweep-id "$SWEEP_ID") + if [ "$AGENT_COUNT" != "0" ]; then + cmd+=(--count "$AGENT_COUNT") + fi + + ( + cd "$WORKDIR" + WANDB_API_KEY="$WANDB_API_KEY" \ + WANDB_ENTITY="${WANDB_ENTITY:-}" \ + WANDB_PROJECT="${WANDB_PROJECT:-}" \ + "${cmd[@]}" + ) +} + +while true; do + sync_repo + install_deps + + if run_agent; then + if [ "$AGENT_LOOP" = "1" ] && [ "$AGENT_COUNT" = "0" ]; then + sleep "$RETRY_SECONDS" + continue + fi + exit 0 + fi + + if [ "$AGENT_LOOP" != "1" ]; then + exit 1 + fi + + sleep "$RETRY_SECONDS" +done diff --git a/sim/requirements.txt b/sim/requirements.txt new file mode 100644 index 0000000..d38cfd4 --- /dev/null +++ b/sim/requirements.txt @@ -0,0 +1,7 @@ +gymnasium>=0.29.0 +numpy>=1.24.0 +pandas>=2.0.0 +stable-baselines3>=2.2.0 +tensorboard>=2.15.0 +jax>=0.4.20 +jaxlib>=0.4.20 diff --git a/sim/rl/behavior_loader/visualize_kl.py b/sim/rl/behavior_loader/visualize_kl.py new file mode 100644 index 0000000..e5cd1ef --- /dev/null +++ b/sim/rl/behavior_loader/visualize_kl.py @@ -0,0 +1,117 @@ +import numpy as np +import matplotlib.pyplot as plt +from collections import defaultdict +from models import BehaviorModel, AgentBehaviorModel, 
aggregate_event_transitions, kl_divergence + +def event_frequency_distribution(mdp): + evt_cnt, total = defaultdict(int), 0 + for s, trans in mdp['transitions'].items(): + evt = s.split('|')[2] + for cnt in mdp['trans_counts'][s].values(): + evt_cnt[evt] += cnt + total += cnt + return {evt: cnt/total for evt, cnt in evt_cnt.items()} if total > 0 else {} + +def transition_distribution(mdp): + trans_cnt, total = defaultdict(int), 0 + for s, trans in mdp['trans_counts'].items(): + src = s.split('|')[2] + for s_next, cnt in trans.items(): + dst = s_next.split('|')[2] + trans_cnt[f"{src}->{dst}"] += cnt + total += cnt + return {t: cnt/total for t, cnt in trans_cnt.items()} if total > 0 else {} + +def kl_color(kl): + return '#d62828' if kl > 2.0 else '#f77f00' if kl > 0.5 else '#2a9d8f' + +def plot_comparison(ax, human_vals, agent_vals, labels, title, ylabel, kl_val=None): + x, w = np.arange(len(labels)), 0.35 + ax.bar(x - w/2, human_vals, w, label='Human', alpha=0.8, color='#2E86AB') + ax.bar(x + w/2, agent_vals, w, label='Agent', alpha=0.8, color='#A23B72') + ax.set_ylabel(ylabel, fontsize=9 if len(labels) > 10 else 11, fontweight='bold') + ax.set_title(title if not kl_val else f"{title}\nKL={kl_val:.4f}", + fontsize=10 if len(labels) > 10 else 12, fontweight='bold') + ax.set_xticks(x) + ax.set_xticklabels(labels, rotation=45, ha='right', fontsize=8) + ax.legend(fontsize=8) + ax.grid(axis='y', alpha=0.3, linestyle='--') + return ax + +if __name__ == "__main__": + base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments" + human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/" + + human_model, agent_model = BehaviorModel(human_dir), AgentBehaviorModel(agent_dir) + human_mdp, agent_mdp = human_model.build_MDP(), agent_model.build_MDP() + + human_evt, agent_evt = aggregate_event_transitions(human_mdp), aggregate_event_transitions(agent_mdp) + common = set(human_evt.keys()) & set(agent_evt.keys()) + kl_results = sorted([(e, 
kl_divergence(human_evt[e], agent_evt[e])) for e in common], + key=lambda x: x[1], reverse=True) + + fig = plt.figure(figsize=(16, 10)) + n_rows, n_cols = (len(kl_results) + 1) // 2, 2 + + for idx, (evt, kl) in enumerate(kl_results): + ax = plt.subplot(n_rows, n_cols, idx + 1) + h_dist, a_dist = human_evt.get(evt, {}), agent_evt.get(evt, {}) + dests = sorted(set(h_dist.keys()) | set(a_dist.keys())) + if not dests: continue + + h_probs, a_probs = [h_dist.get(d, 0) for d in dests], [a_dist.get(d, 0) for d in dests] + plot_comparison(ax, h_probs, a_probs, dests, f'From: {evt}', 'Probability') + ax.set_ylim([0, max(max(h_probs + a_probs, default=0) * 1.1, 0.1)]) + ax.text(0.95, 0.95, f'KL={kl:.2f}', transform=ax.transAxes, fontsize=11, + fontweight='bold', va='top', ha='right', + bbox=dict(boxstyle='round', facecolor=kl_color(kl), alpha=0.3)) + + plt.tight_layout() + plt.savefig('kl_divergence_comparison.png', dpi=300, bbox_inches='tight') + print("Saved visualization to kl_divergence_comparison.png") + + fig2, ax2 = plt.subplots(figsize=(10, 6)) + evts, kls = zip(*kl_results) if kl_results else ([], []) + colors = [kl_color(kl) for kl in kls] + bars = ax2.barh(evts, kls, color=colors, alpha=0.8) + ax2.set_xlabel('KL Divergence D(Human || Agent)', fontsize=12, fontweight='bold') + ax2.set_ylabel('Event Type', fontsize=12, fontweight='bold') + ax2.set_title('Behavioral Divergence Between Human and Agent Traffic', fontsize=14, fontweight='bold') + if kls: + ax2.axvline(x=np.mean(kls), color='black', linestyle='--', linewidth=2, + alpha=0.5, label=f'Mean={np.mean(kls):.2f}') + for bar, kl in zip(bars, kls): + ax2.text(bar.get_width() + 0.1, bar.get_y() + bar.get_height()/2, + f'{kl:.2f}', ha='left', va='center', fontsize=10, fontweight='bold') + ax2.legend() + ax2.grid(axis='x', alpha=0.3, linestyle='--') + + plt.tight_layout() + plt.savefig('kl_summary.png', dpi=300, bbox_inches='tight') + print("Saved KL summary to kl_summary.png") + + h_freq, a_freq = 
event_frequency_distribution(human_mdp), event_frequency_distribution(agent_mdp) + h_trans, a_trans = transition_distribution(human_mdp), transition_distribution(agent_mdp) + freq_kl, trans_kl = kl_divergence(h_freq, a_freq), kl_divergence(h_trans, a_trans) + + print(f"\n=== Global Distribution KL Divergence ===") + print(f"Event frequency KL: {freq_kl:.4f}") + print(f"Transition pair KL: {trans_kl:.4f}") + + fig3, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) + + all_evts = sorted(set(h_freq.keys()) | set(a_freq.keys())) + h_freqs, a_freqs = [h_freq.get(e, 0) for e in all_evts], [a_freq.get(e, 0) for e in all_evts] + plot_comparison(ax1, h_freqs, a_freqs, all_evts, 'Event Frequency Distribution', + 'Frequency', freq_kl) + + all_trans = sorted(set(h_trans.keys()) | set(a_trans.keys())) + top_trans = [t for t, _ in sorted([(t, h_trans.get(t, 0) + a_trans.get(t, 0)) + for t in all_trans], key=lambda x: x[1], reverse=True)[:15]] + h_tprobs, a_tprobs = [h_trans.get(t, 0) for t in top_trans], [a_trans.get(t, 0) for t in top_trans] + plot_comparison(ax2, h_tprobs, a_tprobs, top_trans, 'Top Transition Pairs Distribution', + 'Probability', trans_kl) + + plt.tight_layout() + plt.savefig('global_distributions.png', dpi=300, bbox_inches='tight') + print("Saved global distributions to global_distributions.png") diff --git a/sim/rl/thesis_core.py b/sim/rl/thesis_core.py new file mode 100644 index 0000000..99c9a4b --- /dev/null +++ b/sim/rl/thesis_core.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Dict, Optional + +import numpy as np + +from sim.case.thesis_simplified.simplified import Session + + +@dataclass(frozen=True) +class PricingStep: + sessions: list[Session] + demand_by_session: Dict[str, float] + demand_by_product: np.ndarray + purchases_by_product: np.ndarray + revenue: float + cost: float + n_agents: int + + +def clip_prices(prices: np.ndarray, min_price: float, max_price: float) -> np.ndarray: + 
return np.clip(prices, min_price, max_price).astype(np.float32) + + +def constrain_prices( + prev_prices: Optional[np.ndarray], + proposed: np.ndarray, + *, + costs: np.ndarray, + min_price: float, + max_price: float, + max_adjustment: float, + min_margin_pct: float, +) -> np.ndarray: + prices = clip_prices(proposed, min_price, max_price) + floor = (costs * (1.0 + float(min_margin_pct))).astype(np.float32) + prices = np.maximum(prices, floor) + if prev_prices is None: + return prices + prev_prices = prev_prices.astype(np.float32) + ratio = np.clip(prices / (prev_prices + 1e-6), 1.0 - max_adjustment, 1.0 + max_adjustment) + return (prev_prices * ratio).astype(np.float32) + + +def aggregate_demand_by_product( + sessions: list[Session], + demand_by_session: Dict[str, float], + n_products: int, +) -> np.ndarray: + demand = np.zeros(n_products, dtype=np.float32) + sessions_by_id = {s.sid: s for s in sessions} + for sid, q in demand_by_session.items(): + sess = sessions_by_id.get(sid) + if not sess or not sess.events: + continue + pidx = int(sess.events[0].product_idx) + if 0 <= pidx < n_products: + demand[pidx] += float(q) + return demand + + +def aggregate_purchases( + sessions: list[Session], + costs: np.ndarray, + n_products: int, +) -> tuple[np.ndarray, float, float, int]: + purchases = np.zeros(n_products, dtype=np.float32) + revenue = 0.0 + cost = 0.0 + n_agents = 0 + + for sess in sessions: + if sess.actor == "A": + n_agents += 1 + for e in sess.events: + if e.action != "purchase": + continue + pidx = int(e.product_idx) + if 0 <= pidx < n_products: + purchases[pidx] += 1.0 + revenue += float(e.price_seen) + cost += float(costs[pidx]) + + return purchases, revenue, cost, n_agents + From 56585b3de8e2b3198a0b242ca5537a9737835513 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Sat, 28 Feb 2026 14:11:39 +0100 Subject: [PATCH 2/2] cleaning path for intergations --- engine/train.py | 6 ++---- sim/rl/behavior_loader/models.py | 2 ++ 2 files changed, 4 insertions(+), 4 
deletions(-) diff --git a/engine/train.py b/engine/train.py index 54fa203..58deba5 100644 --- a/engine/train.py +++ b/engine/train.py @@ -91,10 +91,8 @@ DEFAULT_CFG = { def _truthy(value: str | bool | None) -> bool: - if isinstance(value, bool): - return value - if value is None: - return False + if isinstance(value, bool): return value + if value is None: return False return str(value).strip().lower() in {"1", "true", "yes", "on"} diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py index bbe5053..ab67beb 100644 --- a/sim/rl/behavior_loader/models.py +++ b/sim/rl/behavior_loader/models.py @@ -254,3 +254,5 @@ if __name__ == "__main__": f"{sum(len(t) for t in joint_mdp['transitions'].values())} transitions") if joint_mdp['states']: visualize_mdp(joint_model, threshold=0.05, output="joint_mdp_viz", fmt="pdf", export_dot=True) + + # TODO: setup intra class divergence as baseline for evaluating and adding significance to the divergence which we observe across class