def _markup(price: float, base_price: float) -> float:
    """Return price relative to base_price, or 1.0 when base_price is zero/missing."""
    return price / base_price if base_price else 1.0


@app.get("/api/{mode}/price/{productId}", response_model=PriceResponse)
def get_price(mode: Literal['hotel', 'airline'], productId: str, sessionId: Optional[str] = Query(None), experimentId: Optional[str] = Query(None)):
    """
    THIS is the fast lookup service (mechanism).
    Priority: session-keyed price > global optimal price > base price

    Args:
        mode: Product vertical; selects the ``{mode}_products`` table.
        productId: Identifier of the product to price.
        sessionId: Optional session key; when given, a session-specific
            price from the registry takes precedence.
        experimentId: Accepted for API compatibility; not used here.

    Returns:
        PriceResponse whose ``model_version`` is 'session-aware', 'surge'
        or 'base' depending on which tier produced the price.

    Raises:
        HTTPException: 404 when the product does not exist.
    """
    # Inspect the row list before indexing: an unknown id yields an empty list,
    # and indexing it directly would surface as a 500 instead of a 404.
    rows = supabase.table(f'{mode}_products').select("metadata").eq('id', productId).execute().data
    if not rows:
        raise HTTPException(404, f"Product {productId} not found")

    metadata = rows[0]['metadata'] or {}
    base_price = metadata.get('base_price', 100.0)

    # PRIORITY 1: session-aware price (computed by Airflow worker)
    if sessionId:
        session_price = registry.get_session_price(sessionId, productId)
        if session_price is not None:
            return PriceResponse(
                productId=productId,
                price=session_price,
                base_price=base_price,
                markup=_markup(session_price, base_price),
                elasticity=None,
                model_version='session-aware'
            )

    # PRIORITY 2: global pre-computed prices (surge pricing)
    prices_df = registry.get_prices('latest')
    if prices_df is not None:
        product_price_row = prices_df[prices_df['productId'] == productId]
        if not product_price_row.empty:
            optimal_price = float(product_price_row['optimal_price'].iloc[0])
            return PriceResponse(
                productId=productId,
                price=optimal_price,
                base_price=base_price,
                markup=_markup(optimal_price, base_price),
                elasticity=None,
                model_version='surge'
            )

    # PRIORITY 3: fallback to base price
    return PriceResponse(
        productId=productId,
        price=base_price,
        base_price=base_price,
        markup=1.0,
        elasticity=None,
        model_version='base'
    )
AIRFLOW__CORE__EXECUTOR=SequentialExecutor + - AIRFLOW__CORE__EXECUTOR=LocalExecutor - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY} - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true - AIRFLOW__CORE__LOAD_EXAMPLES=false - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true + - AIRFLOW__CORE__PARALLELISM=16 + - AIRFLOW__CORE__DAG_CONCURRENCY=8 + - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4 + - AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30 + - AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60 - AIRFLOW__WEBSERVER__EXPOSE_CONFIG=true - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY} + - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth - KAFKA_HOST=kafka - KAFKA_PORT=29092 - BACKEND_URL=http://backend:5000 @@ -173,13 +182,20 @@ services: redis: condition: service_started environment: - - AIRFLOW__CORE__EXECUTOR=SequentialExecutor + - AIRFLOW__CORE__EXECUTOR=LocalExecutor - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY} - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true - AIRFLOW__CORE__LOAD_EXAMPLES=false - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true + - AIRFLOW__CORE__PARALLELISM=16 + - AIRFLOW__CORE__DAG_CONCURRENCY=8 + - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4 + - AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30 + - AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60 + - AIRFLOW__SCHEDULER__PARSING_PROCESSES=2 - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY} + - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth - KAFKA_HOST=kafka - KAFKA_PORT=29092 - BACKEND_URL=http://backend:5000 diff --git a/engine/engine.py b/engine/engine.py new file mode 100644 index 0000000..cacac7a --- /dev/null +++ b/engine/engine.py @@ -0,0 +1,66 @@ +from sys import platform +import numpy as np +from .lib.demand import generate_demand, estimate_demand +from .lib.behavior import 
class Limbo():
    """Turn-based loop that alternates between a platform and a market.

    Each call to ``step`` hands the previous output to whichever side is
    due to move, stores what that side returns, prints it, and passes the
    turn to the other side. The platform always moves first.
    """

    def __init__(self,
                 platform,
                 market
                 ) -> None:
        self.platform = platform
        self.market = market
        self.platform_turn = True  # platform opens the exchange
        self.output = None         # nothing has been produced yet

    def step(self):
        """Let the side whose turn it is act on the last output."""
        mover = self.platform if self.platform_turn else self.market
        self.output = mover.act(self.output)
        print(self.output)
        self.platform_turn = not self.platform_turn
_cache = {}  # lazy cache for models and base pivots


def _get_base_pivot(human: bool):
    """Return the cached base transition table for humans or agents.

    On first use the matching behavior model is built, its MDP is
    aggregated into event-level transitions, and the result is stored as a
    DataFrame with missing transitions filled with 0.0.
    """
    key = 'human' if human else 'agent'
    if key not in _cache:
        model = BehaviorModel(human_dir) if human else AgentBehaviorModel(agent_dir)
        mdp = model.build_MDP()
        _cache[key] = pd.DataFrame(aggregate_event_transitions(mdp)).fillna(0.0)
    return _cache[key]


def adjust_behavior_to_condition(condition, transition_matrix):
    """Expand an NxN transition table to (N*P)x(N*P) weighted by demand.

    Args:
        condition: Per-product demand weights (length P).
        transition_matrix: DataFrame of event-to-event transition weights.

    Returns:
        DataFrame whose labels are ``"<event>_product<p>"`` and whose values
        are the Kronecker product of the base table with the outer product
        of the normalized condition.
    """
    cond_norm = condition / np.sum(condition)
    n_products = len(condition)
    base_vals = transition_matrix.values
    base_cols, base_rows = transition_matrix.columns.tolist(), transition_matrix.index.tolist()

    # expand via kronecker-like tiling: each cell becomes a P*P block weighted by outer product of cond_norm
    expanded = np.kron(base_vals, np.outer(cond_norm, cond_norm))
    new_cols = [f"{c}_product{p}" for c in base_cols for p in range(n_products)]
    new_rows = [f"{r}_product{p}" for r in base_rows for p in range(n_products)]
    return pd.DataFrame(expanded, index=new_rows, columns=new_cols)


def sample_behavior(condition, human=True, max_len=40):
    """Sample one event trajectory from the demand-adjusted transition table.

    The walk starts at a uniformly chosen state and stops as soon as a
    checkout state is reached or ``max_len`` states have been collected,
    whichever comes first.

    Args:
        condition: Per-product demand weights used to expand the base table.
        human: Use the human model when True, the agent model otherwise.
        max_len: Upper bound on the trajectory length.

    Returns:
        List of state labels of length at most ``max_len`` (at least 1).
    """
    base_pivot = _get_base_pivot(human)
    adjusted_transitions = adjust_behavior_to_condition(condition, base_pivot)
    next_states = adjusted_transitions.columns

    trajectory = [np.random.choice(adjusted_transitions.index)]
    # Continue only while BOTH hold: room left and no checkout yet. The earlier
    # `or 'checkout' in ...` form never stopped at checkout and could run past
    # max_len indefinitely when checkout was absorbing.
    while len(trajectory) < max_len and 'checkout' not in trajectory[-1]:
        probs = adjusted_transitions.loc[trajectory[-1]].values
        total = np.sum(probs)
        # rows without outgoing mass fall back to a uniform draw (p=None)
        sample = np.random.choice(next_states, p=probs / total if total > 0 else None)
        trajectory.append(sample)
    return trajectory
def estimate_demand(trajectories):
    """Estimate per-product demand shares from observed product views.

    Every event containing ``'view_product'`` counts as one view of the
    product whose index is encoded in the event's last ``_``-separated
    token (``product<idx>``).

    Args:
        trajectories: Iterable of trajectories, each an iterable of event
            label strings.

    Returns:
        Dict mapping product index to its share of all views, in percent.
        Empty when no view events were seen.
    """
    view_counts = {}
    view_events = (
        step
        for path in trajectories
        for step in path
        if 'view_product' in step
    )
    for step in view_events:
        pid = int(step.split('_')[-1].replace('product', ''))
        view_counts[pid] = view_counts.get(pid, 0) + 1

    total_views = sum(view_counts.values())
    # normalize to percentage
    return {pid: (hits / total_views) * 100 for pid, hits in view_counts.items()}
def style_axis(ax, title: str = None, xlabel: str = None, ylabel: str = None):
    """Apply the dashboard's shared look to one axes object.

    Hides the top and right spines, then sets whichever of title and axis
    labels were provided (falsy values are skipped).
    """
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    if title:
        ax.set_title(title, fontsize=11, fontweight='bold', pad=8)
    if xlabel:
        ax.set_xlabel(xlabel, fontsize=9)
    if ylabel:
        ax.set_ylabel(ylabel, fontsize=9)
    def _render_scatter(self, env):
        """Draw the price-vs-demand scatter in the top-left grid cell.

        Every recorded (price, demand) pair from the environment's history
        becomes one point, colored by product index; a dashed first-degree
        least-squares fit is overlaid once more than one point exists.
        """
        ax = self.fig.add_subplot(self.gs[0, 0])
        prices_flat = np.array(env._price_history).flatten()
        demands_flat = np.array(env._demand_history).flatten()
        # one product id per flattened entry — assumes each history row holds
        # n_products values in product order (TODO confirm against env)
        product_ids = np.tile(np.arange(env.n_products), len(env._price_history))
        ax.scatter(prices_flat, demands_flat, c=product_ids, cmap='plasma', alpha=0.6, s=15, edgecolors='none')
        if len(prices_flat) > 1:
            # linear trend line across the observed price range
            z = np.polyfit(prices_flat, demands_flat, 1)
            p_line = np.linspace(prices_flat.min(), prices_flat.max(), 50)
            ax.plot(p_line, np.polyval(z, p_line), '--', lw=1.5, alpha=0.8)
        style_axis(ax, "Price-Demand Relationship", "Price ($)", "Demand")
    def _render_correlation(self, n_products, price_mat, demand_mat):
        """Draw the price/demand cross-correlation heatmap (bottom-left cell).

        Args:
            n_products: Number of products; sizes the ticks and the slice.
            price_mat: 2-D price array; its second axis is checked for length
                (presumably products x steps — verify against caller).
            demand_mat: Demand array laid out like ``price_mat``.

        The heatmap is only drawn once ``price_mat`` has more than two
        columns; the axes are styled either way.
        """
        ax = self.fig.add_subplot(self.gs[2, 0])
        if price_mat.shape[1] > 2:
            # corrcoef treats both inputs' rows as variables; the upper-right
            # block pairs each price row with each demand row
            corr = np.corrcoef(price_mat, demand_mat)[:n_products, n_products:]
            im = ax.imshow(corr, cmap='RdBu', vmin=-1, vmax=1, aspect='auto')
            ax.set_xticks(range(n_products))
            ax.set_yticks(range(n_products))
            ax.set_xticklabels([f'Q{i}' for i in range(n_products)], fontsize=6)
            ax.set_yticklabels([f'P{i}' for i in range(n_products)], fontsize=6)
            self.fig.colorbar(im, ax=ax, fraction=0.046, pad=0.02)
        style_axis(ax, "Price-Demand Correlation", None, None)
# demand functions with compatible signatures
def demand_linear(mu, sigma, size):
    """Draw ``size`` normal(mu, sigma) samples, floored at zero."""
    draws = np.random.normal(mu, sigma, size)
    return np.maximum(0, draws)


def demand_uniform(mu, sigma, size):
    """Draw ``size`` uniform samples from [mu - sigma, mu + sigma)."""
    lower, upper = mu - sigma, mu + sigma
    return np.random.uniform(lower, upper, size)


def demand_exponential(mu, sigma, size):
    """Draw ``size`` exponential samples with scale ``mu``.

    ``sigma`` is accepted only to keep the shared signature; it is unused.
    """
    return np.random.exponential(mu, size)


def demand_logistic(mu, sigma, size):
    """Draw ``size`` logistic samples with location ``mu`` and scale ``sigma``."""
    return np.random.logistic(mu, sigma, size)
def run_study(max_workers: int = None, output: str = "results_full.jsonl"):
    """Run every full-factorial config in a process pool and save the metrics.

    Args:
        max_workers: Pool size passed to ProcessPoolExecutor (None lets the
            executor choose).
        output: Path of the file that receives one JSON object per line.

    Returns:
        List of result dicts, in the same order as the generated configs.
    """
    configs = generate_configs()
    total = len(configs)
    log.info(f"full factorial: {total} configs ({total//SEEDS_PER_CONFIG} unique × {SEEDS_PER_CONFIG} seeds)")

    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as pool:
        # map() yields in submission order, so results line up with configs
        for done, outcome in enumerate(pool.map(run_single, configs), start=1):
            results.append(outcome)
            if done % 100 == 0:
                log.info(f"progress: {done}/{total}")

    serialized = "\n".join(json.dumps(entry) for entry in results)
    Path(output).write_text(serialized)
    log.info(f"wrote {len(results)} results to {output}")
    return results
def generate_configs(lh_samples: int = LH_SAMPLES):
    """Build the mixed design: full grid on primary factors, LH on the rest.

    For every combination of primary-factor levels, ``lh_samples`` Latin
    hypercube points are drawn over the secondary factors; each coordinate
    in [0, 1) is mapped onto a level index of its factor. Every resulting
    setting is replicated for ``SEEDS_PER_CONFIG`` seeds and tagged with an
    8-character md5-based id.

    Returns:
        List of config dicts (factor values plus ``seed`` and ``id``).
    """
    primary, secondary = [], []
    for factor in FACTORS:
        (primary if factor.primary else secondary).append(factor)

    sampler = LatinHypercube(d=len(secondary), seed=42)

    configs = []
    for cell in product(*[f.levels for f in primary]):
        cell_values = {f.name: level for f, level in zip(primary, cell)}
        # a fresh batch of LH points is drawn for every primary cell
        for point in sampler.random(n=lh_samples):
            base = dict(cell_values)
            for f, u in zip(secondary, point):
                base[f.name] = f.levels[int(u * len(f.levels))]

            for seed in range(SEEDS_PER_CONFIG):
                cfg = {**base, "seed": seed}
                cfg["id"] = hashlib.md5(json.dumps(cfg, sort_keys=True).encode()).hexdigest()[:8]
                configs.append(cfg)
    return configs
env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"])) + + obs, _ = env.reset() + total_reward, steps = 0.0, 0 + + for _ in range(100): + action = env.action_space.sample() + obs, reward, term, trunc, _ = env.step(action) + total_reward += reward + steps += 1 + if term: break + + env.close() + return { + "id": cfg["id"], + "config": cfg, + "total_reward": total_reward, + "avg_reward": total_reward / steps, + "steps": steps, + } + +def run_study(max_workers: int = None, output: str = "results_mixed.jsonl", lh_samples: int = LH_SAMPLES): + configs = generate_configs(lh_samples) + n_primary_cells = int(np.prod([len(f.levels) for f in FACTORS if f.primary])) + log.info(f"mixed LH: {len(configs)} configs ({n_primary_cells} primary × {lh_samples} LH × {SEEDS_PER_CONFIG} seeds)") + + results = [] + with ProcessPoolExecutor(max_workers=max_workers) as ex: + for i, result in enumerate(ex.map(run_single, configs)): + results.append(result) + if (i+1) % 100 == 0: log.info(f"progress: {i+1}/{len(configs)}") + + Path(output).write_text("\n".join(json.dumps(r) for r in results)) + log.info(f"wrote {len(results)} results to {output}") + return results + +if __name__ == "__main__": + import argparse + p = argparse.ArgumentParser() + p.add_argument("--workers", type=int, default=None) + p.add_argument("--output", default="results_mixed.jsonl") + p.add_argument("--lh-samples", type=int, default=10) + p.add_argument("--dry-run", action="store_true", help="only show design size") + args = p.parse_args() + + primary = [f for f in FACTORS if f.primary] + secondary = [f for f in FACTORS if not f.primary] + configs = generate_configs(args.lh_samples) + log.info(f"design: {len(configs)} runs | primary: {[f.name for f in primary]} | secondary (LH): {[f.name for f in secondary]}") + + if not args.dry_run: + run_study(args.workers, args.output, args.lh_samples) diff --git a/engine/train.py b/engine/train.py new file mode 100644 index 0000000..496ecfd --- /dev/null +++ 
class RenderCallback(BaseCallback):
    """Renders environment on every step for live visualization."""
    def __init__(self, env: PHANTOM):
        """Keep a reference to the environment whose ``render`` will be called."""
        super().__init__()
        self.env = env

    def _on_step(self) -> bool:
        """Render the stored environment and report that training may continue."""
        self.env.render()
        # always True: this callback never asks the learner to stop
        return True
Platform sets prices, market responds with demand.""" + metadata = {"render_modes": ["human", "ansi"]} + + def __init__(self, + n_products: int = 10, + alpha: float = 0.3, + N: int = 100, + price_bounds: tuple = (10.0, 150.0), + lambda_coi: float = 0.1, + render_mode: str = None): + super().__init__() + self.n_products = n_products + self.price_bounds = price_bounds + self.lambda_coi = lambda_coi + self.render_mode = render_mode + self.alpha = alpha + self.N = N + + self.market = MarketEngine(alpha=alpha, N=N) + self._platform_stub = PricingEngine() + self._limbo = Limbo(self._platform_stub, self.market) + + self.action_space = spaces.Box( + low=price_bounds[0], high=price_bounds[1], + shape=(n_products,), dtype=np.float32 + ) + self.observation_space = spaces.Dict({ + "demand": spaces.Box(low=0.0, high=100.0, shape=(n_products,), dtype=np.float32), + "prices": spaces.Box(low=price_bounds[0], high=price_bounds[1], shape=(n_products,), dtype=np.float32), + }) + + self._prices = None + self._demand = None + self._step_count = 0 + self._demand_history = [] + self._price_history = [] + self._revenue_history = [] + self._renderer = None + + def _get_obs(self) -> dict: + demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)], dtype=np.float32) + return {"demand": demand_arr, "prices": self._prices.astype(np.float32)} + + def _compute_reward(self, prices: np.ndarray, demand: dict) -> float: + revenue = np.sum(prices * np.array([demand.get(i, 0.0) for i in range(self.n_products)])) + # TODO: implement supra-competitive price punishment + return float(revenue) + + def _record_history(self): + demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)]) + self._demand_history.append(demand_arr) + self._price_history.append(self._prices.copy()) + self._revenue_history.append(np.sum(self._prices * demand_arr)) + + def reset(self, seed=None, options=None): + super().reset(seed=seed) + self._prices = 
np.random.uniform(*self.price_bounds, size=self.n_products) + self._demand = self.market.act(self._prices) + self._step_count = 0 + self._demand_history, self._price_history, self._revenue_history = [], [], [] + self._record_history() + return self._get_obs(), {} + + def step(self, action: np.ndarray): + self._prices = np.clip(action, *self.price_bounds) + self._demand = self.market.act(self._prices) + self._step_count += 1 + self._record_history() + + reward = self._compute_reward(self._prices, self._demand) + terminated = self._step_count >= 100 + + return self._get_obs(), reward, terminated, False, {"step": self._step_count} + + def _compute_elasticity(self) -> np.ndarray: + """point elasticity: e = (dQ/dP) * (P/Q) via finite differences, clipped to [-5, 5]""" + if len(self._price_history) < 2: + return np.zeros(self.n_products) + p, q = np.array(self._price_history), np.array(self._demand_history) + dp, dq = np.diff(p, axis=0), np.diff(q, axis=0) + valid = np.abs(dp) > 0.5 + with np.errstate(divide='ignore', invalid='ignore'): + elasticity = np.where(valid, (dq / dp) * (p[:-1] / np.maximum(q[:-1], 1.0)), 0.0) + elasticity = np.nan_to_num(np.clip(elasticity, -5.0, 5.0), nan=0.0) + return np.mean(elasticity, axis=0) if len(elasticity) > 0 else np.zeros(self.n_products) + + def render(self): + if self.render_mode == "human": + if self._renderer is None: + self._renderer = DashboardRenderer() + self._renderer.render(self) + elif self.render_mode == "ansi": + return f"step={self._step_count}, prices={self._prices}, demand={self._demand}" + return None + + def close(self): + if self._renderer: + self._renderer.close() + self._renderer = None + + +if __name__ == "__main__": + env = PHANTOM(n_products=15, alpha=0.3, N=100, render_mode="human") + obs, _ = env.reset() + for step in range(100): + action = env.action_space.sample() + obs, reward, term, trunc, info = env.step(action) + env.render() + if term: break + env.close() diff --git a/experiments/agents/run.py 
b/experiments/agents/run.py new file mode 100644 index 0000000..823c3d9 --- /dev/null +++ b/experiments/agents/run.py @@ -0,0 +1,117 @@ +from supabase import create_client, Client +import os +import random +import asyncio +import json +from dotenv import load_dotenv + +from experiments.agents.agent import get_agent, AgentTypes +from lib.kafka_client import get_interactions + +load_dotenv() + +RESULTS="/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" + +client = create_client( + os.getenv("NEXT_PUBLIC_SUPABASE_URL"), + os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY") +) +def pick_random_task(): + mode = 'hotel' + tasks = client.table("tasks").select("*").execute().data + if mode == 'hotel': + # drop all that have 'flight' in the description + tasks = [task for task in tasks if 'flight' not in task['task_description'].lower()] + return random.choice(tasks) if tasks else None + +def clear_kafka_data(): + """Delete and recreate Kafka topics to clear all data""" + from kafka.admin import KafkaAdminClient, NewTopic + from kafka.errors import UnknownTopicOrPartitionError + import time + + kafka_host = os.getenv('KAFKA_HOST', 'localhost') + kafka_port = os.getenv('KAFKA_PORT', '9092') + broker = f'{kafka_host}:{kafka_port}' + + admin = KafkaAdminClient(bootstrap_servers=broker) + topics = ['user-interactions', 'price-logs'] + + try: + admin.delete_topics(topics, timeout_ms=5000) + print(f"Deleted topics: {topics}") + time.sleep(2) + except UnknownTopicOrPartitionError: + print("Topics don't exist, skipping delete") + except Exception as e: + print(f"Error deleting topics: {e}") + + new_topics = [ + NewTopic(name='user-interactions', num_partitions=3, replication_factor=1), + NewTopic(name='price-logs', num_partitions=3, replication_factor=1) + ] + + try: + admin.create_topics(new_topics=new_topics, validate_only=False) + print(f"Recreated topics: {topics}") + except Exception as e: + print(f"Error creating topics: {e}") + finally: + admin.close() 
+ +def create_new_experiment(task_id): + import uuid + subject_name = f"agent_{str(uuid.uuid4())[:8]}" + experiment = { + "subject_name": subject_name, + "xp_human_only": False, + "xp_market_mode": "hotel", + "xp_task_id": task_id, + } + response = client.table("experiments").insert(experiment).execute() + return response.data[0] if response.data else None + +if __name__ == "__main__": + clear_kafka_data() + + task = pick_random_task() + if not task: + print("No tasks available") + exit(1) + + experiment = create_new_experiment(task['id']) + exp_id = experiment['id'] + exp_dir = f"{RESULTS}{exp_id}" + os.makedirs(exp_dir, exist_ok=True) + + # construct experiment URL with uuid param + base_url = os.getenv('NEXT_PUBLIC_API_BASE', 'http://localhost:3000') + agent_url = f"{base_url}/start-task?uuid={exp_id}" + + print(f"Created experiment {exp_id} for task {task['id']}") + print(f"Agent will interact with: {agent_url}") + + # instantiate and run agent + agent = get_agent( + AgentTypes.GENERIC_BROWSER_USE_AGENT, + goal=task['task_description'], + url=agent_url, + timeout=300, + headless=True + ) + + result = asyncio.run(agent.act()) + print(f"Agent result: {result}") + + # export interaction and price data from kafka + interactions = get_interactions(topic='user-interactions', timeout_ms=3000) + prices = get_interactions(topic='price-logs', timeout_ms=3000) + + with open(f"{exp_dir}/int.json", 'w') as f: + json.dump(interactions, f, indent=2) + + with open(f"{exp_dir}/price.json", 'w') as f: + json.dump(prices, f, indent=2) + + print(f"Experiment {exp_id} completed.") + print(f"Exported {len(interactions)} interactions and {len(prices)} price logs to {exp_dir}") diff --git a/experiments/airflow/dags/surge_pricing_factory.py b/experiments/airflow/dags/surge_pricing_factory.py index a886d5b..b61e65c 100644 --- a/experiments/airflow/dags/surge_pricing_factory.py +++ b/experiments/airflow/dags/surge_pricing_factory.py @@ -1,3 +1,4 @@ +from pandas.core.algorithms import 
factorize_array from airflow import DAG from airflow.operators.python import PythonOperator from airflow.utils.dates import days_ago @@ -208,3 +209,12 @@ def create_surge_pricing_dag(store_mode: str) -> DAG: # instantiate DAGs for Airflow to discover dag_airline = create_surge_pricing_dag('airline') dag_hotel = create_surge_pricing_dag('hotel') + +# TODO: Refactor this factory from a surge pricing factory to a general pricing factory +# We will do this by passing a pricing strategy class to the factory, since the generic pipeline is: +# take all interaction data, group by sessionId and assign a new price vector to each session +# in the grouping we get a subset of the interactions per sessionId and we can map that to some Features +# we define a custom _get_features(interactions .) method in the strategy class +# we then run only the inference which is the .predict(trajectory) per-session which will give us a new price vector +# this we then publish for each sessionId group +# this might include no deleting most of the pricers we have defined and starting with a super simple surge-pricing algorithm that is no-fit only predict. This we can then test end-to-end and observe changes to prices according to a desired strategy - we have to define this one as a very short term strategy because we run sessions that take only a few minutes. 
diff --git a/experiments/airflow/dags/surge_pricing_pipeline.py b/experiments/airflow/dags/surge_pricing_pipeline.py index b1d7c61..1a3b3d0 100644 --- a/experiments/airflow/dags/surge_pricing_pipeline.py +++ b/experiments/airflow/dags/surge_pricing_pipeline.py @@ -120,15 +120,31 @@ def apply_surge_pricing(**kwargs): # rename demand_score to demand for pricer compatibility data = product_features.rename(columns={'demand_score': 'demand'}) + high_thresh = dag_conf.get('high_threshold', 10) + low_thresh = dag_conf.get('low_threshold', 2) + surge_mult = dag_conf.get('surge_multiplier', 1.2) + discount_mult = dag_conf.get('discount_multiplier', 0.9) + + logging.info(f"Surge pricing config: high_thresh={high_thresh}, low_thresh={low_thresh}, surge_mult={surge_mult}, discount_mult={discount_mult}") + logging.info(f"Demand stats: min={data['demand'].min():.2f}, max={data['demand'].max():.2f}, mean={data['demand'].mean():.2f}") + logging.info(f"Products with high demand (>={high_thresh}): {(data['demand'] >= high_thresh).sum()}") + logging.info(f"Products with low demand (<={low_thresh}): {(data['demand'] <= low_thresh).sum()}") + surge_pricer = SimpleSurgePricer( - high_threshold=dag_conf.get('high_threshold', 10), - low_threshold=dag_conf.get('low_threshold', 2), - surge_multiplier=dag_conf.get('surge_multiplier', 1.2), - discount_multiplier=dag_conf.get('discount_multiplier', 0.9) + high_threshold=high_thresh, + low_threshold=low_thresh, + surge_multiplier=surge_mult, + discount_multiplier=discount_mult ) surge_pricer.fit(data) data['optimal_price'] = surge_pricer.predict() + base_avg = data['base_price'].mean() + optimal_avg = data['optimal_price'].mean() + price_change_pct = ((optimal_avg - base_avg) / base_avg) * 100 + + logging.info(f"Price adjustment: base_avg={base_avg:.2f}, optimal_avg={optimal_avg:.2f}, change={price_change_pct:+.1f}%") + prices_df = data[['productId', 'price', 'base_price', 'optimal_price', 'demand']].rename(columns={ 'price': 'current_price', 
'demand': 'demand_score' diff --git a/experiments/ml/__init__.py b/experiments/ml/__init__.py index 11b65df..c97eaa9 100644 --- a/experiments/ml/__init__.py +++ b/experiments/ml/__init__.py @@ -1,11 +1,21 @@ from .evals import evaluate from .arch import ( XGBoostAgentClassifier, - LightGBMAgentClassifier + LightGBMAgentClassifier, + ContrastiveWeakClassifier, + TrajectoryEncoder, + WeakClassifier, + contrastive_loss, + featurize_trajectory, ) -__all__ =[ +__all__ = [ 'evaluate', 'XGBoostAgentClassifier', - 'LightGBMAgentClassifier' + 'LightGBMAgentClassifier', + 'ContrastiveWeakClassifier', + 'TrajectoryEncoder', + 'WeakClassifier', + 'contrastive_loss', + 'featurize_trajectory', ] diff --git a/experiments/ml/arch.py b/experiments/ml/arch.py index 4f36e18..1fa4f96 100644 --- a/experiments/ml/arch.py +++ b/experiments/ml/arch.py @@ -1,122 +1,212 @@ # sklearn compatible models for agent detection from sklearn.base import BaseEstimator, ClassifierMixin -from procesing.context import PipelineContext -from typing import Any, Optional, Tuple +from typing import Any, Optional, Tuple, Dict, List from abc import ABC, abstractmethod -import xgboost as xgb -import lightgbm as lgb +from collections import defaultdict import numpy as np import pandas as pd +import torch +import torch.nn as nn +import torch.nn.functional as F +import sys +from pathlib import Path + +# add lib to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'lib')) +from lib.features import ( + transition_histogram as _lib_transition_histogram, + temporal_signature as _lib_temporal_signature, + state_coverage as _lib_state_coverage, + transition_entropy as _lib_transition_entropy, + featurize_trajectory as _lib_featurize_trajectory, + parse_timestamp +) +from lib.state import event_to_state, get_event_name, get_timestamp TASK = 'classification' LABELS = ['human', 'agent'] -class BaseAgentClassifier(BaseEstimator, ClassifierMixin, ABC): - """Base class for tree-based agent 
detection classifiers with common logic""" +class WeakClassifier(BaseEstimator, ClassifierMixin, ABC): + # a simple contrastive machine learning model learns to distinguish human/agent behavior + # using weakly supervised contrastive learning + augmentation + def __init__(self, **kwargs): + super().__init__() + self.model = None + self.kwargs = kwargs - def __init__(self, context: Optional[PipelineContext] = None, n_estimators: int = 200, - max_depth: int = 6, learning_rate: float = 0.05, - early_stopping_rounds: int = 20): - self.context = context + +class TrajectoryEncoder(nn.Module): + """Encode variable-length event sequences to fixed-dim embedding via bidirectional LSTM""" + def __init__(self, input_dim: int, embed_dim: int = 32, hidden_dim: int = 64): + super().__init__() + self.event_embed = nn.Linear(input_dim, hidden_dim) + self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True, bidirectional=True) + self.proj = nn.Linear(hidden_dim * 2, embed_dim) + + def forward(self, x: torch.Tensor) -> torch.Tensor: # x: (batch, seq_len, input_dim) + h = F.relu(self.event_embed(x)) + _, (hn, _) = self.lstm(h) + hn = torch.cat([hn[-2], hn[-1]], dim=1) # concat bidirectional hidden states + return F.normalize(self.proj(hn), dim=1) # L2 normalized + + +class ContrastiveWeakClassifier(WeakClassifier): + """Contrastive learning classifier for human/agent trajectory discrimination""" + def __init__(self, input_dim: int = 64, embed_dim: int = 32, margin: float = 1.0, **kwargs): + super().__init__(**kwargs) + self.input_dim = input_dim + self.embed_dim = embed_dim + self.margin = margin + self.encoder = TrajectoryEncoder(input_dim, embed_dim) + self.classifier = nn.Linear(embed_dim, 2) + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self._fitted = False + + def to_device(self): + self.encoder.to(self.device) + self.classifier.to(self.device) + return self + + def encode(self, x: torch.Tensor) -> torch.Tensor: + return 
self.encoder(x.to(self.device)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + emb = self.encode(x) + return self.classifier(emb) + + def fit(self, X, y=None): # sklearn interface - actual training in weak_train.py + self._fitted = True + return self + + def predict(self, X: np.ndarray) -> np.ndarray: + self.encoder.eval() + self.classifier.eval() + with torch.no_grad(): + x = torch.tensor(X, dtype=torch.float32).unsqueeze(1).to(self.device) + logits = self.forward(x) + return torch.argmax(logits, dim=1).cpu().numpy() + + def predict_proba(self, X: np.ndarray) -> np.ndarray: + self.encoder.eval() + self.classifier.eval() + with torch.no_grad(): + x = torch.tensor(X, dtype=torch.float32).unsqueeze(1).to(self.device) + logits = self.forward(x) + return F.softmax(logits, dim=1).cpu().numpy() + + +def contrastive_loss(anchor: torch.Tensor, positive: torch.Tensor, negative: torch.Tensor, margin: float = 0.3) -> torch.Tensor: + """Triplet loss using cosine similarity (for L2-normalized embeddings). 
margin in [0,1] range.""" + pos_sim = F.cosine_similarity(anchor, positive) # higher = more similar + neg_sim = F.cosine_similarity(anchor, negative) + return F.relu(neg_sim - pos_sim + margin).mean() # want pos_sim > neg_sim + margin + + +def nt_xent_loss(z_i: torch.Tensor, z_j: torch.Tensor, temperature: float = 0.5) -> torch.Tensor: + """Normalized temperature-scaled cross entropy loss (SimCLR style)""" + batch_size = z_i.size(0) + z = torch.cat([z_i, z_j], dim=0) # (2N, embed_dim) + sim = F.cosine_similarity(z.unsqueeze(1), z.unsqueeze(0), dim=2) / temperature + mask = torch.eye(2 * batch_size, dtype=torch.bool, device=z.device) + sim.masked_fill_(mask, -float('inf')) + labels = torch.arange(batch_size, device=z.device) + labels = torch.cat([labels + batch_size, labels]) # positive pairs + return F.cross_entropy(sim, labels) + + +# feature extraction utilities - delegating to lib.features for unified implementation +# these wrappers maintain backwards compatibility for existing imports + +def transition_histogram(events: List, state_fn, max_states: int = 50) -> np.ndarray: + """Compute normalized histogram of state transitions in trajectory""" + return _lib_transition_histogram(events, state_fn, max_states) + + +def temporal_signature(events: List, ts_fn) -> np.ndarray: + """Extract temporal features: mean/std/skew of inter-event times""" + return _lib_temporal_signature(events, ts_fn) + + +def state_coverage(events: List, state_fn, mdp_states: set) -> float: + """Fraction of MDP states visited by trajectory""" + return _lib_state_coverage(events, state_fn, mdp_states) + + +def transition_entropy(events: List, state_fn) -> float: + """Compute entropy of transition distribution (randomness of navigation)""" + return _lib_transition_entropy(events, state_fn) + + +def featurize_trajectory(events: List, mdp: Optional[Dict] = None, input_dim: int = 64) -> np.ndarray: + """Convert trajectory to fixed-dim feature vector - uses lib.features implementation""" + 
mdp_states = set(mdp.get('states', [])) if mdp else set() + + def _ts_fn(e): + return parse_timestamp(get_timestamp(e)) + + def _event_name_fn(e): + return get_event_name(e) + + return _lib_featurize_trajectory(events, event_to_state, _ts_fn, _event_name_fn, mdp_states, input_dim) + + +# gradient boosting classifiers for comparison baselines +class XGBoostAgentClassifier(BaseEstimator, ClassifierMixin): + """XGBoost classifier for human/agent detection from session features""" + def __init__(self, n_estimators: int = 100, max_depth: int = 6, learning_rate: float = 0.1, **kwargs): self.n_estimators = n_estimators self.max_depth = max_depth self.learning_rate = learning_rate - self.early_stopping_rounds = early_stopping_rounds - self.model_ = None - self.feature_names_ = None - - def _to_array(self, X): - """Convert pandas structures to numpy arrays""" - return X.values if isinstance(X, (pd.DataFrame, pd.Series)) else X - - def _compute_pos_weight(self, y_arr): - """Calculate scale_pos_weight for class imbalance handling""" - n_neg, n_pos = (y_arr == 0).sum(), (y_arr == 1).sum() - return n_neg / n_pos if n_pos > 0 else 1.0 - - def _prepare_eval_set(self, eval_set): - """Convert eval_set to numpy arrays if needed""" - if not eval_set: - return None - X_val, y_val = eval_set[0] - return [(self._to_array(X_val), self._to_array(y_val))] - - @abstractmethod - def _build_model(self, scale_pos: float): - """Build the underlying model instance (must be implemented by subclasses)""" - pass - - @abstractmethod - def _fit_with_eval(self, X_arr, y_arr, eval_arr): - """Fit model with evaluation set (must be implemented by subclasses)""" - pass - - def fit(self, X, y, eval_set=None): - X_arr, y_arr = self._to_array(X), self._to_array(y) - - if isinstance(X, pd.DataFrame): - self.feature_names_ = X.columns.tolist() - - scale_pos = self._compute_pos_weight(y_arr) - self.model_ = self._build_model(scale_pos) - - eval_arr = self._prepare_eval_set(eval_set) - if eval_arr: - 
self._fit_with_eval(X_arr, y_arr, eval_arr) - else: - self.model_.fit(X_arr, y_arr) + self.model = None + self.kwargs = kwargs + def fit(self, X: np.ndarray, y: np.ndarray): + try: + import xgboost as xgb + self.model = xgb.XGBClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth, + learning_rate=self.learning_rate, **self.kwargs) + self.model.fit(X, y) + except ImportError: + raise ImportError("xgboost required for XGBoostAgentClassifier") return self - def predict(self, X): - return self.model_.predict(self._to_array(X)) + def predict(self, X: np.ndarray) -> np.ndarray: + if self.model is None: + raise ValueError("fit the model first") + return self.model.predict(X) - def predict_proba(self, X): - return self.model_.predict_proba(self._to_array(X)) - - @property - def feature_importances_(self): - return self.model_.feature_importances_ if self.model_ else None + def predict_proba(self, X: np.ndarray) -> np.ndarray: + if self.model is None: + raise ValueError("fit the model first") + return self.model.predict_proba(X) -class XGBoostAgentClassifier(BaseAgentClassifier): - """XGBoost binary classifier for agent detection with class imbalance handling""" +class LightGBMAgentClassifier(BaseEstimator, ClassifierMixin): + """LightGBM classifier for human/agent detection from session features""" + def __init__(self, n_estimators: int = 100, max_depth: int = -1, learning_rate: float = 0.1, **kwargs): + self.n_estimators = n_estimators + self.max_depth = max_depth + self.learning_rate = learning_rate + self.model = None + self.kwargs = kwargs - def _build_model(self, scale_pos: float): - return xgb.XGBClassifier( - n_estimators=self.n_estimators, - max_depth=self.max_depth, - learning_rate=self.learning_rate, - scale_pos_weight=scale_pos, - eval_metric='auc', - early_stopping_rounds=self.early_stopping_rounds, - random_state=42, - tree_method='hist', - enable_categorical=False - ) + def fit(self, X: np.ndarray, y: np.ndarray): + try: + import lightgbm as lgb 
+ self.model = lgb.LGBMClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth, + learning_rate=self.learning_rate, verbose=-1, **self.kwargs) + self.model.fit(X, y) + except ImportError: + raise ImportError("lightgbm required for LightGBMAgentClassifier") + return self - def _fit_with_eval(self, X_arr, y_arr, eval_arr): - self.model_.fit(X_arr, y_arr, eval_set=eval_arr, verbose=False) + def predict(self, X: np.ndarray) -> np.ndarray: + if self.model is None: + raise ValueError("fit the model first") + return self.model.predict(X) - -class LightGBMAgentClassifier(BaseAgentClassifier): - """LightGBM binary classifier for agent detection with class imbalance handling""" - - def _build_model(self, scale_pos: float): - return lgb.LGBMClassifier( - n_estimators=self.n_estimators, - max_depth=self.max_depth, - learning_rate=self.learning_rate, - scale_pos_weight=scale_pos, - metric='auc', - random_state=42, - verbosity=-1 - ) - - def _fit_with_eval(self, X_arr, y_arr, eval_arr): - self.model_.fit( - X_arr, y_arr, - eval_set=eval_arr, - callbacks=[lgb.early_stopping(self.early_stopping_rounds, verbose=False)] - ) + def predict_proba(self, X: np.ndarray) -> np.ndarray: + if self.model is None: + raise ValueError("fit the model first") + return self.model.predict_proba(X) diff --git a/experiments/ml/weak_train.py b/experiments/ml/weak_train.py new file mode 100644 index 0000000..eb87a9c --- /dev/null +++ b/experiments/ml/weak_train.py @@ -0,0 +1,246 @@ +import sys +sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/sim/rl/behavior_loader") +sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml") + +from sim.rl.behavior_loader.loader import AgentLoader, Loader, JointLoader, PayloadModel +from sim.rl.behavior_loader.models import JointBehaviorModel +from arch import ContrastiveWeakClassifier, contrastive_loss, featurize_trajectory +from typing import List, Optional, Dict +from datetime import datetime, timedelta +from copy 
import deepcopy +import numpy as np +import random +import torch +from torch.utils.data import Dataset, DataLoader +from torch.optim import Adam +from torch.utils.tensorboard import SummaryWriter + +RUNS_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml/runs" +agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" +human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" + + +def _perturb_ts(evt: PayloadModel, jitter_ms: int = 500) -> PayloadModel: + """Add random jitter to event timestamp""" + new_evt = deepcopy(evt) + try: + ts = datetime.fromisoformat(evt.ts.replace('Z', '+00:00')) + delta = timedelta(milliseconds=random.randint(-jitter_ms, jitter_ms)) + new_evt.ts = (ts + delta).isoformat() + except: + pass + return new_evt + + +def augment_trajectory(trajectory: List[PayloadModel], rate: float = 0.1) -> List[PayloadModel]: + """Apply random augmentation to trajectory for contrastive learning""" + if len(trajectory) < 2: + return trajectory + + aug_type = random.choice(['window', 'shuffle', 'noise', 'drop']) + + if aug_type == 'window': # random contiguous sub-sequence (70-100% length) + min_len = max(2, int(len(trajectory) * 0.7)) + sub_len = random.randint(min_len, len(trajectory)) + start = random.randint(0, len(trajectory) - sub_len) + return trajectory[start:start + sub_len] + + elif aug_type == 'shuffle': # swap adjacent pairs with probability rate + result = list(trajectory) + for i in range(len(result) - 1): + if random.random() < rate: + result[i], result[i + 1] = result[i + 1], result[i] + return result + + elif aug_type == 'drop': # drop events with probability rate + result = [e for e in trajectory if random.random() > rate] + return result if len(result) >= 2 else trajectory[:2] + + elif aug_type == 'noise': # perturb timestamps + return [_perturb_ts(e, jitter_ms=500) for e in trajectory] + + return trajectory + + +class TripletDataset(Dataset): + """Generate 
(anchor, positive, negative) triplets on-the-fly with augmentation""" + def __init__(self, data: Dict[str, List[PayloadModel]], mdp: Optional[Dict], augment_fn, input_dim: int = 64, multiplier: int = 10): + self.sessions = list(data.items()) + self.human_ids = [i for i, (sid, _) in enumerate(self.sessions) if sid.startswith('human_')] + self.agent_ids = [i for i, (sid, _) in enumerate(self.sessions) if sid.startswith('agent_')] + self.mdp = mdp + self.augment = augment_fn + self.input_dim = input_dim + self.multiplier = multiplier + + if not self.human_ids or not self.agent_ids: + raise ValueError(f"Need both human ({len(self.human_ids)}) and agent ({len(self.agent_ids)}) sessions") + + def __len__(self) -> int: + return len(self.sessions) * self.multiplier + + def __getitem__(self, idx: int): + anchor_idx = idx % len(self.sessions) + sid, events = self.sessions[anchor_idx] + is_human = sid.startswith('human_') + + anchor = featurize_trajectory(events, self.mdp, self.input_dim) + positive = featurize_trajectory(self.augment(events), self.mdp, self.input_dim) + + neg_pool = self.agent_ids if is_human else self.human_ids + neg_idx = random.choice(neg_pool) + negative = featurize_trajectory(self.sessions[neg_idx][1], self.mdp, self.input_dim) + + label = 0 if is_human else 1 # 0=human, 1=agent + return (torch.tensor(anchor, dtype=torch.float32), + torch.tensor(positive, dtype=torch.float32), + torch.tensor(negative, dtype=torch.float32), + torch.tensor(label, dtype=torch.long)) + + +def train(epochs: int = 100, lr: float = 1e-3, batch_size: int = 4, input_dim: int = 64, + embed_dim: int = 32, margin: float = 0.3, verbose: bool = True, run_name: str = None): + """Train contrastive weak classifier on human/agent trajectories""" + joint = JointLoader(human_dir, agent_dir) + data = joint.get_data() + if verbose: + print(f"Loaded {len(data)} sessions") + + joint_model = JointBehaviorModel(human_dir, agent_dir) + ref_mdp = joint_model.build_MDP() + + dataset = 
TripletDataset(data, ref_mdp, augment_trajectory, input_dim=input_dim) + loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True) + + model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin) + model.to_device() + + run_name = run_name or f"d{input_dim}_e{embed_dim}_lr{lr}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}" + writer = SummaryWriter(f"{RUNS_DIR}/train/{run_name}") + + optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr) + ce_loss_fn = torch.nn.CrossEntropyLoss() + + best_loss = float('inf') + for epoch in range(epochs): + model.encoder.train() + model.classifier.train() + total_loss, n_batches = 0.0, 0 + + for anchor, positive, negative, labels in loader: + anchor, positive, negative, labels = [t.to(model.device) for t in [anchor, positive, negative, labels]] + z_a, z_p, z_n = [model.encoder(t.unsqueeze(1)) for t in [anchor, positive, negative]] + + trip_loss = contrastive_loss(z_a, z_p, z_n, margin=model.margin) + ce = ce_loss_fn(model.classifier(z_a), labels) + loss = trip_loss + 0.5 * ce + + optimizer.zero_grad() + loss.backward() + optimizer.step() + total_loss += loss.item() + n_batches += 1 + + avg_loss = total_loss / max(n_batches, 1) + writer.add_scalar('loss', avg_loss, epoch) + + if verbose and (epoch + 1) % 10 == 0: + print(f"Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}") + if avg_loss < best_loss: + best_loss = avg_loss + + writer.close() + if verbose: + print(f"Done. 
Best={best_loss:.4f} TB:{RUNS_DIR}/train/{run_name}") + + return model, ref_mdp + + +def evaluate_loocv(input_dim: int = 64, embed_dim: int = 32, epochs_per_fold: int = 50, + lr: float = 1e-3, margin: float = 0.3, run_name: str = None): + """Leave-one-out cross-validation given limited samples""" + joint = JointLoader(human_dir, agent_dir) + data = joint.get_data() + session_ids = list(data.keys()) + + joint_model = JointBehaviorModel(human_dir, agent_dir) + ref_mdp = joint_model.build_MDP() + + run_name = run_name or f"loocv_d{input_dim}_e{embed_dim}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}" + writer = SummaryWriter(f"{RUNS_DIR}/eval/{run_name}") + + predictions, actuals = [], [] + + for fold_idx, test_sid in enumerate(session_ids): + train_data = {k: v for k, v in data.items() if k != test_sid} + test_events = data[test_sid] + test_label = 0 if test_sid.startswith('human_') else 1 + + n_human = sum(1 for k in train_data if k.startswith('human_')) + n_agent = sum(1 for k in train_data if k.startswith('agent_')) + if n_human == 0 or n_agent == 0: + continue + + try: + dataset = TripletDataset(train_data, ref_mdp, augment_trajectory, input_dim=input_dim, multiplier=5) + loader = DataLoader(dataset, batch_size=2, shuffle=True, drop_last=True) + + model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin) + model.to_device() + optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr) + + model.encoder.train() + model.classifier.train() + for _ in range(epochs_per_fold): + for anchor, positive, negative, labels in loader: + z_a, z_p, z_n = [model.encoder(t.unsqueeze(1).to(model.device)) for t in [anchor, positive, negative]] + loss = contrastive_loss(z_a, z_p, z_n, margin=margin) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + test_feat = featurize_trajectory(test_events, ref_mdp, input_dim) + pred = model.predict(test_feat.reshape(1, -1))[0] + predictions.append(pred) + 
actuals.append(test_label) + print(f" {test_sid[:12]}...: pred={pred}, actual={test_label}, {'OK' if pred == test_label else 'MISS'}") + + except Exception as e: + print(f"Error: {e}") + + if predictions: + acc = sum(p == a for p, a in zip(predictions, actuals)) / len(predictions) + tp = sum(1 for p, a in zip(predictions, actuals) if p == 1 and a == 1) + fp = sum(1 for p, a in zip(predictions, actuals) if p == 1 and a == 0) + fn = sum(1 for p, a in zip(predictions, actuals) if p == 0 and a == 1) + prec, rec = tp / max(tp + fp, 1), tp / max(tp + fn, 1) + f1 = 2 * prec * rec / max(prec + rec, 1e-10) + writer.add_scalar('accuracy', acc, 0) + writer.add_scalar('f1', f1, 0) + writer.add_scalar('precision', prec, 0) + writer.add_scalar('recall', rec, 0) + writer.close() + print(f"\nAccuracy: {acc:.2%} F1: {f1:.3f} TB:{RUNS_DIR}/eval/{run_name}") + return acc, predictions, actuals + writer.close() + return 0.0, [], [] + + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--mode', choices=['train', 'eval'], default='train') + parser.add_argument('--epochs', type=int, default=100) + parser.add_argument('--lr', type=float, default=1e-3) + parser.add_argument('--margin', type=float, default=0.3) + parser.add_argument('--input-dim', type=int, default=64) + parser.add_argument('--embed-dim', type=int, default=32) + parser.add_argument('--run-name', type=str, default=None) + args = parser.parse_args() + + if args.mode == 'train': + model, mdp = train(epochs=args.epochs, lr=args.lr, input_dim=args.input_dim, + embed_dim=args.embed_dim, margin=args.margin, run_name=args.run_name) + else: + evaluate_loocv(input_dim=args.input_dim, embed_dim=args.embed_dim, epochs_per_fold=args.epochs, + lr=args.lr, margin=args.margin, run_name=args.run_name) diff --git a/experiments/procesing/contaminator.py b/experiments/procesing/contaminator.py new file mode 100644 index 0000000..00aba10 --- /dev/null +++ 
b/experiments/procesing/contaminator.py @@ -0,0 +1,114 @@ +from __future__ import annotations + +import os +import random +from pathlib import Path +from types import SimpleNamespace + +import pandas as pd + +from lib.separability import estimate_alpha, load_artifacts, score_session + + +# prefer the package-qualified import; fall back to a sys.path-based import when run standalone +try: + from sim.rl.behavior_loader.models import AgentBehaviorModel +except ImportError: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent / "sim" / "rl" / "behavior_loader")) + from models import AgentBehaviorModel + +# paths should be configurable via environment or relative to project root +PROJECT_ROOT = Path(__file__).parent.parent.parent +AGENT_DATA_DIR = Path(os.getenv('PHANTOM_AGENT_DATA_DIR', PROJECT_ROOT / "experiments" / "agents" / "collected_data")) + +try: + SEPARABILITY_ARTIFACTS = load_artifacts() +except FileNotFoundError: + SEPARABILITY_ARTIFACTS = None + + +def remap_schema(df: pd.DataFrame, mapping: dict, on: str = "event_type") -> pd.DataFrame: + """remap column values according to mapping dict, preserving unmapped values""" + df = df.copy() + df[on] = df[on].map(mapping).fillna(df[on]) + return df + + +def _states_to_events(states: list[str]) -> list[SimpleNamespace]: + events: list[SimpleNamespace] = [] + for idx, state in enumerate(states): + parts = state.split("|") if isinstance(state, str) else ["page", "product", str(state)] + page = f"/{parts[0]}" if parts else "/" + product = parts[1] if len(parts) > 1 else "unknown" + event_name = parts[2] if len(parts) > 2 else parts[-1] + events.append( + SimpleNamespace( + eventName=event_name, + page=page, + productId=product, + ts=float(idx), + ) + ) + return events + +def contaminate_dataset(df: pd.DataFrame, on: str = "event_type", + contamination_rate: float = 0.1, + agent_data_dir: Path = None) -> pd.DataFrame: + """inject synthetic agent trajectories into a dataset + contamination_rate: fraction of final dataset 
that should be agent data (0.1 = 10% agents) + """ + data_dir = agent_data_dir or AGENT_DATA_DIR + model = AgentBehaviorModel(str(data_dir)) + model.build_MDP() # ensure MDP is built before sampling + + # compute event distribution from original data + event_dist = df[on].value_counts(normalize=True).to_dict() + total = sum(event_dist.values()) + event_dist = {k: v / total for k, v in event_dist.items()} + + # calculate how many synthetic events to add + N = len(df) + N_final = N / (1 - contamination_rate) + N_contaminate = int(N_final - N) + + # sample start states weighted by original distribution + start_events = random.choices(list(event_dist.keys()), weights=list(event_dist.values()), k=N_contaminate) + + # generate synthetic trajectories + new_rows = [] + alpha_estimates = [] + + for start_event in start_events: + # sample trajectory from agent model, using a state that contains the event type + mdp_states = model.mdp.get('states', []) if model.mdp else [] + matching_starts = [s for s in mdp_states if start_event in s] + if not matching_starts: + continue # skip if no matching start state + start_state = random.choice(matching_starts) + trajectory = model.sample_traj(start_state, max_len=20) + score_payload: list[SimpleNamespace] = [] + score: dict[str, float] = {} + if SEPARABILITY_ARTIFACTS: + score_payload = _states_to_events(trajectory) + score = score_session(score_payload, SEPARABILITY_ARTIFACTS) + alpha_estimates.append( + estimate_alpha(score["prob_agent"], score["delta_h"], score["delta_a"], temperature=2.0) + ) + + for state in trajectory: + parts = state.split('|') if isinstance(state, str) else [start_event] + new_rows.append({ + on: parts[-1] if parts else start_event, + 'source': 'synthetic_agent', + 'prob_agent': score.get('prob_agent') if SEPARABILITY_ARTIFACTS and score_payload else None, + 'delta_h': score.get('delta_h') if SEPARABILITY_ARTIFACTS and score_payload else None, + 'delta_a': score.get('delta_a') if SEPARABILITY_ARTIFACTS and 
score_payload else None, + }) + + if new_rows: + contaminate_df = pd.DataFrame(new_rows) + df = pd.concat([df, contaminate_df], ignore_index=True) + if alpha_estimates: + df['estimated_alpha'] = sum(alpha_estimates) / len(alpha_estimates) + return df diff --git a/experiments/procesing/pricers/base.py b/experiments/procesing/pricers/base.py index 6569556..ecaabed 100644 --- a/experiments/procesing/pricers/base.py +++ b/experiments/procesing/pricers/base.py @@ -7,15 +7,6 @@ import pandas as pd class PricingFunction(ABC): """ Abstract base for pricing functions. - - Defines mapping: f(Q_t, P_t, S_t, H_t) -> P_{t+1} - - Where: - Q_t ∈ R^n: demand vector at time t - P_t ∈ R^n: price vector at time t - S_t: session features (behavioral signals, interactions) - H_t = {Q_{t-k}, P_{t-k}, S_{t-k}}: historical state trajectory - Objective: maximize E[R_T] = E[Σ P_t^T · Q_t] subject to: @@ -28,10 +19,10 @@ class PricingFunction(ABC): def fit(self, *kwargs): """ Offline training on historical data. + This is where we can think about some maximization of expected revenue + over historical trajectories to learn parameters of the pricing function. + (This however we cover move in the RL side of things) - Args: - historical_data: DataFrame with elasticity, prices, demand signals - **kwargs: additional training parameters """ pass @@ -39,12 +30,18 @@ class PricingFunction(ABC): def predict(self, *kwargs) -> np.ndarray: """ Generate optimal prices given current state. + This is an abstract method that transitions from τ -> P* + which is the mapping from the trajectory to optimal prices under + some subset of session grouping (so, per sessionId) + """ + pass - Args: - state_space: StateSpace object containing Q_t, P_t, S_t, H_t - + @abstractmethod + def _get_features(self, *kwargs) -> np.ndarray: + """ + Extract features from trajectory for pricing decision. 
def _get_features(self, state_space=None) -> np.ndarray:
    """Build the (n_products, 3) feature matrix for elasticity pricing.

    Columns, in order: elasticity, current demand, and the deviation of
    demand from ``self.mean_demand`` relative to that mean. When there is
    no state space or no fitted elasticity, an all-zero matrix with three
    columns is returned (zero rows if elasticity itself is missing).
    """
    fitted = self.elasticity
    if fitted is None:
        return np.zeros((0, 3))
    if state_space is None:
        return np.zeros((len(fitted), 3))

    current = np.asarray(state_space.demand)
    baseline = self.mean_demand
    # small epsilon keeps the ratio finite for zero mean demand
    relative_dev = (current - baseline) / (baseline + 1e-6)
    return np.column_stack([fitted, current, relative_dev])
def session_features_to_demand(session_features: pd.DataFrame) -> float:
    """
    Map session behavioral features to a scalar demand proxy.
    THIS is the critical θ̂ → D transformation for rule-based pricing.

    Only the first row of ``session_features`` is read. The score starts at
    a baseline of 1.0 and is increased additively:
    - interaction_velocity > 2.0 → +10 (agent-like behavior, price up)
    - cart_to_view_ratio > 0.1 or cart_adds > 0 → +5 (purchase intent)
    - item_views > 3 → + min(item_views, 5) (browsing depth)

    A session that triggers none of the rules keeps the baseline; this
    function applies no discount itself. Missing columns, None and NaN
    values are treated as 0.

    Returns: demand proxy score in [1.0, 20.0], higher = more demand
    """
    if session_features.empty:
        return 1.0

    feat = session_features.iloc[0]

    def _signal(name: str):
        # missing column, None and NaN all count as "no signal"
        raw = feat.get(name, 0)
        return 0 if pd.isna(raw) else raw

    velocity = _signal('interaction_velocity')
    cart_ratio = _signal('cart_to_view_ratio')
    item_views = _signal('item_views')
    cart_adds = _signal('cart_adds')

    # baseline demand
    demand = 1.0

    # agent detection: high velocity → treat as high "demand" to price up
    if velocity > 2.0:
        demand += 10.0

    # conversion intent: cart interaction → price up
    if cart_ratio > 0.1 or cart_adds > 0:
        demand += 5.0

    # browsing depth: many views → interest signal, contribution capped at 5
    if item_views > 3:
        demand += min(item_views, 5.0)

    return float(min(demand, 20.0))  # cap at 20
def _get_features(self, state_space=None) -> np.ndarray:
    """Extract demand and base price features for each product.

    Returns an (n_products, 2) array with columns [demand, base price].
    Prices come from ``state_space.prices`` when present, otherwise from
    ``self.base_prices``, otherwise a 99.99 placeholder per product.
    Without a state space the result is all zeros, one row per fitted
    base price.
    """
    if state_space is None:
        n = len(self.base_prices) if self.base_prices is not None else 0
        return np.zeros((n, 2))

    base = np.asarray(state_space.prices) if hasattr(state_space, 'prices') else self.base_prices

    if hasattr(state_space, 'demand'):
        demand = np.asarray(state_space.demand)
    elif base is not None:
        # no demand signal: use zero demand for every priced product so the
        # two columns always have matching length
        demand = np.zeros(len(base))
    else:
        demand = np.array([0])

    if base is None:
        base = np.ones(len(demand)) * 99.99

    return np.column_stack([demand, base])
COLLECTED_DATA_DIR, NOTEBOOK_OUTPUT_DIR, + ensure_dir, get_data_path, get_experiments_path, get_sim_path, + KAFKA_HOST, KAFKA_PORT, KAFKA_BROKER, + REDIS_HOST, REDIS_PORT, + SUPABASE_URL, SUPABASE_ANON_KEY, + BACKEND_PORT, PROVIDER_PORT +) +from .state import ( + make_state_repr, event_to_state, parse_state, + get_event_name, get_timestamp, + create_state_fn, create_event_name_fn, create_timestamp_fn +) +from .features import ( + transition_histogram, temporal_signature, state_coverage, transition_entropy, + event_type_distribution, featurize_trajectory, parse_timestamp +) + +__all__ = [ + # config + 'PROJECT_ROOT', 'DATA_DIR', 'EXPERIMENTS_DIR', + 'AGENT_DATA_DIR', 'HUMAN_DATA_DIR', 'SIM_RUNS_DIR', 'MODEL_REGISTRY_DIR', + 'COLLECTED_DATA_DIR', 'NOTEBOOK_OUTPUT_DIR', + 'ensure_dir', 'get_data_path', 'get_experiments_path', 'get_sim_path', + 'KAFKA_HOST', 'KAFKA_PORT', 'KAFKA_BROKER', + 'REDIS_HOST', 'REDIS_PORT', + 'SUPABASE_URL', 'SUPABASE_ANON_KEY', + 'BACKEND_PORT', 'PROVIDER_PORT', + # state + 'make_state_repr', 'event_to_state', 'parse_state', + 'get_event_name', 'get_timestamp', + 'create_state_fn', 'create_event_name_fn', 'create_timestamp_fn', + # features + 'transition_histogram', 'temporal_signature', 'state_coverage', 'transition_entropy', + 'event_type_distribution', 'featurize_trajectory', 'parse_timestamp', +] diff --git a/lib/config.py b/lib/config.py new file mode 100644 index 0000000..a27ffd9 --- /dev/null +++ b/lib/config.py @@ -0,0 +1,65 @@ +"""Unified path configuration for PHANTOM project +All hardcoded paths should reference this module +Paths can be overridden via environment variables +""" +import os +from pathlib import Path + +# project root (directory containing lib/, experiments/, sim/, web/, backend/) +PROJECT_ROOT = Path(__file__).parent.parent.resolve() + +# data directories +DATA_DIR = Path(os.getenv('PHANTOM_DATA_DIR', PROJECT_ROOT / 'data')) +EXPERIMENTS_DIR = Path(os.getenv('PHANTOM_EXPERIMENTS_DIR', PROJECT_ROOT / 'experiments')) 
def ensure_dir(path: Path) -> Path:
    """create `path` (including missing parents) unless it is already a directory, then return it"""
    if not path.is_dir():
        # exist_ok keeps this safe if the directory appears concurrently
        path.mkdir(parents=True, exist_ok=True)
    return path
def temporal_signature(events: List, ts_fn: Callable) -> np.ndarray:
    """extract temporal features: mean/std/skew of inter-event times plus count
    events: list of event objects/dicts
    ts_fn: function mapping event -> timestamp (float seconds)
    returns: float32 array [mean_dt, std_dt, skew, n_intervals]; all zeros
        when fewer than two events are given
    """
    if len(events) < 2:
        return np.zeros(4, dtype=np.float32)
    # two or more events always give at least one interval, so no separate
    # empty-diff check is needed
    times = sorted(ts_fn(e) for e in events)
    diffs = np.diff(times).astype(np.float32)
    mean_dt = np.mean(diffs)
    std_dt = np.std(diffs) + 1e-10  # epsilon keeps the division below finite
    # constant spacing has no meaningful skew; report 0 instead of noise
    skew = np.mean(((diffs - mean_dt) / std_dt) ** 3) if std_dt > 1e-8 else 0.0
    return np.array([mean_dt, std_dt, skew, len(diffs)], dtype=np.float32)
def transition_entropy(events: List, state_fn: Callable) -> float:
    """entropy of the empirical distribution over consecutive state pairs
    a trajectory that keeps repeating few transitions scores low, a
    trajectory spread over many different transitions scores high
    returns 0.0 for fewer than two events
    """
    if len(events) < 2:
        return 0.0

    visited = [state_fn(evt) for evt in events]
    tally = defaultdict(int)
    for pair in zip(visited, visited[1:]):
        tally[pair] += 1

    n_transitions = sum(tally.values())
    acc = 0
    for count in tally.values():
        p = count / n_transitions
        acc += p * np.log(p + 1e-10)  # epsilon guards log(0)
    return -acc
def parse_timestamp(ts: Any) -> float:
    """parse various timestamp formats to float seconds
    accepted inputs:
      - None -> 0.0
      - datetime -> its POSIX timestamp
      - int/float -> the value as float
      - str -> ISO-8601 (a 'Z' suffix is read as UTC), otherwise a plain
        finite number such as "1700000000.5"
    anything that cannot be interpreted yields 0.0
    """
    if ts is None:
        return 0.0
    if isinstance(ts, datetime):
        return ts.timestamp()
    if isinstance(ts, (int, float)):
        return float(ts)
    if isinstance(ts, str):
        text = ts.strip()
        try:
            return datetime.fromisoformat(text.replace('Z', '+00:00')).timestamp()
        except ValueError:
            pass  # not ISO formatted, try a numeric epoch string next
        try:
            value = float(text)
        except ValueError:
            return 0.0
        # float() also accepts "nan"/"inf"; those are not usable timestamps
        return value if float('-inf') < value < float('inf') else 0.0
    return 0.0
def set_session_prices(self, session_id: str, prices: Dict[str, float], ttl: int = 1800):
    """
    Store prices for a specific session.
    THIS is the write path for session-aware pricing.

    The prices are written as string values into the hash
    ``session:{session_id}:prices`` (one field per product), after which
    the key's expiry is set to ``ttl`` seconds. An empty ``prices`` dict
    is a no-op.

    Args:
        session_id: session identifier
        prices: dict of {productId: price}
        ttl: time-to-live in seconds (default 30min)
    """
    if prices:
        redis_key = f"session:{session_id}:prices"
        encoded = {product_id: str(price) for product_id, price in prices.items()}
        # hash layout gives O(1) lookup per product on the read path
        self.redis_client.hset(redis_key, mapping=encoded)
        self.redis_client.expire(redis_key, ttl)
def make_state_repr(page: str = None, product_id: str = None, event_name: str = None) -> str:
    """build the canonical state string ``page|productId|eventName``
    missing or empty components are replaced by the placeholders
    'unk' (page), 'none' (product) and 'unknown' (event name)
    """
    components = (
        page or 'unk',
        product_id or 'none',
        event_name or 'unknown',
    )
    return "{}|{}|{}".format(*components)
def get_timestamp(evt: Any) -> Any:
    """extract timestamp from event object/dict
    reads 'ts' first and falls back to 'timestamp' only when 'ts' is
    missing (None or empty string); a legitimate zero timestamp is kept
    returns the raw value, or None when neither field is set
    """
    if isinstance(evt, dict):
        primary, fallback = evt.get('ts'), evt.get('timestamp')
    else:
        primary, fallback = getattr(evt, 'ts', None), getattr(evt, 'timestamp', None)
    # explicit missing-check instead of `or`, which would discard ts == 0
    if primary is None or primary == '':
        return fallback
    return primary
def aggregate_prices(sessions: List["Session"], mode: str = "all") -> Dict[int, List[float] | float]:
    """Unified price aggregation across sessions.

    Every event contributes ``float(e.price_seen)`` under the key
    ``int(e.product_idx)``; keys are plain ints in every mode.

    mode:
        "all": every price seen, per product (event order preserved)
        "min_per_session": per product, one entry per session holding the
            lowest price that session saw for the product
        "min_across": per product, the single lowest price over all sessions
        Any other value is treated as "all".
    """
    if mode == "min_across":
        lowest: Dict[int, float] = {}
        for s in sessions:
            for e in s.events:
                pidx, price = int(e.product_idx), float(e.price_seen)
                lowest[pidx] = min(lowest.get(pidx, price), price)
        return lowest

    if mode == "min_per_session":
        per_session: Dict[int, List[float]] = {}
        for s in sessions:
            session_min: Dict[int, float] = {}
            for e in s.events:
                pidx, price = int(e.product_idx), float(e.price_seen)
                session_min[pidx] = min(session_min.get(pidx, price), price)
            for pidx, pmin in session_min.items():
                per_session.setdefault(pidx, []).append(pmin)
        return per_session

    # "all": normalise the key with int() like the other modes do, so
    # lookups by integer product index behave the same for every mode
    prices: Dict[int, List[float]] = {}
    for s in sessions:
        for e in s.events:
            prices.setdefault(int(e.product_idx), []).append(float(e.price_seen))
    return prices
np.ndarray: + """Compute demand-weighted importance per product.""" + w = np.zeros(n_products, dtype=float) + sessions_by_id = {s.sid: s for s in sessions} + for sid, q in demand_mapping.items(): + sess = sessions_by_id.get(sid) + if sess and sess.events: + w[int(sess.events[0].product_idx)] += float(q) + total = float(np.sum(w)) + return (w / total) if total > 0 else w + + +def compute_coi_window(sessions: List["Session"], costs: np.ndarray, demand_mapping: Dict[str, float] | None = None) -> COIWindow: + """Compute COI metrics over session window. + + Aggregates price exposures and computes policy-level vs agent-realized COI. + """ + n = int(len(costs)) + prices = aggregate_prices(sessions, mode="all") + agent_sessions = [s for s in sessions if s.actor == "A"] + agent_min = aggregate_prices(agent_sessions, mode="min_across") if agent_sessions else {} + + policy_by = np.zeros(n, dtype=float) + agent_by = np.zeros(n, dtype=float) + seen = np.array([(i in prices) for i in range(n)], dtype=bool) + agent_seen = np.array([(i in agent_min) for i in range(n)], dtype=bool) + + for pidx, ps in prices.items(): + if 0 <= pidx < n and ps: + policy_by[pidx] = float(np.mean(ps) - float(costs[pidx])) + for pidx, pmin in agent_min.items(): + if 0 <= pidx < n: + agent_by[pidx] = float(pmin - float(costs[pidx])) + + agent_by[seen & ~agent_seen] = policy_by[seen & ~agent_seen] # no erosion if no agent exposure + + demand_w = demand_weights_by_product(sessions, demand_mapping, n) if demand_mapping else np.zeros(n, dtype=float) + has_weights = float(np.sum(demand_w)) > 0 + + if has_weights: + policy, agent = float(np.dot(demand_w, policy_by)), float(np.dot(demand_w, agent_by)) + elif np.any(seen): + policy, agent = float(np.mean(policy_by[seen])), float(np.mean(agent_by[seen])) + else: + policy, agent = 0.0, 0.0 + + leak = float(max(policy - agent, 0.0)) + survival = float(np.clip(agent / policy, 0.0, 1.0)) if policy > 0 else 0.0 + + return COIWindow(policy=policy, agent=agent, 
def coi_erosion(coi_policy: float, coi_agent: float, eps: float = 1e-9) -> float:
    """Fraction of policy-level COI destroyed by agent querying.

    erosion = 1 - COI_agent / COI_policy, clipped to [0, 1]. With no
    policy-level pricing power to lose (COI_policy <= eps) the erosion
    is defined as 0.
    """
    if coi_policy <= eps:
        return 0.0
    # eps in the denominator mirrors the guard above and avoids blow-ups
    retained_share = coi_agent / (coi_policy + eps)
    clipped = np.clip(1.0 - retained_share, 0.0, 1.0)
    return float(clipped)
uniform queries on [p_min, p_max]: + E[p^(1)] = p_min + (p_max - p_min)/(N+1), so erosion = 1 - 2/(N+1) + """ + erosions = [] + for a in alphas: + n_agents = max(1, int(a * n_sessions)) + erosions.append(1.0 - 2.0 / (n_agents + 1)) + return np.array(erosions) + + +def run_policy_episode( + env: PricingEnv, + policy_fn, + n_episodes: int = 10 +) -> Tuple[List[float], List[float], List[float], List[float]]: + """Run policy and collect per-step metrics.""" + rewards, coi_erosions, alpha_errors, revenues = [], [], [], [] + + for _ in range(n_episodes): + obs, info = env.reset() + done = False + while not done: + action = policy_fn(obs, env.n) + obs, reward, terminated, truncated, info = env.step(action) + done = terminated or truncated + rewards.append(reward) + if 'coi_erosion' in info: + coi_erosions.append(info['coi_erosion']) + if 'alpha_true' in info and 'alpha_est' in info: + alpha_errors.append(abs(info['alpha_true'] - info['alpha_est'])) + if 'revenue' in info: + revenues.append(info['revenue']) + + return rewards, coi_erosions, alpha_errors, revenues + + +class PolicyRegistry: + """Registry of baseline policies.""" + + @staticmethod + def fixed(obs: np.ndarray, n: int, margin: float = 0.15) -> np.ndarray: + return np.ones(n, dtype=np.float32) * (1.0 + margin) + + @staticmethod + def random(obs: np.ndarray, n: int, rng: np.random.Generator = None) -> np.ndarray: + rng = rng or np.random.default_rng() + return rng.uniform(0.7, 1.3, n).astype(np.float32) + + @staticmethod + def adaptive(obs: np.ndarray, n: int, base_margin: float = 0.15) -> np.ndarray: + """Reduce margins when alpha estimate is high.""" + alpha_est = obs[2 * n] if len(obs) > 2 * n else 0.2 + margin_scale = 1.0 - 0.4 * alpha_est + return np.ones(n, dtype=np.float32) * (1.0 + base_margin * margin_scale) + + @staticmethod + def aggressive(obs: np.ndarray, n: int) -> np.ndarray: + """High margins, ignores contamination.""" + return np.ones(n, dtype=np.float32) * 1.4 + + @staticmethod + def 
defensive(obs: np.ndarray, n: int) -> np.ndarray: + """Low margins, always cautious.""" + return np.ones(n, dtype=np.float32) * 1.05 + + @staticmethod + def alpha_proportional(obs: np.ndarray, n: int, max_margin: float = 0.3) -> np.ndarray: + """Margin inversely proportional to estimated alpha.""" + alpha_est = obs[2 * n] if len(obs) > 2 * n else 0.2 + margin = max_margin * (1.0 - alpha_est) + return np.ones(n, dtype=np.float32) * (1.0 + margin) + + +def run_contamination_sweep( + alphas: List[float], + policies: Dict[str, callable], + n_products: int = 10, + max_steps: int = 200, + n_episodes: int = 10, + seed: int = 42, + log_dir: str = None +) -> Dict[str, List[ExperimentResult]]: + """Run policies across contamination levels.""" + + results = {name: [] for name in policies} + writer = SummaryWriter(Path(log_dir) / "sweep") if log_dir and HAS_TB else None + + for alpha in alphas: + print(f" alpha={alpha:.2f}", end=" ") + env_cfg = EnvConfig( + n_products=n_products, max_steps=max_steps, + alpha_true=alpha, reward_mode="robust", seed=seed) + env = make_env(env_cfg) + + for name, policy_fn in policies.items(): + rewards, coi_vals, alpha_errs, revenues = run_policy_episode(env, policy_fn, n_episodes) + + result = ExperimentResult( + name=name, alpha=alpha, + reward_mean=float(np.mean(rewards)), + reward_std=float(np.std(rewards)), + coi_erosion=float(np.mean(coi_vals)) if coi_vals else 0.0, + alpha_error=float(np.mean(alpha_errs)) if alpha_errs else 0.0, + revenue=float(np.mean(revenues)) if revenues else 0.0, + margin=float(np.mean([policy_fn(np.zeros(3 * n_products + 3), n_products)]) - 1.0)) + + results[name].append(result) + + if writer: + step = int(alpha * 100) + writer.add_scalar(f'{name}/reward', result.reward_mean, step) + writer.add_scalar(f'{name}/coi_erosion', result.coi_erosion, step) + writer.add_scalar(f'{name}/alpha_error', result.alpha_error, step) + writer.add_scalar(f'{name}/revenue', result.revenue, step) + + print(f"done") + + # add theoretical 
curve + if writer: + theo = theoretical_coi_erosion_curve(np.array(alphas)) + for i, (a, e) in enumerate(zip(alphas, theo)): + writer.add_scalar('theoretical/coi_erosion', e, int(a * 100)) + writer.close() + + return results + + +def run_coi_demonstration(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict: + """Main COI demonstration experiment.""" + print("=== COI Leakage Demonstration ===\n") + + Path(log_dir).mkdir(parents=True, exist_ok=True) + writer = SummaryWriter(Path(log_dir) / "coi_demo") if HAS_TB else None + + # theoretical erosion curve + print("1. Theoretical COI erosion (Theorem 1)") + alphas = np.linspace(0.0, 0.6, 13) + theo_erosion = theoretical_coi_erosion_curve(alphas, n_sessions=1000) + + for a, e in zip(alphas, theo_erosion): + print(f" alpha={a:.2f} -> erosion={e:.3f}") + if writer: + writer.add_scalar('theory/coi_erosion', e, int(a * 100)) + + # policy comparison + print("\n2. Policy comparison across contamination levels") + policies = { + 'fixed': lambda obs, n: PolicyRegistry.fixed(obs, n), + 'aggressive': PolicyRegistry.aggressive, + 'defensive': PolicyRegistry.defensive, + 'adaptive': PolicyRegistry.adaptive, + 'alpha_proportional': PolicyRegistry.alpha_proportional, + } + + sweep_alphas = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5] + results = run_contamination_sweep( + sweep_alphas, policies, n_products=10, max_steps=100, + n_episodes=5, seed=seed, log_dir=log_dir) + + # summarize + print("\n3. 
Summary by policy") + for name, res_list in results.items(): + avg_reward = np.mean([r.reward_mean for r in res_list]) + avg_coi = np.mean([r.coi_erosion for r in res_list]) + print(f" {name:20s}: avg_reward={avg_reward:.2f}, avg_coi={avg_coi:.3f}") + + # save results + output = { + 'theoretical': {'alphas': alphas.tolist(), 'erosion': theo_erosion.tolist()}, + 'empirical': {name: [r.to_dict() for r in res_list] for name, res_list in results.items()}} + + with open(Path(log_dir) / "coi_demo_results.json", 'w') as f: + json.dump(output, f, indent=2) + + if writer: + writer.close() + + print(f"\nResults saved to {log_dir}/coi_demo_results.json") + print(f"TensorBoard: tensorboard --logdir {log_dir}") + + return output + + +def run_reward_mode_comparison(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict: + """Compare different reward modes.""" + print("=== Reward Mode Comparison ===\n") + + Path(log_dir).mkdir(parents=True, exist_ok=True) + writer = SummaryWriter(Path(log_dir) / "reward_modes") if HAS_TB else None + + reward_modes = ["revenue", "profit", "robust", "coi_aware"] + alpha = 0.3 # moderate contamination + + results = {} + for mode in reward_modes: + print(f" mode={mode}", end=" ") + env_cfg = EnvConfig( + n_products=10, max_steps=200, alpha_true=alpha, + reward_mode=mode, seed=seed) + env = make_env(env_cfg) + + rewards, coi_vals, _, revenues = run_policy_episode( + env, PolicyRegistry.adaptive, n_episodes=10) + + results[mode] = { + 'reward_mean': float(np.mean(rewards)), + 'reward_std': float(np.std(rewards)), + 'coi_erosion': float(np.mean(coi_vals)) if coi_vals else 0.0, + 'revenue': float(np.mean(revenues)) if revenues else 0.0} + + if writer: + for k, v in results[mode].items(): + writer.add_scalar(f'{mode}/{k}', v, 0) + + print(f"reward={results[mode]['reward_mean']:.2f}, coi={results[mode]['coi_erosion']:.3f}") + + if writer: + writer.close() + + with open(Path(log_dir) / "reward_mode_results.json", 'w') as f: + 
json.dump(results, f, indent=2) + + return results + + +def run_alpha_drift_experiment(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict: + """Test policy robustness under non-stationary contamination.""" + print("=== Alpha Drift Experiment ===\n") + + Path(log_dir).mkdir(parents=True, exist_ok=True) + writer = SummaryWriter(Path(log_dir) / "alpha_drift") if HAS_TB else None + + drift_rates = [0.0, 0.01, 0.02, 0.05] + results = {} + + for drift in drift_rates: + print(f" drift={drift:.2f}", end=" ") + env_cfg = EnvConfig( + n_products=10, max_steps=200, alpha_true=0.2, + alpha_drift=drift, reward_mode="robust", seed=seed) + env = make_env(env_cfg) + + rewards, coi_vals, alpha_errs, _ = run_policy_episode( + env, PolicyRegistry.adaptive, n_episodes=10) + + results[f'drift_{drift}'] = { + 'reward_mean': float(np.mean(rewards)), + 'coi_erosion': float(np.mean(coi_vals)) if coi_vals else 0.0, + 'alpha_tracking_error': float(np.mean(alpha_errs)) if alpha_errs else 0.0} + + if writer: + for k, v in results[f'drift_{drift}'].items(): + writer.add_scalar(f'drift_{drift}/{k}', v, 0) + + print(f"reward={results[f'drift_{drift}']['reward_mean']:.2f}, " + f"alpha_err={results[f'drift_{drift}']['alpha_tracking_error']:.3f}") + + if writer: + writer.close() + + return results + + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser(description="Run COI experiments") + parser.add_argument("--exp", type=str, default="coi", choices=["coi", "reward", "drift", "all"]) + parser.add_argument("--log-dir", type=str, default="sim/case/thesis_simplified/runs") + parser.add_argument("--seed", type=int, default=42) + args = parser.parse_args() + + if args.exp == "coi" or args.exp == "all": + run_coi_demonstration(args.log_dir, args.seed) + + if args.exp == "reward" or args.exp == "all": + run_reward_mode_comparison(args.log_dir, args.seed) + + if args.exp == "drift" or args.exp == "all": + run_alpha_drift_experiment(args.log_dir, args.seed) 
diff --git a/sim/case/thesis_simplified/separability.py b/sim/case/thesis_simplified/separability.py new file mode 100644 index 0000000..eaabaa3 --- /dev/null +++ b/sim/case/thesis_simplified/separability.py @@ -0,0 +1,72 @@ +"""Behavioral separability for human/agent detection. + +Computes divergence signals delta_H, delta_A from session trajectories using +transition kernel estimation and KL divergence to prototype behavioral profiles. +""" +from __future__ import annotations +from typing import Dict, List, Tuple, TYPE_CHECKING +import numpy as np + +if TYPE_CHECKING: + from .simplified import Event, Session + + +# prototype behavioral kernels for human vs agent sessions +TRANS_H = { + "start": {"view": 0.85, "end": 0.15}, + "view": {"detail": 0.4, "cart": 0.3, "view": 0.2, "end": 0.1}, + "detail": {"cart": 0.5, "view": 0.3, "end": 0.2}, + "cart": {"purchase": 0.6, "view": 0.25, "end": 0.15}, + "purchase": {"end": 1.0}, +} + +TRANS_A = { + "start": {"view": 0.95, "end": 0.05}, + "view": {"detail": 0.6, "view": 0.25, "cart": 0.1, "end": 0.05}, + "detail": {"view": 0.5, "cart": 0.15, "detail": 0.3, "end": 0.05}, + "cart": {"view": 0.4, "purchase": 0.2, "end": 0.4}, + "purchase": {"end": 1.0}, +} + + +def kl_div(p: Dict[str, float], q: Dict[str, float], eps: float = 1e-10) -> float: + """KL divergence D_KL(p || q) for discrete distributions.""" + keys = set(p.keys()) | set(q.keys()) + return sum(p.get(k, eps) * np.log((p.get(k, eps) + eps) / (q.get(k, eps) + eps)) for k in keys) + + +def build_kernel(events: List["Event"]) -> Dict[str, Dict[str, float]]: + """Build empirical transition kernel T' from trajectory events.""" + trans: Dict[str, Dict[str, int]] = {} + prev = "start" + for e in events: + curr = e.action + trans.setdefault(prev, {}) + trans[prev][curr] = trans[prev].get(curr, 0) + 1 + prev = curr + return {s: {d: c / sum(dsts.values()) for d, c in dsts.items()} for s, dsts in trans.items() if sum(dsts.values()) > 0} + + +def compute_divergence(session: 
"Session") -> Tuple[float, float]: + """Compute divergence signals delta_H, delta_A for session. + + delta_H = mean KL(T' || T_H) across states, measures distance to human prototype + delta_A = mean KL(T' || T_A) across states, measures distance to agent prototype + """ + kernel = build_kernel(session.events) + if not kernel: + return 0.5, 0.5 + delta_h = sum(kl_div(kernel.get(s, {}), TRANS_H.get(s, {})) for s in kernel) / len(kernel) + delta_a = sum(kl_div(kernel.get(s, {}), TRANS_A.get(s, {})) for s in kernel) / len(kernel) + return delta_h, delta_a + + +def estimate_alpha(session: "Session", beta: float = 2.0) -> float: + """Per-session contamination estimate alpha_hat = sigma(beta*(delta_H - delta_A)). + + Returns probability session is agent-generated based on behavioral divergence. + """ + dh, da = compute_divergence(session) + if (dh + da) <= 0: + return 0.5 + return 1.0 / (1.0 + np.exp(-beta * (dh - da))) diff --git a/sim/case/thesis_simplified/simplified.py b/sim/case/thesis_simplified/simplified.py new file mode 100644 index 0000000..450f01a --- /dev/null +++ b/sim/case/thesis_simplified/simplified.py @@ -0,0 +1,219 @@ +"""Minimal implementation of thesis pricing system. + +Implements the core loop: prices -> sessions -> demand -> prices +with behavioral separability and robust pricing objective. 
+ +Objects: +- Session trajectories tau_s from mixture of H/A behavioral profiles +- Demand proxy q_hat via weighted action aggregation +- COI leakage penalty for agent reconnaissance +- Limbo: alternating price/demand history for trajectory analysis +""" +from __future__ import annotations +from dataclasses import dataclass, field +from typing import Dict, List, Tuple +import numpy as np + +from .coi import COIWindow, compute_coi_window +from .separability import TRANS_H, TRANS_A, kl_div, build_kernel, compute_divergence, estimate_alpha + +ACTION_WEIGHTS = {"add_to_cart": 0.8, "checkout": 0.9, "purchase": 1.0, "view": 0.15, "detail": 0.25, "hover": 0.3, "start": 0.05, "end": 0.0} + + +@dataclass +class Event: + action: str + product_idx: int + price_seen: float + ts: float + + +@dataclass +class Session: + sid: str + events: List[Event] + actor: str # H or A (ground truth label) + theta: Dict[str, float] = field(default_factory=dict) + + +def compute_demand(session: Session) -> float: + """Compute demand proxy q_hat = sum_k omega(a_k) for session.""" + return sum(ACTION_WEIGHTS.get(e.action, 0.1) for e in session.events) + + +def sample_trajectory(rng: np.random.Generator, trans: Dict, prices: np.ndarray, costs: np.ndarray, theta: Dict[str, float], + is_agent: bool, session_noise: float = 0.02, surge: float = 0.08, max_mult: float = 1.8) -> Tuple[List[Event], int]: + """Sample session trajectory from behavioral kernel.""" + pidx = int(rng.integers(0, len(prices))) + cost, base = float(costs[pidx]), float(prices[pidx]) * (1.0 + rng.normal(0.0, session_noise)) + base = float(np.clip(base, cost * 1.01, float(prices[pidx]) * 2.0)) + price, signal, state, t = base, 0.0, "start", 0.0 + events = [] + + while state != "end" and len(events) < 30: + probs = trans.get(state, {"end": 1.0}) + nxt = rng.choice(list(probs.keys()), p=list(probs.values())) + if nxt == "purchase": # purchase conversion check + rel = max((price - cost) / (cost + 1e-6), 0.0) + p_buy = 
float(np.clip(theta.get("base_conv", 0.2) * np.exp(-theta.get("price_sens", 2.0) * rel), 0.0, 1.0)) + if rng.random() > p_buy: + nxt = "end" + state = nxt + if state not in {"start", "end"}: + events.append(Event(action=state, product_idx=pidx, price_seen=float(price), ts=t)) + signal += float(ACTION_WEIGHTS.get(state, 0.1)) + price = float(np.clip(base * (1.0 + surge * signal), cost * 1.01, base * max_mult)) + t += max(0.2, rng.gamma(1.5, 0.8) if is_agent else rng.gamma(2.0, 1.2)) + return events, pidx + + +def put_prices_to_market(prices: np.ndarray, costs: np.ndarray, alpha: float = 0.2, n_sessions: int = 50, + seed: int | None = None) -> Tuple[List[Session], Dict[str, float]]: + """Generate sessions from mixture model. Returns sessions and demand mapping sid -> q_hat.""" + rng = np.random.default_rng(seed) + sessions, demand = [], {} + for i in range(n_sessions): + sid = f"s{i:04d}" + is_agent = rng.random() < alpha + trans = TRANS_A if is_agent else TRANS_H + theta = {"price_sens": rng.uniform(0.05, 0.2), "base_conv": 0.01} if is_agent else \ + {"price_sens": rng.uniform(1.5, 4.0), "base_conv": rng.uniform(0.2, 0.5)} + events, _ = sample_trajectory(rng, trans, prices, costs=costs, theta=theta, is_agent=is_agent) + session = Session(sid=sid, events=events, actor="A" if is_agent else "H", theta=theta) + sessions.append(session) + demand[sid] = compute_demand(session) + return sessions, demand + + +@dataclass +class LimboUpdate: + utype: str # "prices" or "demand" + data: np.ndarray | Dict[str, float] + t: int + + +class Limbo: + """Historical trajectory of alternating price/demand observations.""" + + def __init__(self): + self.history: List[LimboUpdate] = [] + self._t = 0 + + def add_update(self, utype: str, data: np.ndarray | Dict[str, float]) -> Dict: + self.history.append(LimboUpdate(utype=utype, data=data, t=self._t)) + self._t += 1 + return {"action": "observe_demand" if utype == "prices" else "set_prices"} + + def get_prices_history(self) -> 
List[np.ndarray]: + return [u.data for u in self.history if u.utype == "prices"] + + def get_demand_history(self) -> List[Dict[str, float]]: + return [u.data for u in self.history if u.utype == "demand"] + + +class System: + """Main pricing system implementing robust Stackelberg objective. + + Manages the alternating loop: set prices p_t -> observe demand Q_hat(p_t) -> + estimate contamination alpha from behavioral signals -> compute next prices. + """ + + def __init__(self, n_products: int = 10, costs: np.ndarray | None = None, lambda_coi: float = 0.5, seed: int | None = 42): + self.n = n_products + self.rng = np.random.default_rng(seed) + self.costs = costs if costs is not None else self.rng.uniform(10, 50, n_products) + self.refs = self.costs * (1 + self.rng.uniform(0.2, 0.5, n_products)) + self.lambda_coi = lambda_coi + self.limbo = Limbo() + self._alpha_est = 0.2 + self._sessions: List[Session] = [] + self._last_sessions: List[Session] = [] + self._last_coi: COIWindow | None = None + + @property + def alpha(self) -> float: + return self._alpha_est + + def _estimate_alpha_from_sessions(self) -> float: + if not self._sessions: + return self._alpha_est + return float(np.mean([estimate_alpha(s) for s in self._sessions[-50:]])) + + def _revenue_under_demand(self, prices: np.ndarray, demand: Dict[str, float]) -> float: + agg = np.zeros(self.n) + for sid, q in demand.items(): + sess = next((s for s in self._sessions if s.sid == sid), None) + if sess and sess.events: + agg[sess.events[0].product_idx] += q + return float(np.dot(prices, agg)) + + def _compute_coi_window(self, demand: Dict[str, float]) -> COIWindow: + if not self._last_sessions: + zeros = np.zeros(self.n, dtype=float) + return COIWindow(policy=0.0, agent=0.0, leak=0.0, survival_ratio=0.0, + policy_by_product=zeros, agent_by_product=zeros, demand_weights=zeros) + return compute_coi_window(self._last_sessions, self.costs, demand_mapping=demand) + + def _objective(self, prices: np.ndarray, demand: Dict[str, 
float]) -> float: + """Robust objective: R(p,d) - lambda * COI_leak.""" + profit = self._revenue_under_demand(prices, demand) - float(np.sum(self.costs)) + self._last_coi = self._compute_coi_window(demand) + return profit - self.lambda_coi * self._last_coi.leak + + def compute_prices(self, demand: Dict[str, float] | None = None) -> np.ndarray: + """Compute next prices via heuristic margin adjustment based on alpha estimate.""" + self._alpha_est = self._estimate_alpha_from_sessions() + margin_scale = 1.0 - 0.5 * self._alpha_est # defensive pricing under high contamination + margins = (self.refs - self.costs) * margin_scale + noise = self.rng.normal(0, 0.02, self.n) * self.costs + prices = np.clip(self.costs + margins + noise, self.costs * 1.02, self.refs * 1.3) + self.limbo.add_update("prices", prices) + return prices + + def observe_demand(self, prices: np.ndarray, alpha_true: float = 0.2, n_sessions: int = 50) -> Dict[str, float]: + sessions, demand_map = put_prices_to_market(prices, costs=self.costs, alpha=alpha_true, + n_sessions=n_sessions, seed=int(self.rng.integers(0, 10000))) + self._last_sessions = sessions + self._sessions.extend(sessions) + self.limbo.add_update("demand", demand_map) + return demand_map + + def step(self, alpha_true: float = 0.2, n_sessions: int = 50) -> Tuple[np.ndarray, Dict[str, float], float, COIWindow]: + demand_hist = self.limbo.get_demand_history() + prices = self.compute_prices(demand_hist[-1] if demand_hist else None) + demand = self.observe_demand(prices, alpha_true, n_sessions) + reward = self._objective(prices, demand) + return prices, demand, reward, self._last_coi or self._compute_coi_window(demand) + + def run(self, n_steps: int = 100, alpha_true: float = 0.2) -> Dict: + traj = {"prices": [], "demand": [], "rewards": [], "alpha_est": [], "alpha_true": alpha_true, + "coi_policy": [], "coi_agent": [], "coi_leak": [], "coi_survival": []} + for _ in range(n_steps): + p, d, r, coi = self.step(alpha_true) + 
traj["prices"].append(p); traj["demand"].append(d); traj["rewards"].append(r) + traj["alpha_est"].append(self._alpha_est) + traj["coi_policy"].append(coi.policy); traj["coi_agent"].append(coi.agent) + traj["coi_leak"].append(coi.leak); traj["coi_survival"].append(coi.survival_ratio) + return traj + + +if __name__ == "__main__": + sys = System(n_products=5, seed=42) + traj = sys.run(n_steps=20, alpha_true=0.25) + print(f"avg reward: {np.mean(traj['rewards']):.2f}, final alpha_hat: {traj['alpha_est'][-1]:.3f}, " + f"COI_policy: {np.mean(traj['coi_policy']):.3f}, COI_agent: {np.mean(traj['coi_agent']):.3f}, leak: {np.mean(traj['coi_leak']):.3f}") + + prices = np.array([20.0, 35.0, 50.0, 25.0, 40.0]) + costs = np.array([15.0, 28.0, 40.0, 18.0, 30.0]) + sessions, demand = put_prices_to_market(prices, costs=costs, alpha=0.3, n_sessions=20, seed=123) + print(f'sessions: {len(sessions)}, agents: {sum(1 for s in sessions if s.actor=="A")}') + + for n in [1, 5, 10, 50, 100]: + # theoretical: erosion = 1 - 2/(N+1) for uniform order statistic + print(f'N={n:3d} agents -> COI erosion: {1.0 - 2.0/(n+1):.3f}') + + events = [Event('view', 0, 20.0, 0.1), Event('detail', 0, 20.0, 0.5), Event('cart', 0, 20.0, 1.0), Event('purchase', 0, 20.0, 2.0)] + print(f'human-like session alpha_hat: {estimate_alpha(Session(sid="test", events=events, actor="H")):.3f}') + + events_a = [Event('view', 0, 20.0, 0.1), Event('detail', 0, 20.0, 0.2), Event('view', 0, 20.0, 0.3), Event('detail', 0, 20.0, 0.4)] + print(f'agent-like session alpha_hat: {estimate_alpha(Session(sid="test2", events=events_a, actor="A")):.3f}') diff --git a/sim/case/thesis_simplified/simplified_env.py b/sim/case/thesis_simplified/simplified_env.py new file mode 100644 index 0000000..70b3904 --- /dev/null +++ b/sim/case/thesis_simplified/simplified_env.py @@ -0,0 +1,249 @@ +"""Gymnasium-compatible RL environment for thesis pricing system. + +Wraps simplified.System with standard Gym interface for training pricing policies. 
+Supports multiple reward modes and contamination scenarios. + +Action: price multipliers [0.5, 1.5] applied to reference prices +Observation: [prices, demand_agg, alpha_est, margins, position_proxy] +Reward: configurable objective (revenue, profit, robust, coi-aware) +""" +from __future__ import annotations +from dataclasses import dataclass +from typing import Any, Dict, Tuple +import numpy as np + +try: + import gymnasium as gym + from gymnasium import spaces + HAS_GYM = True +except ImportError: + HAS_GYM = False + +from .simplified import System, Session, Event, Limbo, put_prices_to_market, compute_demand, estimate_alpha +from .coi import COIWindow, compute_coi_window, coi_erosion + + +@dataclass +class EnvConfig: + n_products: int = 5 + max_steps: int = 200 + sessions_per_step: int = 30 + alpha_true: float = 0.2 + alpha_drift: float = 0.0 + alpha_bounds: Tuple[float, float] = (0.0, 0.6) + lambda_coi: float = 0.5 + lambda_vol: float = 0.1 + reward_mode: str = "robust" # revenue | profit | robust | coi_aware + normalize_reward: bool = True + seed: int | None = 42 + + +def aggregate_purchases(sessions: list[Session], n_products: int, costs: np.ndarray) -> Tuple[np.ndarray, float, float]: + """Aggregate purchases from sessions, returns (counts, revenue, cost).""" + purchases = np.zeros(n_products, dtype=float) + revenue, cost = 0.0, 0.0 + for sess in sessions: + for e in sess.events: + if e.action == "purchase" and 0 <= e.product_idx < n_products: + purchases[e.product_idx] += 1.0 + revenue += float(e.price_seen) + cost += float(costs[e.product_idx]) + return purchases, revenue, cost + + +class PricingEnv(gym.Env if HAS_GYM else object): + """RL environment for dynamic pricing under agent contamination. + + Platform sets prices p_t, market responds with mixture demand Q(p) = (1-alpha)*D_H + alpha*D_A. + Agent estimates contamination alpha_hat from behavioral signals. + Reward balances profit vs COI leakage. 
+ """ + metadata = {"render_modes": ["human", "ansi"]} + + def __init__(self, cfg: EnvConfig | None = None): + if not HAS_GYM: + raise ImportError("gymnasium required") + self.cfg = cfg or EnvConfig() + self.n = self.cfg.n_products + self._sys: System | None = None + self._t = 0 + self._alpha = self.cfg.alpha_true + self._last_prices: np.ndarray | None = None + self._last_demand: Dict[str, float] | None = None + self._episode_rewards: list[float] = [] + self._demand_agg = np.zeros(self.n) + + self.action_space = spaces.Box(low=0.5, high=1.5, shape=(self.n,), dtype=np.float32) + obs_dim = self.n + self.n + 1 + 1 + self.n + 1 # prices + demand + alpha_hat + alpha + margins + t + self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32) + + def _build_obs(self) -> np.ndarray: + if self._sys is None: + return np.zeros(self.observation_space.shape[0], dtype=np.float32) + prices = self._last_prices if self._last_prices is not None else self._sys.refs + return np.concatenate([ + prices / (self._sys.refs + 1e-6), + self._demand_agg / (np.sum(self._demand_agg) + 1e-6), + [self._sys.alpha, self._alpha], + (prices - self._sys.costs) / (self._sys.costs + 1e-6), + [self._t / self.cfg.max_steps], + ]).astype(np.float32) + + def _compute_reward(self, prices: np.ndarray, demand: Dict[str, float]) -> float: + cfg, sys = self.cfg, self._sys + if sys is None: + return 0.0 + + # aggregate demand per product + agg = np.zeros(self.n) + for sid, q in demand.items(): + sess = next((s for s in sys._sessions if s.sid == sid), None) + if sess and sess.events: + agg[sess.events[0].product_idx] += q + self._demand_agg = agg + + _, revenue, cost = aggregate_purchases(sys._last_sessions, self.n, sys.costs) + profit = revenue - cost + + vol_penalty = 0.0 + if self._last_prices is not None: + vol_penalty = cfg.lambda_vol * float(np.mean(np.abs(prices - self._last_prices) / (sys.refs + 1e-6))) + + coi = compute_coi_window(sys._last_sessions, sys.costs, 
demand_mapping=demand) + leak = float(coi.leak) + + reward_fns = { + "revenue": lambda: revenue, + "profit": lambda: profit, + "robust": lambda: profit - cfg.lambda_coi * leak - vol_penalty, + "coi_aware": lambda: profit - cfg.lambda_coi * (1 + 2 * sys.alpha) * leak - vol_penalty, + } + r = reward_fns.get(cfg.reward_mode, lambda: profit)() + return float(r / (float(np.sum(sys.refs)) + 1e-6)) if cfg.normalize_reward else float(r) + + def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]: + seed = seed if seed is not None else self.cfg.seed + self._sys = System(n_products=self.n, lambda_coi=self.cfg.lambda_coi, seed=seed) + self._t, self._alpha = 0, self.cfg.alpha_true + self._last_prices, self._last_demand = None, None + self._episode_rewards, self._demand_agg = [], np.zeros(self.n) + return self._build_obs(), {"alpha_true": self._alpha, "alpha_est": self._sys.alpha, + "costs": self._sys.costs.copy(), "refs": self._sys.refs.copy()} + + def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, dict]: + if self._sys is None: + raise RuntimeError("call reset() first") + + action = np.clip(action, 0.5, 1.5) + prices = np.clip(self._sys.refs * action.astype(np.float64), self._sys.costs * 1.01, self._sys.refs * 2.0) + demand = self._sys.observe_demand(prices, alpha_true=self._alpha, n_sessions=self.cfg.sessions_per_step) + self._sys.limbo.add_update("prices", prices) + self._sys._alpha_est = self._sys._estimate_alpha_from_sessions() + + reward = self._compute_reward(prices, demand) + self._episode_rewards.append(reward) + self._last_prices, self._last_demand = prices.copy(), demand + self._t += 1 + + # compute info metrics using shared helper + purchases, revenue, cost = aggregate_purchases(self._sys._last_sessions, self.n, self._sys.costs) + n_agents = int(self._alpha * self.cfg.sessions_per_step) + coi = compute_coi_window(self._sys._last_sessions, self._sys.costs, demand_mapping=demand) + + info = { + 
"alpha_true": self._alpha, "alpha_est": self._sys.alpha, + "alpha_error": abs(self._alpha - self._sys.alpha), + "revenue": float(revenue), "profit": float(revenue - cost), "cost": float(cost), + "n_purchases": int(np.sum(purchases)), + "avg_margin": float(np.mean((prices - self._sys.costs) / self._sys.costs)), + "n_sessions": len(demand), "n_agents": n_agents, "price_std": float(np.std(prices)), + "coi_erosion": coi_erosion(coi.policy, coi.agent), + "coi_policy": float(coi.policy), "coi_agent": float(coi.agent), + "coi_leakage": float(coi.leak), "coi_survival": float(coi.survival_ratio), + "cumulative_reward": sum(self._episode_rewards), "step": self._t, + } + return self._build_obs(), reward, self._t >= self.cfg.max_steps, False, info + + def render(self, mode: str = "human") -> str | None: + if self._sys is None or self._last_prices is None: + return None + out = f"t={self._t}/{self.cfg.max_steps} | alpha_true={self._alpha:.3f} alpha_hat={self._sys.alpha:.3f} | " \ + f"prices: {self._last_prices.round(1)} | demand: {self._demand_agg.round(2)} | " \ + f"reward: {self._episode_rewards[-1] if self._episode_rewards else 0:.3f}" + if mode == "human": + print(out) + return out + + def close(self) -> None: + pass + + +class ContaminationSweepEnv(PricingEnv): + """Environment that sweeps through contamination levels during training.""" + + def __init__(self, cfg: EnvConfig | None = None, alpha_schedule: list[float] | None = None): + super().__init__(cfg) + self._schedule = alpha_schedule or [0.1, 0.2, 0.3, 0.4, 0.5] + self._schedule_idx = 0 + + def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]: + if options and options.get("advance_schedule", False): + self._schedule_idx = (self._schedule_idx + 1) % len(self._schedule) + self.cfg.alpha_true = self._schedule[self._schedule_idx] + return super().reset(seed, options) + + +class AdversarialEnv(PricingEnv): + """Environment with adversarial contamination dynamics. 
+ + Contamination increases when prices are predictable (agents exploit). + """ + + def __init__(self, cfg: EnvConfig | None = None, exploitation_rate: float = 0.02): + super().__init__(cfg) + self._exploit_rate = exploitation_rate + self._price_history: list[np.ndarray] = [] + + def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, dict]: + obs, reward, term, trunc, info = super().step(action) + if self._last_prices is not None: + self._price_history.append(self._last_prices.copy()) + predictability = 0.0 + if len(self._price_history) > 10: + predictability = 1.0 / (float(np.std(self._price_history[-10:])) + 0.1) + self._alpha = np.clip(self._alpha + self._exploit_rate * predictability * self._sys.rng.random(), *self.cfg.alpha_bounds) + info["predictability"] = predictability + return obs, reward, term, trunc, info + + def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]: + self._price_history = [] + return super().reset(seed, options) + + +def make_env(cfg: EnvConfig | None = None, env_type: str = "standard") -> PricingEnv: + return {"sweep": ContaminationSweepEnv, "adversarial": AdversarialEnv}.get(env_type, PricingEnv)(cfg) + + +# baseline policies +fixed_price_policy = lambda refs, margin=0.0: np.ones(len(refs), dtype=np.float32) * (1.0 + margin) +random_policy = lambda n, rng=None: (rng or np.random.default_rng()).uniform(0.7, 1.3, n).astype(np.float32) +adaptive_policy = lambda obs, n, base=0.1: np.ones(n, dtype=np.float32) * (1.0 + base * (1.0 - 0.4 * obs[2 * n])) + + +if __name__ == "__main__": + cfg = EnvConfig(n_products=100, max_steps=100, alpha_true=0.25, reward_mode="robust") + env = make_env(cfg) + obs, info = env.reset() + print(f"initial: alpha={info['alpha_true']:.2f}") + + total_reward = 0.0 + for t in range(cfg.max_steps): + action = adaptive_policy(obs, cfg.n_products) + obs, reward, done, _, info = env.step(action) + total_reward += reward + if t % 10 == 0: + env.render() + if 
done: + break + + print(f"\ntotal reward: {total_reward:.2f}, final alpha_hat: {info['alpha_est']:.3f}") diff --git a/sim/case/thesis_simplified/summarize.py b/sim/case/thesis_simplified/summarize.py new file mode 100644 index 0000000..10406aa --- /dev/null +++ b/sim/case/thesis_simplified/summarize.py @@ -0,0 +1,168 @@ +"""Summarize TensorBoard logs into comparison tables.""" +from __future__ import annotations +import json +import re +from pathlib import Path +from collections import defaultdict +from dataclasses import dataclass +import pandas as pd + +try: + from tensorboard.backend.event_processing.event_accumulator import EventAccumulator + HAS_TB = True +except ImportError: + HAS_TB = False + + +@dataclass +class RunInfo: + algo: str + alpha: float + reward_mode: str + path: Path + + +def parse_run_name(name: str) -> RunInfo | None: + """Extract algo, alpha, reward_mode from run directory name.""" + # patterns: ppo_a0.20_robust, cmp_fixed_a0.20, sac_a0.90_robust + m = re.match(r'(cmp_)?(\w+)_a([\d.]+)_?(\w+)?', name) + if not m: + return None + prefix, algo, alpha, mode = m.groups() + return RunInfo(algo=algo, alpha=float(alpha), reward_mode=mode or 'robust', path=Path()) + + +def load_tb_scalars(log_dir: Path, tags: list[str], reduce: str = 'last') -> dict[str, float]: + """Load scalar values from TensorBoard event files.""" + if not HAS_TB: + return {} + ea = EventAccumulator(str(log_dir)) + ea.Reload() + results = {} + for tag in tags: + if tag in ea.Tags().get('scalars', []): + events = ea.Scalars(tag) + if not events: + continue + vals = [e.value for e in events] + if reduce == 'last': + results[tag] = vals[-1] + elif reduce == 'mean': + results[tag] = sum(vals) / len(vals) + elif reduce == 'max': + results[tag] = max(vals) + elif reduce == 'min': + results[tag] = min(vals) + return results + + +def load_json_results(log_dir: Path) -> dict[str, float]: + """Load metrics from results.json if available.""" + results_file = log_dir / 'results.json' + if 
def build_tables(runs: list[RunInfo], metrics: list[str], reduce: str = 'last') -> dict[str, dict[str, pd.DataFrame]]:
    """Build pivot tables: reward_mode -> metric -> DataFrame[alpha x algo].

    For each run the value of a metric is taken from ``results.json`` when it is
    there (final evaluation metrics) and otherwise from the TensorBoard scalars,
    reduced according to ``reduce``.

    Args:
        runs: Runs to tabulate; ``path``, ``alpha``, ``algo`` and ``reward_mode`` are read.
        metrics: Short metric names, e.g. ``revenue``, ``margin``, ``erosion``.
        reduce: Reduction passed through to ``load_tb_scalars``.

    Returns:
        Nested dict of DataFrames indexed by alpha with one column per algo;
        combinations without data stay NaN.
    """
    # results.json (written by the training script) stores some metrics under
    # longer keys than the short names used for the TensorBoard tags.
    json_aliases = {
        'margin': 'avg_margin',
        'erosion': 'coi_erosion',
        'leakage': 'coi_leakage',
        'estimation_error': 'alpha_error',
    }

    # collect data: {reward_mode: {metric: {(alpha, algo): value}}}
    data = defaultdict(lambda: defaultdict(dict))

    tb_tags = [f'economics/{m}' if m in ['revenue', 'profit', 'margin'] else f'coi/{m}' if m in ['erosion', 'leakage'] else f'alpha/{m}' for m in metrics]
    tag_map = dict(zip(tb_tags, metrics))

    for run in runs:
        # try json first (final eval metrics)
        jm = load_json_results(run.path)
        tb = load_tb_scalars(run.path, tb_tags, reduce)

        for tag, metric in tag_map.items():
            json_keys = (f'{metric}_mean', f'{json_aliases.get(metric, metric)}_mean')
            val = next((jm[key] for key in json_keys if key in jm), None)
            if val is None and tag in tb:
                val = tb[tag]
            if val is not None:
                data[run.reward_mode][metric][(run.alpha, run.algo)] = val

    # convert to DataFrames
    tables = {}
    for mode, metrics_data in data.items():
        tables[mode] = {}
        for metric, vals in metrics_data.items():
            if not vals:
                continue
            alphas = sorted({a for a, _ in vals})
            algos = sorted({al for _, al in vals})
            df = pd.DataFrame(index=alphas, columns=algos, dtype=float)
            for (a, al), v in vals.items():
                df.loc[a, al] = v
            df.index.name = 'alpha'
            tables[mode][metric] = df
    return tables
list[str] | None = None, + reduce: str = 'last', + output: str | None = None) -> dict: + """Generate summary tables from experiment runs.""" + base = Path(base_dir) + metrics = metrics or ['revenue', 'profit', 'margin', 'erosion', 'leakage'] + + runs = discover_runs(base) + if not runs: + print(f"No runs found in {base}") + return {} + + print(f"Found {len(runs)} runs") + tables = build_tables(runs, metrics, reduce) + + lines = [] + for mode, metric_tables in sorted(tables.items()): + lines.append(f"\n# Reward Mode: {mode}\n") + for metric, df in sorted(metric_tables.items()): + lines.append(f"\n## {metric}\n") + lines.append(format_table(df)) + lines.append("") + + report = '\n'.join(lines) + print(report) + + if output: + Path(output).write_text(report) + print(f"\nSaved to {output}") + + return tables + + +if __name__ == '__main__': + import argparse + p = argparse.ArgumentParser() + p.add_argument('--dir', default='sim/case/thesis_simplified/runs') + p.add_argument('--metrics', nargs='+', default=['revenue', 'profit', 'margin', 'erosion', 'leakage']) + p.add_argument('--reduce', default='last', choices=['last', 'mean', 'max', 'min']) + p.add_argument('--output', '-o', help='save markdown to file') + args = p.parse_args() + summarize(args.dir, args.metrics, args.reduce, args.output) diff --git a/sim/case/thesis_simplified/train.py b/sim/case/thesis_simplified/train.py new file mode 100644 index 0000000..a405c44 --- /dev/null +++ b/sim/case/thesis_simplified/train.py @@ -0,0 +1,336 @@ +"""RL training for thesis pricing system with thesis-aligned metrics. + +Trains pricing policies using stable-baselines3 with TensorBoard logging. +Tracks COI erosion, alpha estimation error, and economic KPIs per thesis formulation. 
+""" +from __future__ import annotations +import argparse +import json +from concurrent.futures import ProcessPoolExecutor, as_completed +from dataclasses import dataclass, asdict, field +from pathlib import Path +from typing import Dict, List, Callable, Any +import numpy as np + +try: + from stable_baselines3 import PPO, SAC, A2C + from stable_baselines3.common.callbacks import BaseCallback, EvalCallback + from stable_baselines3.common.vec_env import DummyVecEnv + from stable_baselines3.common.monitor import Monitor + HAS_SB3 = True +except ImportError: + HAS_SB3 = False + +try: + from torch.utils.tensorboard import SummaryWriter + HAS_TB = True +except ImportError: + HAS_TB = False + +from .simplified_env import PricingEnv, EnvConfig, make_env, adaptive_policy, fixed_price_policy, random_policy + + +@dataclass +class EpisodeMetrics: + reward: float = 0.0 + revenue: float = 0.0 + profit: float = 0.0 + coi_erosion: float = 0.0 + coi_leakage: float = 0.0 + alpha_error: float = 0.0 + avg_margin: float = 0.0 + n_agents: int = 0 + steps: int = 0 + + def accumulate(self, info: Dict[str, Any]) -> None: + self.steps += 1 + self.reward += info.get('reward', 0) + self.revenue += info.get('revenue', 0) + self.profit += info.get('profit', 0) + self.coi_erosion += info.get('coi_erosion', 0) + self.coi_leakage += info.get('coi_leakage', 0) + self.alpha_error += abs(info.get('alpha_true', 0) - info.get('alpha_est', 0)) + self.avg_margin += info.get('avg_margin', 0) + self.n_agents += info.get('n_agents', 0) + + def normalized(self) -> Dict[str, float]: + s = max(self.steps, 1) + return {k: getattr(self, k) / s for k in ['revenue', 'profit', 'coi_erosion', 'coi_leakage', 'alpha_error', 'avg_margin', 'n_agents']} + + +@dataclass +class ExperimentConfig: + algo: str = "ppo" + total_timesteps: int = 100_000 + n_envs: int = 4 + eval_freq: int = 5000 + n_eval_episodes: int = 10 + log_dir: str = "sim/case/thesis_simplified/runs" + seed: int = 42 + n_products: int = 10 + max_steps: int 
= 200 + alpha_true: float = 0.2 + reward_mode: str = "robust" + experiment_name: str | None = None + + def __post_init__(self): + if self.experiment_name is None: + self.experiment_name = f"{self.algo}_a{self.alpha_true:.2f}_{self.reward_mode}" + + +class Policy: + """Unified policy interface for baselines and trained models.""" + + def __init__(self, policy_fn: Callable[[np.ndarray, int], np.ndarray], name: str): + self._fn, self.name = policy_fn, name + + def predict(self, obs: np.ndarray, deterministic: bool = True) -> tuple[np.ndarray, None]: + return self._fn(obs, (len(obs) - 3) // 3), None + + @staticmethod + def fixed(margin: float = 0.15) -> "Policy": + return Policy(lambda obs, n: fixed_price_policy(np.ones(n), margin), f"fixed_{margin:.2f}") + + @staticmethod + def adaptive(base_margin: float = 0.15) -> "Policy": + return Policy(lambda obs, n: adaptive_policy(obs, n, base_margin), f"adaptive_{base_margin:.2f}") + + @staticmethod + def random() -> "Policy": + return Policy(lambda obs, n: random_policy(n), "random") + + @staticmethod + def myopic(greed: float = 0.3) -> "Policy": + def _fn(obs: np.ndarray, n: int) -> np.ndarray: + demand_norm = obs[n:2*n] if len(obs) > 2*n else np.ones(n) * 0.5 + return np.ones(n, dtype=np.float32) * np.clip(1.0 + greed * (1 + np.mean(demand_norm)), 0.5, 1.5) + return Policy(_fn, f"myopic_{greed:.1f}") + + +def log_metrics(writer: SummaryWriter | None, metrics: Dict[str, float], prefix: str, step: int) -> None: + if writer is None: + return + for k, v in metrics.items(): + writer.add_scalar(f'{prefix}/{k}', v, step) + + +class MetricsCallback(BaseCallback): + def __init__(self, writer: SummaryWriter | None, verbose: int = 0): + super().__init__(verbose) + self._writer = writer + + def _on_step(self) -> bool: + if self._writer is None: + return True + for info in self.locals.get('infos', []): + t = self.num_timesteps + self._writer.add_scalar('economics/revenue', info.get('revenue', 0), t) + 
def make_vec_env(cfg: ExperimentConfig, n_envs: int = 1) -> DummyVecEnv:
    """Build a ``DummyVecEnv`` of ``n_envs`` monitored pricing environments.

    Each sub-environment gets its own seed, ``cfg.seed + rank``, so parallel
    copies do not all start from one identical seed. Rank 0 keeps ``cfg.seed``,
    which leaves single-environment callers unchanged.
    NOTE(review): assumes ``EnvConfig.seed`` drives the environment's RNG --
    confirm in the env implementation.

    Args:
        cfg: Experiment settings copied into every ``EnvConfig``.
        n_envs: Number of sub-environments.
    """
    def _factory(rank: int):
        # bind rank per factory; a bare closure in the comprehension would late-bind it
        def _make():
            env_cfg = EnvConfig(n_products=cfg.n_products, max_steps=cfg.max_steps,
                                alpha_true=cfg.alpha_true, reward_mode=cfg.reward_mode,
                                seed=cfg.seed + rank)
            return Monitor(make_env(env_cfg))
        return _make

    return DummyVecEnv([_factory(rank) for rank in range(n_envs)])
def train(cfg: ExperimentConfig) -> Dict[str, Any]:
    """Run one experiment (baseline policy or SB3 algorithm) and save its artifacts.

    Writes ``config.json``, TensorBoard scalars (when available), the trained
    model for SB3 algorithms, and ``results.json`` under
    ``<cfg.log_dir>/<cfg.experiment_name>``.

    Args:
        cfg: Experiment configuration; ``cfg.algo`` selects the baseline or algorithm.

    Returns:
        ``{"path": <run directory>, "metrics": <final evaluation metrics>}``.

    Raises:
        ImportError: A learning algorithm was requested without stable-baselines3.
        KeyError: ``cfg.algo`` is neither a known baseline nor a known algorithm.
    """
    algo = cfg.algo.lower()
    is_baseline = algo in ["fixed", "adaptive", "random", "myopic"]
    if not HAS_SB3 and not is_baseline:
        raise ImportError("stable-baselines3 required: pip install stable-baselines3[extra]")
    # NOTE(review): baselines still go through make_vec_env, which uses
    # DummyVecEnv/Monitor from stable-baselines3 -- confirm they work without it.

    log_path = Path(cfg.log_dir) / cfg.experiment_name
    log_path.mkdir(parents=True, exist_ok=True)
    with open(log_path / "config.json", "w") as f:
        json.dump(asdict(cfg), f, indent=2)

    writer = SummaryWriter(log_path) if HAS_TB else None
    train_env = eval_env = None
    try:
        train_env = make_vec_env(cfg, cfg.n_envs)
        eval_env = make_vec_env(cfg, 1)

        if is_baseline:
            policy = {"fixed": Policy.fixed, "adaptive": Policy.adaptive, "random": Policy.random, "myopic": Policy.myopic}[algo]()
            run_baseline(policy, train_env, cfg.total_timesteps, writer)
            final_metrics = evaluate_policy(policy, cfg)
        else:
            common = dict(verbose=1, seed=cfg.seed, tensorboard_log=str(log_path), device="auto")
            # lambdas so that only the selected algorithm is constructed
            model = {
                "ppo": lambda: PPO("MlpPolicy", train_env, learning_rate=3e-4, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2, ent_coef=0.01, **common),
                "sac": lambda: SAC("MlpPolicy", train_env, learning_rate=1e-4, buffer_size=50_000, batch_size=512, tau=0.02, gamma=0.99, learning_starts=1000, ent_coef="auto_0.1", train_freq=4, **common),
                "a2c": lambda: A2C("MlpPolicy", train_env, learning_rate=7e-4, n_steps=5, gamma=0.99, **common),
            }[algo]()

            cb = MetricsCallback(writer)
            eval_cb = EvalCallback(eval_env, best_model_save_path=str(log_path / "best"), log_path=str(log_path),
                                   eval_freq=cfg.eval_freq, n_eval_episodes=cfg.n_eval_episodes, deterministic=True)
            model.learn(cfg.total_timesteps, callback=[cb, eval_cb], progress_bar=True)
            model.save(log_path / "final_model")
            final_metrics = evaluate_policy(model, cfg)

        if writer is not None:
            log_metrics(writer, final_metrics, 'final', cfg.total_timesteps)
    finally:
        # release the writer and the environments even when training or evaluation fails
        if writer is not None:
            writer.close()
        for env in (train_env, eval_env):
            if env is not None:
                env.close()

    with open(log_path / "results.json", "w") as f:
        json.dump(final_metrics, f, indent=2)
    return {"path": str(log_path), "metrics": final_metrics}
with ProcessPoolExecutor(max_workers=max_workers) as pool: + futures = {pool.submit(_train_alpha, (cfg_dict.copy(), a)): a for a in alphas} + results = {} + for fut in as_completed(futures): + key, metrics = fut.result() + results[key] = metrics + + summary_path = Path(cfg.log_dir) / f"sweep_{cfg.algo}_{cfg.reward_mode}.json" + with open(summary_path, "w") as f: + json.dump(results, f, indent=2) + print(f"\nSweep results saved to {summary_path}") + return results + + +def _train_policy(args: tuple) -> tuple[str, Dict]: + """Worker for parallel policy comparison.""" + cfg_dict, algo = args + cfg_dict["algo"] = algo + cfg_dict["experiment_name"] = f"cmp_{algo}_a{cfg_dict['alpha_true']:.2f}" + cmp_cfg = ExperimentConfig(**cfg_dict) + print(f"[{algo}] starting") + metrics = train(cmp_cfg)["metrics"] + print(f"[{algo}] done") + return algo, metrics + + +def compare_policies(cfg: ExperimentConfig, policies: List[str] | None = None, max_workers: int | None = None) -> Dict[str, Dict]: + policies = policies or ["fixed", "adaptive", "myopic", "random"] + cfg_dict = asdict(cfg) + + if max_workers == 1: + results = dict(_train_policy((cfg_dict.copy(), p)) for p in policies) + else: + with ProcessPoolExecutor(max_workers=max_workers) as pool: + futures = {pool.submit(_train_policy, (cfg_dict.copy(), p)): p for p in policies} + results = {} + for fut in as_completed(futures): + algo, metrics = fut.result() + results[algo] = metrics + + cmp_path = Path(cfg.log_dir) / f"compare_a{cfg.alpha_true:.2f}.json" + with open(cmp_path, "w") as f: + json.dump(results, f, indent=2) + print(f"\nComparison saved to {cmp_path}") + for algo, m in results.items(): + print(f" {algo:12s}: reward={m['reward_mean']:.2f} coi_erosion={m['coi_erosion_mean']:.4f} alpha_err={m['alpha_error_mean']:.4f}") + return results + + +def main(): + parser = argparse.ArgumentParser(description="Train RL pricing policies") + parser.add_argument("--algo", default="ppo", choices=["ppo", "sac", "a2c", "fixed", 
"adaptive", "random", "myopic"]) + parser.add_argument("--steps", type=int, default=100_000) + parser.add_argument("--alpha", type=float, default=0.2) + parser.add_argument("--reward-mode", default="robust", choices=["revenue", "profit", "robust", "coi_aware"]) + parser.add_argument("--n-products", type=int, default=10) + parser.add_argument("--n-envs", type=int, default=4) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--log-dir", default="sim/case/thesis_simplified/runs") + parser.add_argument("--sweep", action="store_true", help="run contamination sweep") + parser.add_argument("--compare", action="store_true", help="compare all baselines") + parser.add_argument("--workers", type=int, default=None, help="max parallel workers for sweep (None=auto, 1=sequential)") + args = parser.parse_args() + + cfg = ExperimentConfig(algo=args.algo, total_timesteps=args.steps, alpha_true=args.alpha, + reward_mode=args.reward_mode, n_products=args.n_products, + n_envs=args.n_envs, seed=args.seed, log_dir=args.log_dir) + + if args.sweep: + run_sweep(cfg, max_workers=args.workers) + elif args.compare: + compare_policies(cfg, max_workers=args.workers) + else: + result = train(cfg) + print(f"\nTraining complete: {result['path']}") + print(f"Metrics: {json.dumps(result['metrics'], indent=2)}") + + +if __name__ == "__main__": + main() diff --git a/sim/rl/behavior_loader/loader.py b/sim/rl/behavior_loader/loader.py new file mode 100644 index 0000000..3336956 --- /dev/null +++ b/sim/rl/behavior_loader/loader.py @@ -0,0 +1,97 @@ +import os +import json +from pydantic import BaseModel as Base + +class PayloadModel(Base): + sessionId: str + experimentId: str | None + eventName: str + page: str | None + productId: str | None + metadata: dict + storeMode: str + userAgent: str + ts: str + +class ValueModel(Base): + payload: PayloadModel + encoding: str + isPayloadNull: bool + schemaId: int + size: int + +class InteractionModel(Base): + partitionID: int + offset: 
class Loader:
    """Load recorded sessions from ``<src_dir>/<session>/int.json``.

    Each session file is parsed into ``InteractionModel`` objects; events whose
    page is an admin page are dropped.
    """

    def __init__(self, src_dir: str):
        """Discover the session directories under ``src_dir`` and load them.

        Raises:
            ValueError: ``src_dir`` contains no session directories.
        """
        self.src_dir = src_dir
        # Only sub-directories are sessions, so stray files are ignored.
        # Sorted because os.listdir order is arbitrary.
        self.entries = sorted(
            entry for entry in os.listdir(src_dir)
            if os.path.isdir(os.path.join(src_dir, entry))
        )
        if not self.entries: raise ValueError("empty directory")
        self.data = self._load_sessions()

    def _load_sessions(self) -> dict:
        """Return ``{session name: [non-admin InteractionModel, ...]}``."""
        sessions = {}
        for entry in self.entries:
            # JSON is UTF-8; do not depend on the platform default encoding
            with open(os.path.join(self.src_dir, entry, "int.json"), encoding="utf-8") as f:
                raw = json.load(f)
            ints = [InteractionModel(**i) for i in raw]
            sessions[entry] = [i for i in ints if not _is_admin(i.value.payload.page)]
        return sessions

    def get_data(self) -> dict:
        """Return the loaded sessions keyed by session name."""
        return self.data

    def get_entries(self) -> tuple[list[str], int]:
        """Return the session names and how many there are."""
        return self.entries, len(self.entries)
"/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" + + for name, cls, path in [("agent", AgentLoader, agent_dir), + ("human", Loader, human_dir), + ("joint", lambda d: JointLoader(human_dir, d), agent_dir)]: + ldr = cls(path) if name != "joint" else cls(agent_dir) + print(f"Loaded {len(ldr.get_entries()[0])} {name} sessions") diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py new file mode 100644 index 0000000..bbe5053 --- /dev/null +++ b/sim/rl/behavior_loader/models.py @@ -0,0 +1,256 @@ +try: + from loader import Loader, AgentLoader, JointLoader +except ImportError: + from sim.rl.behavior_loader.loader import Loader, AgentLoader, JointLoader +from collections import defaultdict +from typing import Dict, List, Tuple, Set +import numpy as np +import graphviz +import sys +from pathlib import Path + +# import lib utilities for optional use - models keep their own _state_repr for backwards compat +# with the specific event structure (evt.value.payload) +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / 'lib')) +try: + from lib.state import make_state_repr as lib_make_state_repr + from lib.features import transition_histogram as lib_transition_histogram +except ImportError: + lib_make_state_repr = None + lib_transition_histogram = None + + +class BehaviorModel: + def __init__(self, src_dir: str, loader_cls=Loader): + self.loader = loader_cls(src_dir) + self.data = self.loader.get_data() + self.entries, self.num_entries = self.loader.get_entries() + self.mdp = None + + def _state_repr(self, evt) -> str: + p = evt.value.payload + return f"{p.page or 'unk'}|{p.productId or 'none'}|{p.eventName}" + + def _sort_key(self, evt): + return evt.timestamp + + def _extract_sessions(self) -> List[List[str]]: + trajs = [] + for evts in self.data.values(): + if len(evts) < 2: continue + states = [self._state_repr(e) for e in sorted(evts, key=self._sort_key)] + trajs.append(states) + return trajs + + def 
_calc_transitions(self, trajs: List[List[str]]) -> Tuple[Dict, Set]: + trans, states = defaultdict(lambda: defaultdict(int)), set() + for traj in trajs: + for s, s_next in zip(traj, traj[1:]): + trans[s][s_next] += 1 + states.update([s, s_next]) + return trans, states + + def _calc_rewards(self, trajs: List[List[str]]) -> Dict: + rwd = defaultdict(list) + for traj in trajs: + n = len(traj) + for i, s in enumerate(traj): + rwd[s].append(i / n) + return rwd + + def _normalize_trans(self, cnts: Dict) -> Dict: + return {s: {s_n: cnt/sum(nxt.values()) for s_n, cnt in nxt.items()} + for s, nxt in cnts.items()} + + def build_MDP(self) -> Dict: + trajs = self._extract_sessions() + trans_cnt, states = self._calc_transitions(trajs) + trans_prob = self._normalize_trans(trans_cnt) + state_rwd = self._calc_rewards(trajs) + + self.mdp = { + 'states': sorted(states), + 'num_states': len(states), + 'transitions': trans_prob, + 'state_values': {s: np.mean(r) for s, r in state_rwd.items()}, + 'state_rewards': state_rwd, + 'trans_counts': trans_cnt, + } + return self.mdp + + def transition_prob(self, s: str, s_next: str) -> float: + if not self.mdp: raise ValueError("build MDP first") + return self.mdp['transitions'].get(s, {}).get(s_next, 0.0) + + def state_value(self, s: str) -> float: + if not self.mdp: raise ValueError("build MDP first") + return self.mdp['state_values'].get(s, 0.0) + + def sample_traj(self, start: str, max_len: int = 50) -> List[str]: + if not self.mdp: raise ValueError("build MDP first") + path, curr = [start], start + for _ in range(max_len): + nxt = self.mdp['transitions'].get(curr, {}) + if not nxt: break + curr = np.random.choice(list(nxt.keys()), p=list(nxt.values())) + path.append(curr) + return path + + def extract_trajectory_features(self, events: List, max_trans_dim: int = 50) -> np.ndarray: + """Convert trajectory to feature vector using MDP structure for contrastive learning""" + if not self.mdp: + self.build_MDP() + + states = [self._state_repr(e) 
for e in sorted(events, key=self._sort_key)] + features = [] + + # transition histogram over MDP state space + trans_counts = defaultdict(int) + for s, s_next in zip(states, states[1:]): + trans_counts[(s, s_next)] += 1 + all_trans = [(s, t) for s in self.mdp['states'] for t in self.mdp['transitions'].get(s, {}).keys()] + trans_vec = [trans_counts.get(tr, 0) for tr in all_trans[:max_trans_dim]] + trans_vec = trans_vec + [0] * (max_trans_dim - len(trans_vec)) # pad + total_trans = sum(trans_counts.values()) or 1 + features.extend([v / total_trans for v in trans_vec]) + + # state coverage ratio + visited = set(states) + features.append(len(visited) / max(self.mdp['num_states'], 1)) + + # temporal entropy of transitions + if len(states) > 1: + trans_probs = [self.transition_prob(s, s_n) for s, s_n in zip(states, states[1:])] + entropy = -sum(p * np.log(p + 1e-10) for p in trans_probs if p > 0) + features.append(entropy / max(len(states), 1)) + else: + features.append(0.0) + + # trajectory length and unique state count + features.append(len(states)) + features.append(len(visited)) + + # state value statistics along trajectory + vals = [self.state_value(s) for s in states] + if vals: + features.extend([np.mean(vals), np.std(vals), np.min(vals), np.max(vals)]) + else: + features.extend([0.0, 0.0, 0.0, 0.0]) + + return np.array(features, dtype=np.float32) + + +class AgentBehaviorModel(BehaviorModel): + def __init__(self, src_dir: str): + super().__init__(src_dir, AgentLoader) + + def _state_repr(self, evt) -> str: + return f"{evt.page or 'unk'}|{evt.productId or 'none'}|{evt.eventName}" + + def _sort_key(self, evt): + return evt.ts + +class JointBehaviorModel(BehaviorModel): + def __init__(self, human_dir: str, agent_dir: str): + self.loader = JointLoader(human_dir, agent_dir) + self.data = self.loader.get_data() + self.entries, self.num_entries = self.loader.get_entries() + self.mdp = None + + def _state_repr(self, evt) -> str: + return f"{evt.page or 
def kl_divergence(p: Dict[str, float], q: Dict[str, float]) -> float:
    """Return the smoothed KL divergence D_KL(P || Q) of two discrete distributions.

    Args:
        p: Mapping outcome -> probability for P.
        q: Mapping outcome -> probability for Q; outcomes missing from ``q``
            count as probability 0.

    Returns:
        Sum over the keys of ``p`` only; an empty ``p`` gives 0. Probabilities
        are not renormalised after smoothing.
    """
    # eps keeps log() and the division finite for zero or missing probabilities
    eps = 1e-10
    # p * log(p / q) summed over all keys in P
    return sum((p[k] + eps) * np.log((p[k] + eps) / (q.get(k, 0.0) + eps)) for k in p)
{human_mdp['num_states']} states, " + f"{sum(len(t) for t in human_mdp['transitions'].values())} transitions") + if not human_mdp['states']: + exit("No states found") + visualize_mdp(human_model, threshold=0.05, output="human_mdp_viz", fmt="pdf", export_dot=True) + + agent_model = AgentBehaviorModel(agent_dir) + agent_mdp = agent_model.build_MDP() + + print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, " + f"{sum(len(t) for t in agent_mdp['transitions'].values())} transitions") + if not agent_mdp['states']: + exit("No states found") + visualize_mdp(agent_model, threshold=0.05, output="agent_mdp_viz", fmt="pdf", export_dot=True) + + human_evt = aggregate_event_transitions(human_mdp) + agent_evt = aggregate_event_transitions(agent_mdp) + + common = set(human_evt.keys()) & set(agent_evt.keys()) + + if not common: + exit("No common event types for KL divergence analysis") + + kl_divs = sorted([(e, kl_divergence(human_evt[e], agent_evt[e])) for e in common], + key=lambda x: x[1], reverse=True) + + print(f"Average KL divergence: {np.mean([kl for _, kl in kl_divs]):.4f}") + print("\nMost divergent event types:") + for evt, kl in kl_divs: + print(f" {evt}: {kl:.4f}") + + print("\n=== Joint Model (Human + Agent Combined) ===") + joint_model = JointBehaviorModel(human_dir, agent_dir) + joint_mdp = joint_model.build_MDP() + print(f"Built joint MDP: {joint_mdp['num_states']} states, " + f"{sum(len(t) for t in joint_mdp['transitions'].values())} transitions") + if joint_mdp['states']: + visualize_mdp(joint_model, threshold=0.05, output="joint_mdp_viz", fmt="pdf", export_dot=True) diff --git a/sim/rl/engine.py b/sim/rl/engine.py new file mode 100644 index 0000000..2e1650c --- /dev/null +++ b/sim/rl/engine.py @@ -0,0 +1,240 @@ +from os import kill +import numpy as np +import pandas as pd +from abc import ABC, abstractmethod +from typing import Dict, Any +from sim.rl.environment import BusinessLogicConstraints + +""" +An angine by default should have its own demand 
# Design note: an engine should, by default, carry its own demand-estimation
# mechanism driven by the observed (computed) features. From those features we
# follow the research structure q -> p, with a mechanism that must be testable
# and updatable.


class BasePricingEngine(ABC):
    """Base interface for all pricing engines.

    Holds the shared constraints object, a seeded NumPy generator and a step
    counter. Subclasses implement :meth:`compute_prices`.
    """

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        self.c = constraints
        self.rng = np.random.default_rng(seed)
        self.step_count = 0
        # Written by update(); defined here so they can be read before the
        # first update() call without raising AttributeError.
        self.last_observation = None
        self.last_reward = None
        self.last_info = None

    @abstractmethod
    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Compute new prices given current state and observation from environment.

        Args:
            current_prices: current price vector [N]
            observation: dict containing 'price', 'demand', and possibly interaction data

        Returns:
            new_prices: updated price vector [N]
        """

    def update(self, observation: Dict[str, Any], reward: float, done: bool, info: Dict[str, Any]) -> None:
        """Record the latest feedback. Engines can override as needed.

        The default only remembers ``observation``, ``reward`` and ``info``;
        ``done`` is accepted for interface compatibility and ignored.
        """
        self.last_observation = observation
        self.last_reward = reward
        self.last_info = info

    def reset(self):
        """Reset engine state for a new episode."""
        self.step_count = 0

    def _apply_guardrails(self, reference_prices: np.ndarray, proposed_prices: np.ndarray) -> np.ndarray:
        """Clamp a proposal to the allowed per-step change and the system price bounds.

        The relative move against ``reference_prices`` is limited to
        ``±max_price_adjustment``; the result is then clipped to
        ``[system_min_price, system_max_price]`` and returned as float32.
        """
        max_adj = self.c.max_price_adjustment
        # 1e-6 guards against division by zero for a zero reference price.
        ratio = np.clip(proposed_prices / (reference_prices + 1e-6), 1 - max_adj, 1 + max_adj)
        bounded = reference_prices * ratio
        return np.clip(bounded, self.c.system_min_price, self.c.system_max_price).astype(np.float32)


class WildPricingEngine(BasePricingEngine):
    """Production-like pricing using online elasticity estimation via EWMA regression."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        n = self.c.product_catalogue_size
        # per-product unit costs (unknown to customers; known to platform)
        self.unit_cost = self.rng.uniform(8.0, 40.0, size=n).astype(np.float32)
        # knobs typical in production
        self.lr = 0.08
        self.ewma = 0.05
        self.eps_explore = 0.03
        self.explore_scale = 0.03
        self._init_estimator_state()

    def _init_estimator_state(self) -> None:
        """(Re)initialise the elasticity estimate and the EWMA regression moments."""
        n = self.c.product_catalogue_size
        # online elasticity estimate (start moderately elastic)
        self.e_hat = np.full((n,), -1.3, dtype=np.float32)
        # EWMA state for log-log regression
        self.mu_logp = np.zeros(n, dtype=np.float32)
        self.mu_logq = np.zeros(n, dtype=np.float32)
        self.cov_pq = np.zeros(n, dtype=np.float32)
        self.var_p = np.ones(n, dtype=np.float32)

    def _safe_elasticity(self, e: np.ndarray) -> np.ndarray:
        """Keep elasticity strictly below -1 so the markup e / (e + 1) stays finite and positive."""
        return np.clip(e, -5.0, -1.05)

    def reset(self):
        """Reset the step counter and estimator state; unit costs are kept."""
        super().reset()
        # Fixed: this used the misspelled `product_catelogue_size`, which no
        # longer exists on the constraints object.
        self._init_estimator_state()

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Update the elasticity estimate from observed demand and return new prices."""
        self.step_count += 1
        # Demand is the proxy for sales. Use the shared extractor so the nested
        # observation["elasticity"]["demand"] layout is honoured, with the flat
        # observation["demand"] and all-zeros fallbacks preserved.
        demand = _extract_demand(observation, self.c.product_catalogue_size)
        return self._update_from_demand(current_prices, demand)

    def _update_from_demand(self, prices: np.ndarray, sold: np.ndarray) -> np.ndarray:
        """One EWMA regression step followed by a guarded move toward the optimal price."""
        # log transforms (add 1 to handle zeros)
        logp = np.log(np.clip(prices, 1e-3, None)).astype(np.float32)
        logq = np.log(sold + 1.0).astype(np.float32)
        # EWMA moments for per-product regression: logq ≈ a + e*logp
        a = self.ewma
        dp = logp - self.mu_logp
        dq = logq - self.mu_logq
        self.mu_logp = (1 - a) * self.mu_logp + a * logp
        self.mu_logq = (1 - a) * self.mu_logq + a * logq
        self.cov_pq = (1 - a) * self.cov_pq + a * (dp * dq)
        self.var_p = (1 - a) * self.var_p + a * (dp * dp + 1e-6)
        e_new = self.cov_pq / (self.var_p + 1e-6)
        self.e_hat = self._safe_elasticity(0.9 * self.e_hat + 0.1 * e_new)
        # profit-optimal price for isoelastic demand (valid because e < -1)
        e = self.e_hat
        p_star = self.unit_cost * (e / (e + 1.0))
        # smooth toward p_star
        new_prices = (1 - self.lr) * prices + self.lr * p_star
        # exploration (small random perturbations)
        if self.rng.random() < self.eps_explore:
            noise = self.rng.normal(0.0, self.explore_scale, size=new_prices.shape).astype(np.float32)
            new_prices = new_prices * (1.0 + noise)
        # apply business guardrails (max change + bounds)
        return self._apply_guardrails(prices, new_prices)


class StaticPricingEngine(BasePricingEngine):
    """Baseline: fixed prices throughout an episode."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.fixed_prices = None

    def reset(self):
        """Forget the frozen price vector so the next episode captures a new one."""
        super().reset()
        self.fixed_prices = None

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Return a copy of the prices seen on the first call of the episode."""
        self.step_count += 1
        if self.fixed_prices is None:
            self.fixed_prices = current_prices.copy()
        return self.fixed_prices.copy()


class SimpleDemandEngine(BasePricingEngine):
    """Demand-driven pricing: increase price when demand rises, decrease when it falls."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.prev_demand = None
        self.lr = 0.05

    def reset(self):
        """Drop the remembered demand so the first step of the next episode is a no-op."""
        super().reset()
        self.prev_demand = None

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Move each price proportionally to the relative change in its demand."""
        self.step_count += 1
        demand = _extract_demand(observation, self.c.product_catalogue_size)
        if self.prev_demand is None:
            # No baseline yet: remember demand and leave prices unchanged.
            self.prev_demand = demand.copy()
            return current_prices.copy()
        # simple rule: if demand increases, raise price; if decreases, lower price
        delta_d = demand - self.prev_demand
        price_adj = self.lr * np.sign(delta_d) * np.abs(delta_d) / (np.abs(self.prev_demand) + 1.0)
        new_prices = current_prices * (1.0 + price_adj)
        self.prev_demand = demand.copy()
        # apply constraints
        return self._apply_guardrails(current_prices, new_prices)


class RandomWalkEngine(BasePricingEngine):
    """Random walk pricing with mean reversion."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.target_price = None
        self.volatility = 0.02

    def reset(self):
        """Forget the reversion target; it is re-captured on the next step."""
        super().reset()
        self.target_price = None

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Perturb prices with Gaussian noise and pull them back toward the first-seen prices."""
        self.step_count += 1
        if self.target_price is None:
            self.target_price = current_prices.copy()
        # random walk with mean reversion toward target
        noise = self.rng.normal(0.0, self.volatility, size=current_prices.shape).astype(np.float32)
        reversion = 0.01 * (self.target_price - current_prices)
        new_prices = current_prices * (1.0 + noise) + reversion
        # apply constraints
        return self._apply_guardrails(current_prices, new_prices)


class ThompsonSamplingEngine(BasePricingEngine):
    """Bayesian bandit approach per product, treating price as a discrete action."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.n_price_levels = 5
        self._init_beliefs()

    def _init_beliefs(self) -> None:
        """Reset the Beta(1, 1) priors, the price grid and the remembered actions."""
        shape = (self.c.product_catalogue_size, self.n_price_levels)
        self.alpha = np.ones(shape, dtype=np.float32)
        self.beta = np.ones(shape, dtype=np.float32)
        self.price_grid = None
        self.last_actions = None

    def reset(self):
        """Reset the step counter and all bandit state."""
        super().reset()
        self._init_beliefs()

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Update beliefs from the last action's demand, then sample a price level per product."""
        self.step_count += 1
        n_products = self.c.product_catalogue_size
        if self.price_grid is None:
            # define price grid per product: ±30% around the first-seen price
            lo = current_prices * 0.7
            hi = current_prices * 1.3
            self.price_grid = np.linspace(lo, hi, self.n_price_levels).T
        demand = _extract_demand(observation, n_products)
        # update beliefs based on last action
        if self.last_actions is not None:
            for i in range(n_products):
                a = self.last_actions[i]
                reward = demand[i]
                if reward > 0.5:
                    self.alpha[i, a] += reward
                else:
                    self.beta[i, a] += 1.0
        # thompson sampling: sample from posterior, pick best
        new_prices = np.zeros(n_products, dtype=np.float32)
        actions = np.zeros(n_products, dtype=int)
        for i in range(n_products):
            theta = self.rng.beta(self.alpha[i], self.beta[i]).astype(np.float32)
            actions[i] = int(np.argmax(theta))
            new_prices[i] = self.price_grid[i, actions[i]]
        self.last_actions = actions
        return np.clip(new_prices, self.c.system_min_price, self.c.system_max_price).astype(np.float32)


def _extract_demand(observation: Dict[str, Any], n: int) -> np.ndarray:
    """Pull the per-product demand vector out of an observation.

    Lookup order: ``observation["elasticity"]["demand"]`` (when "elasticity" is
    a dict), then ``observation["demand"]``, then a zero vector of length ``n``.
    """
    nested = observation.get("elasticity")
    if isinstance(nested, dict):
        d = nested.get("demand")
        if d is not None:
            return np.asarray(d, dtype=np.float32)
    d = observation.get("demand")
    if d is not None:
        return np.asarray(d, dtype=np.float32)
    return np.zeros(n, dtype=np.float32)
from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Dict, Optional, Tuple

import numpy as np

try:
    import gymnasium as gym
    from gymnasium import spaces
except ImportError as e:
    raise ImportError("sim.rl.environment requires gymnasium") from e

from sim.case.thesis_simplified.coi import COIWindow, coi_erosion, compute_coi_window
from sim.case.thesis_simplified.separability import estimate_alpha as estimate_session_alpha
from sim.case.thesis_simplified.simplified import Limbo, Session, put_prices_to_market
from sim.rl.thesis_core import aggregate_demand_by_product, aggregate_purchases, constrain_prices


@dataclass(frozen=True)
class BusinessLogicConstraints:
    """Immutable configuration shared by the environment and the pricing engines."""

    # Number of products; PHANTOMEnv uses it as the length of every
    # per-product vector (prices, demand, costs).
    product_catalogue_size: int = 100
    # PHANTOMEnv.step reports `terminated` once its step counter reaches this value.
    max_steps: int = 2000
    # Passed as `n_sessions` to put_prices_to_market on every step.
    sessions_per_step: int = 250

    # Upper / lower bound of the action and price spaces; also forwarded to
    # constrain_prices as max_price / min_price.
    system_max_price: float = 500.0
    system_min_price: float = 1.0
    # Forwarded to constrain_prices as max_adjustment / min_margin_pct.
    # NOTE(review): exact semantics live in sim.rl.thesis_core — presumably the
    # maximum relative price change per step and the minimum margin over cost.
    max_price_adjustment: float = 0.30
    min_margin_pct: float = 0.05

    # Starting value for both the true and the estimated agent share (alpha).
    agent_share: float = 0.2
    # Added to the true alpha after every step.
    alpha_drift: float = 0.0
    # (low, high) range the true alpha is clipped to.
    alpha_bounds: tuple[float, float] = (0.0, 0.8)

    # Reward weights: penalty on COI leak, on price volatility, and on
    # |alpha_hat - alpha_true| respectively.
    coi_strength: float = 0.25
    w_volatility: float = 5.0
    w_estimation_error: float = 0.25

    # Seed for the environment's default random generator.
    seed: int = 7
-def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series: - # baseline heuristic: high velocity + low conversion - v = session_df.get("interaction_velocity", pd.Series(0.0, index=session_df.index)) - cr = session_df.get("conversion_rate", pd.Series(0.0, index=session_df.index)) - total = session_df.get("total_interactions", pd.Series(0, index=session_df.index)) - return (total >= 12) & (v >= 0.20) & (cr <= 0.01) - - -class CommercePlatform: - def __init__(self, product_catelogue_size: int, max_price: float, min_price: float, - constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None, - use_defense: bool = False): - self.product_catelogue_size = product_catelogue_size - self.max_price = max_price - self.min_price = min_price - self.constraints = constraints - self.use_defense = use_defense - self.agent_detector = agent_detector - self.simulation_history: List[Dict[str, Any]] = [] - self._rng = np.random.default_rng(constraints.seed) - self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size) - self._popularity = self._popularity / (self._popularity.mean() + 1e-12) - self._last_interaction_df: pd.DataFrame = pd.DataFrame() - - def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]: - # ground truth purchase propensities - p = np.clip(prices, self.min_price, self.max_price) - pn = p / self.max_price - human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity) - agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity) - return { - "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95), - "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95) - } - - def _session_markup_multiplier(self, signal_score: float) -> float: - # session-based COI markup based on demand signal expression - x = (signal_score - self.constraints.coi_threshold) / 
max(self.constraints.coi_sigmoid_temp, 1e-6) - return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0]) - - def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame: - demand = self.setup_true_demand(base_prices) - human_pprob = demand["human_purchase_prob"] - agent_pprob = demand["agent_purchase_prob"] - events: List[Dict[str, Any]] = [] - T = self.constraints.sessions_per_step - n_agent_sessions = int(round(T * self.constraints.agent_share)) - n_human_sessions = T - n_agent_sessions - - # human sessions: normal browse with possible purchase - for s in range(n_human_sessions): - session_id = f"h_{len(events)}_{s}" - k = int(self._rng.integers(1, 4)) - prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False) - t = 0.0 - inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k) - signal_score = 0.0 - purchased_any = False - - for i, pid in enumerate(prod_ids): - t += float(inter_times[i]) - price_shown = float(base_prices[pid]) - events.append({ - "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), - "action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - signal_score += 1.0 - - if self._rng.random() < 0.35: - t += float(inter_times[i + k]) - events.append({ - "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), - "action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - signal_score += 2.0 - - if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])): - t += float(inter_times[i + 2 * k]) - mult = self._session_markup_multiplier(signal_score) - price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price)) - events.append({ - "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), - "action": 
"purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1, - "price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score, - }) - purchased_any = True - - # agent sessions: split recon/purchase to circumvent COI - n_agent_ids = max(1, n_agent_sessions // 2) - for a in range(n_agent_ids): - agent_id = f"a_{a}" - recon_session_id = f"{agent_id}_recon" - t = 0.0 - n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5 - inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1)) - prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views) - recon_signal = 0.0 - - for i, pid in enumerate(prod_ids): - t += float(inter_times[i]) - events.append({ - "session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid), - "action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - recon_signal += 1.0 - - # clean purchase session with minimal interactions - if self._rng.random() < self.constraints.agent_purchase_probability: - purchase_session_id = f"{agent_id}_clean" - pid = int(self._rng.integers(0, self.product_catelogue_size)) - t2 = 0.0 - clean_signal = 0.0 - t2 += float(self._rng.gamma(shape=2.0, scale=0.7)) - events.append({ - "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid, - "action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - clean_signal += 1.0 - - if self._rng.random() < float(agent_pprob[pid]): - t2 += float(self._rng.gamma(shape=2.0, scale=0.7)) - obs_mult = self._session_markup_multiplier(clean_signal) - obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price)) - oracle_mult = self._session_markup_multiplier(recon_signal) # oracle links recon->purchase - 
oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price)) - events.append({ - "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid, - "action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1, - "price_paid": obs_paid, "oracle_price_paid": oracle_paid, "signal_score": clean_signal, - }) - - return pd.DataFrame(events) - - def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]: - if interaction_df.empty: - return {"mean_sale_price": 0.0, "look_to_book": 0.0} - purchases = interaction_df[interaction_df["action"] == "purchase"] - mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0 - views = float((interaction_df["action"] == "view").sum()) - buys = float((interaction_df["action"] == "purchase").sum()) - return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))} - - def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame: - if df.empty: - return pd.DataFrame() - g = df.groupby("session_id", sort=False) - session_duration = g["t"].max() - g["t"].min() - total_interactions = g.size() - avg_time_between = g["t"].apply(lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0) - interaction_velocity = total_interactions / (session_duration + 1e-6) - views = g.apply(lambda x: int((x["action"] == "view").sum()), include_groups=False) - cart_adds = g.apply(lambda x: int((x["action"] == "cart").sum()), include_groups=False) - purchases = g.apply(lambda x: int((x["action"] == "purchase").sum()), include_groups=False) - conversion_rate = purchases / (views + 1e-6) - is_agent = g["actor"].apply(lambda s: bool((s == "agent").any()), include_groups=False) - - return pd.DataFrame({ - "session_duration_sec": session_duration.astype(float), - "avg_time_between_events": avg_time_between.astype(float), - "total_interactions": 
total_interactions.astype(int), - "interaction_velocity": interaction_velocity.astype(float), - "item_views": views.astype(int), - "cart_adds": cart_adds.astype(int), - "purchases": purchases.astype(int), - "conversion_rate": conversion_rate.astype(float), - "is_agent": is_agent.astype(bool), - }).reset_index() - - def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray: - # proxy demand from weighted interaction events - if interaction_df.empty: - return np.zeros(self.product_catelogue_size, dtype=np.float32) - df = interaction_df - if exclude_sessions is not None: - bad_sessions = set(exclude_sessions.loc[exclude_sessions].index) - df = df[~df["session_id"].isin(bad_sessions)] - weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5} - w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float) - prod = df["product_id"].to_numpy(dtype=int) - q_hat = np.zeros(self.product_catelogue_size, dtype=float) - np.add.at(q_hat, prod, w) - return q_hat.astype(np.float32) - - def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]: - interaction_df = self._simulate_sessions(prices) - self._last_interaction_df = interaction_df - session_df = self._session_feature_table(interaction_df) - - predicted_agent_sessions = None - if (self.use_defense and self.agent_detector is not None and not session_df.empty): - predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id")) - - q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None) - q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \ - if predicted_agent_sessions is not None else q_hat_naive.copy() - - true_human = np.zeros(self.product_catelogue_size, dtype=float) - true_agent = np.zeros(self.product_catelogue_size, dtype=float) - if not interaction_df.empty: - purchases = interaction_df[interaction_df["action"] == "purchase"] - if not purchases.empty: - for _, r in 
def make_env(constraints: Optional[BusinessLogicConstraints] = None) -> "PHANTOMEnv":
    """Build a PHANTOMEnv, falling back to default constraints when none are given."""
    return PHANTOMEnv(constraints=constraints or BusinessLogicConstraints())


class PHANTOMEnv(gym.Env):
    """Gymnasium environment in which the action is a full per-product price vector.

    Each step constrains the proposed prices, exposes them to a simulated
    market of sessions, updates the running estimate of the agent share
    (``alpha_hat``) and returns a profit-based reward with penalties.
    """

    metadata = {"render_modes": ["human", "ansi"]}

    def __init__(self, constraints: Optional[BusinessLogicConstraints] = None):
        super().__init__()
        self.c = constraints or BusinessLogicConstraints()
        self.n = int(self.c.product_catalogue_size)

        self._rng = np.random.default_rng(self.c.seed)
        self._t = 0
        self._alpha_true = float(self.c.agent_share)
        self._alpha_hat = float(self.c.agent_share)
        self._costs = np.zeros(self.n, dtype=np.float32)
        self._refs = np.zeros(self.n, dtype=np.float32)
        # None until reset() has populated the catalogue; step() checks this.
        self._prices: Optional[np.ndarray] = None
        self._last_sessions: list[Session] = []
        self._last_coi: COIWindow | None = None
        self._limbo = Limbo()

        # Actions are absolute prices, one per product, within the system bounds.
        self.action_space = spaces.Box(
            low=np.full((self.n,), self.c.system_min_price, dtype=np.float32),
            high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
            dtype=np.float32,
        )
        self.observation_space = spaces.Dict(
            {
                "elasticity": spaces.Dict(
                    {
                        "price": spaces.Box(
                            low=np.full((self.n,), self.c.system_min_price, dtype=np.float32),
                            high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
                            dtype=np.float32,
                        ),
                        "demand": spaces.Box(
                            low=np.zeros((self.n,), dtype=np.float32),
                            high=np.full((self.n,), 1e9, dtype=np.float32),
                            dtype=np.float32,
                        ),
                    }
                ),
                "market": spaces.Dict(
                    {
                        "alpha_hat": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
                        "revenue_rate": spaces.Box(low=0.0, high=1e12, shape=(1,), dtype=np.float32),
                        "conversion_rate": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
                        "price_volatility": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
                    }
                ),
                "cost": spaces.Box(
                    low=np.zeros((self.n,), dtype=np.float32),
                    high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
                    dtype=np.float32,
                ),
            }
        )

    def _reset_catalogue(self) -> None:
        """Draw fresh unit costs and set reference/initial prices to cost plus a 20-60% margin."""
        self._costs = self._rng.uniform(15.0, 60.0, size=self.n).astype(np.float32)
        margins = self._rng.uniform(0.2, 0.6, size=self.n).astype(np.float32)
        self._refs = (self._costs * (1.0 + margins)).astype(np.float32)
        self._prices = self._refs.copy()

    def _observe_market(
        self, prices: np.ndarray
    ) -> tuple[list[Session], Dict[str, float], np.ndarray, np.ndarray, float, float, int]:
        """Simulate one step of sessions at ``prices`` and aggregate the outcome.

        Returns ``(sessions, demand_map, demand_by_product, purchases, revenue,
        cost, n_agents)``. The conversion rate is derived by the caller, so it
        is not computed here.
        """
        sessions, demand_map = put_prices_to_market(
            prices,
            costs=self._costs,
            alpha=self._alpha_true,
            n_sessions=int(self.c.sessions_per_step),
            # A fresh sub-seed per step keeps the market simulation reproducible
            # from the environment's own generator.
            seed=int(self._rng.integers(0, 2**31 - 1)),
        )
        demand_by_product = aggregate_demand_by_product(sessions, demand_map, self.n)
        purchases, revenue, cost, n_agents = aggregate_purchases(sessions, self._costs, self.n)
        return sessions, demand_map, demand_by_product, purchases, revenue, cost, n_agents

    def _update_alpha_hat(self, sessions: list[Session]) -> float:
        """Blend the mean per-session alpha score into the running estimate (80/20 EWMA).

        Sessions without events are skipped; with no scored sessions the
        estimate is returned unchanged.
        """
        scores = [estimate_session_alpha(s) for s in sessions if s.events]
        if not scores:
            return self._alpha_hat
        alpha_step = float(np.mean(scores))
        self._alpha_hat = 0.8 * self._alpha_hat + 0.2 * alpha_step
        self._alpha_hat = float(np.clip(self._alpha_hat, 0.0, 1.0))
        return self._alpha_hat

    def _reward(self, prices: np.ndarray, revenue: float, cost: float, volatility: float) -> float:
        """Profit minus weighted COI leak, price volatility and alpha-estimation error.

        ``prices`` is accepted for interface symmetry and is not used.
        """
        profit = float(revenue - cost)
        coi_leak = float(self._last_coi.leak) if self._last_coi else 0.0
        alpha_err = abs(self._alpha_hat - self._alpha_true)
        return profit - self.c.coi_strength * coi_leak - self.c.w_volatility * volatility - self.c.w_estimation_error * alpha_err

    def _build_obs(
        self,
        prices: np.ndarray,
        demand_by_product: np.ndarray,
        revenue: float,
        conversion: float,
        volatility: float,
    ) -> Dict[str, Any]:
        """Assemble the observation dict in the layout declared by ``observation_space``."""
        return {
            "elasticity": {"price": prices.astype(np.float32), "demand": demand_by_product.astype(np.float32)},
            "market": {
                "alpha_hat": np.array([self._alpha_hat], dtype=np.float32),
                "revenue_rate": np.array([revenue], dtype=np.float32),
                "conversion_rate": np.array([conversion], dtype=np.float32),
                "price_volatility": np.array([volatility], dtype=np.float32),
            },
            "cost": self._costs.astype(np.float32),
        }

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        """Start a new episode and return ``(observation, info)``.

        Re-seeds the internal generator when ``seed`` is given, redraws the
        catalogue and clears per-episode state. ``options`` is ignored.
        """
        super().reset(seed=seed)
        if seed is not None:
            self._rng = np.random.default_rng(seed)
        self._t = 0
        self._alpha_true = float(np.clip(self.c.agent_share, *self.c.alpha_bounds))
        self._alpha_hat = float(self.c.agent_share)
        self._reset_catalogue()
        self._limbo = Limbo()
        self._last_sessions = []
        self._last_coi = None

        prices = self._prices if self._prices is not None else np.zeros(self.n, dtype=np.float32)
        obs = self._build_obs(prices, np.zeros(self.n, dtype=np.float32), 0.0, 0.0, 0.0)
        return obs, {"alpha_true": self._alpha_true}

    def step(self, action: np.ndarray) -> Tuple[Dict[str, Any], float, bool, bool, Dict[str, Any]]:
        """Apply a proposed price vector and advance the simulation by one step.

        Returns ``(obs, reward, terminated, truncated, info)``; ``truncated``
        is always ``False``.

        Raises:
            RuntimeError: if called before ``reset()``.
        """
        if self._prices is None:
            raise RuntimeError("reset() must be called before step()")

        prev = self._prices
        prices = constrain_prices(
            prev,
            np.asarray(action, dtype=np.float32),
            costs=self._costs,
            min_price=float(self.c.system_min_price),
            max_price=float(self.c.system_max_price),
            max_adjustment=float(self.c.max_price_adjustment),
            min_margin_pct=float(self.c.min_margin_pct),
        )
        self._prices = prices
        self._limbo.add_update("prices", prices)

        sessions, demand_map, demand_by_product, purchases, revenue, cost, n_agents = self._observe_market(prices)
        self._last_sessions = sessions
        self._limbo.add_update("demand", demand_map)

        self._update_alpha_hat(self._last_sessions)
        self._last_coi = compute_coi_window(self._last_sessions, self._costs, demand_mapping=demand_map)

        # The true agent share drifts before the reward is computed, so the
        # estimation-error penalty is measured against the post-drift value.
        self._alpha_true = float(np.clip(self._alpha_true + self.c.alpha_drift, *self.c.alpha_bounds))
        volatility = float(np.std((prices - prev) / (prev + 1e-6)))
        reward = float(self._reward(prices, revenue, cost, volatility))
        conversion = float(np.sum(purchases) / max(len(self._last_sessions), 1))

        self._t += 1
        terminated = self._t >= int(self.c.max_steps)

        # Volatility is capped at 1.0 to stay inside the declared observation box.
        obs = self._build_obs(prices, demand_by_product, revenue, conversion, min(volatility, 1.0))
        info = {
            "step": self._t,
            "reward": reward,
            "revenue": float(revenue),
            "profit": float(revenue - cost),
            "n_sessions": int(self.c.sessions_per_step),
            "n_agents": int(n_agents),
            "alpha_true": float(self._alpha_true),
            "alpha_hat": float(self._alpha_hat),
            "alpha_error": float(abs(self._alpha_hat - self._alpha_true)),
            "price_std": float(np.std(prices)),
            "price_volatility": float(volatility),
        }
        if self._last_coi is not None:
            info.update(
                {
                    "coi_policy": float(self._last_coi.policy),
                    "coi_agent": float(self._last_coi.agent),
                    "coi_leakage": float(self._last_coi.leak),
                    "coi_survival": float(self._last_coi.survival_ratio),
                    "coi_erosion": float(coi_erosion(self._last_coi.policy, self._last_coi.agent)),
                }
            )
        return obs, reward, terminated, False, info

    def render(self, mode: str = "human") -> str | None:
        """Return a one-line status string, printing it when ``mode == "human"``.

        Returns ``None`` if the environment has not been reset yet.
        """
        if self._prices is None:
            return None
        out = (
            f"t={self._t}/{self.c.max_steps} "
            f"alpha_true={self._alpha_true:.3f} alpha_hat={self._alpha_hat:.3f} "
            f"price_std={float(np.std(self._prices)):.2f}"
        )
        if mode == "human":
            print(out)
        return out

    def close(self) -> None:
        """Nothing to release."""
        return
@jit
def extract_features(states, dwells, lengths):
    """Extract per-session features.

    Positions at or beyond a session's length are masked out before any
    counting or summing.

    Args:
        states: (n_sess, max_len) integer state indices.
        dwells: (n_sess, max_len) dwell times.
        lengths: (n_sess,) number of valid steps per session.

    Returns:
        (n_sess, 9) array with columns: duration, avg_dwell, total steps,
        velocity, views, carts, purchases, learn, conversion.
    """
    _, max_len = states.shape
    mask = jnp.arange(max_len)[None, :] < lengths[:, None]
    duration = jnp.sum(dwells * mask, axis=1)
    total = lengths.astype(jnp.float32)

    def count(idx):
        return jnp.sum((states == idx) & mask, axis=1).astype(jnp.float32)

    # Indices follow the STATES order in transitions.py (1=view, 2=learn, 3=cart, 4=purchase).
    views, learn, carts, purchases = count(1), count(2), count(3), count(4)
    velocity = total / (duration + 1e-6)
    conversion = purchases / (views + 1e-6)
    avg_dwell = duration / (total + 1e-6)
    return jnp.stack(
        [duration, avg_dwell, total, velocity, views, carts, purchases, learn, conversion],
        axis=1,
    )


def session_features(batch: SessionBatch) -> np.ndarray:
    """Return the (n_sess, 9) feature matrix of ``batch`` as a NumPy array.

    Without JAX, ``jnp`` aliases NumPy and ``jit`` is the identity, so the same
    kernel serves both code paths and no separate fallback copy is needed.
    """
    feats = extract_features(
        jnp.asarray(batch.states), jnp.asarray(batch.dwells), jnp.asarray(batch.lengths)
    )
    return np.asarray(feats)


@jit
def session_transitions(states, lengths, n_states=N_STATES):
    """Compute row-normalised empirical transition matrices per session.

    A transition ``t -> t+1`` counts only when both steps lie inside the
    session (``t + 1 < length``) and neither state is the ``-1`` padding value.
    The mask is applied per step *before* summing, so padded steps no longer
    leak into the counts as spurious ``0 -> 0`` transitions.

    Args:
        states: (n_sess, max_len) integer state indices, ``-1`` for padding.
        lengths: (n_sess,) number of valid steps per session.
        n_states: size of the state space; leave at its default under ``jit``.

    Returns:
        (n_sess, n_states, n_states) array; rows without any observed
        transition stay all-zero.
    """
    _, max_len = states.shape
    src, dst = states[:, :-1], states[:, 1:]
    in_session = jnp.arange(max_len - 1)[None, :] < (lengths[:, None] - 1)
    valid = in_session & (src >= 0) & (dst >= 0)
    eye = jnp.eye(n_states)
    # Clip only so the one-hot lookup is well defined; invalid steps are zeroed by the mask.
    src_1h = eye[jnp.clip(src, 0, n_states - 1)] * valid[..., None]
    dst_1h = eye[jnp.clip(dst, 0, n_states - 1)]
    counts = jnp.einsum("nts,ntd->nsd", src_1h, dst_1h)
    row_sums = counts.sum(axis=-1, keepdims=True)
    return counts / (row_sums + 1e-10)


def compute_session_transitions(batch: SessionBatch) -> np.ndarray:
    """Return per-session transition matrices of ``batch`` as float32 NumPy.

    The vectorised kernel only uses array operations NumPy also provides, so it
    doubles as the fallback when JAX is not installed.
    """
    trans = session_transitions(jnp.asarray(batch.states), jnp.asarray(batch.lengths))
    return np.asarray(trans, dtype=np.float32)
# Optional JAX acceleration: without JAX, jnp aliases NumPy and jit is a no-op.
try:
    import jax.numpy as jnp
    from jax import jit
    JAX_AVAILABLE = True
except ImportError:
    jnp, JAX_AVAILABLE = np, False

    def jit(f):
        return f


@jit
def batch_kl(P, Q_human, Q_agent, eps=1e-10):
    """Compute KL(P || Q) of every session against two reference matrices.

    ``P`` is smoothed with ``eps`` and re-normalised along its last axis; the
    references are smoothed with ``eps`` but not re-normalised.

    Args:
        P: (n, s, s) per-session transition matrices.
        Q_human: (s, s) human reference matrix.
        Q_agent: (s, s) agent reference matrix.
        eps: additive smoothing that keeps the logarithms finite.

    Returns:
        ``(delta_h, delta_a)``, each of shape (n,), summed over all entries.
    """
    p = P + eps
    p = p / p.sum(axis=-1, keepdims=True)
    qh, qa = Q_human[None] + eps, Q_agent[None] + eps
    delta_h = jnp.sum(p * jnp.log(p / qh), axis=(1, 2))
    delta_a = jnp.sum(p * jnp.log(p / qa), axis=(1, 2))
    return delta_h, delta_a


def compute_divergences(session_trans: np.ndarray, ref_human: np.ndarray, ref_agent: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Compute KL divergence of each session from human/agent prototypes.

    The same kernel runs with or without JAX (see the import shim), so no
    duplicated NumPy implementation is required.
    """
    dh, da = batch_kl(jnp.asarray(session_trans), jnp.asarray(ref_human), jnp.asarray(ref_agent))
    return np.asarray(dh), np.asarray(da)


def estimate_alpha_batch(prob_agent: np.ndarray, delta_h: np.ndarray, delta_a: np.ndarray, temp: float = 1.0) -> np.ndarray:
    """Vectorized alpha estimation from classifier probs and divergences.

    Blends the classifier probability 50/50 with ``delta_a / (delta_h + delta_a)``
    and, for ``temp > 0``, squashes the blend through a logistic centred at 0.5.

    Args:
        prob_agent: per-session classifier probability of "agent".
        delta_h: per-session divergence from the human prototype.
        delta_a: per-session divergence from the agent prototype.
        temp: logistic temperature; ``<= 0`` returns the clipped blend itself.

    Returns:
        float64 array of estimates in [0, 1].

    NOTE(review): the ratio grows with the divergence from the *agent*
    prototype -- confirm that this direction is the intended one.
    """
    delta_h = np.asarray(delta_h, dtype=float)
    delta_a = np.asarray(delta_a, dtype=float)
    mass = delta_h + delta_a
    # Divide only where the mass is meaningful; elsewhere keep the neutral 0.5.
    # (np.where would still evaluate 0/0 and emit RuntimeWarnings.)
    ratio = np.full(mass.shape, 0.5)
    np.divide(delta_a, mass, out=ratio, where=mass > 1e-8)
    blended = 0.5 * np.asarray(prob_agent, dtype=float) + 0.5 * ratio
    if temp <= 0:
        return np.clip(blended, 0.0, 1.0)
    return np.clip(1.0 / (1.0 + np.exp(-temp * (blended - 0.5))), 0.0, 1.0)
class SessionBatch(NamedTuple):
    """Padded batch of simulated browsing sessions."""
    states: np.ndarray    # (n_sess, max_len) state indices, -1=padding
    dwells: np.ndarray    # (n_sess, max_len) dwell times
    products: np.ndarray  # (n_sess,) product index per session
    actors: np.ndarray    # (n_sess,) 0=human, 1=agent
    lengths: np.ndarray   # (n_sess,) actual session length


class SimResult(NamedTuple):
    """Aggregate metrics of one simulated batch (see ``compute_metrics``)."""
    demand_human: np.ndarray
    demand_agent: np.ndarray
    revenue: float
    revenue_oracle: float
    agent_loss: float
    coi: float
    look_to_book: float
    mean_sale_price: float
    n_human_purchases: int
    n_agent_purchases: int
    sessions: SessionBatch


if JAX_AVAILABLE:
    @partial(jax.jit, static_argnums=(5, 6, 7))
    def _sample_sessions_jax(key, T_human, T_agent, dwell_human, dwell_agent, n_human, n_agent, max_steps):
        """Sample all sessions in lock-step with ``lax.scan``.

        Every session starts in state 0. Each scan step records the *next*
        state and a gamma-distributed dwell time (floored at 0.3); finished
        sessions emit ``-1``.

        Returns:
            ``(states, dwells, actors, lengths)`` with states/dwells of shape
            (n, max_steps).
        """
        n = n_human + n_agent
        # Only the third subkey is consumed; the 4-way split is kept so that
        # existing seeds keep producing the same streams.
        _, _, scan_key, _ = jax.random.split(key, 4)
        actors = jnp.concatenate([jnp.zeros(n_human, dtype=jnp.int32), jnp.ones(n_agent, dtype=jnp.int32)])
        is_human = actors[:, None, None] == 0
        T = jnp.where(is_human, T_human[None], T_agent[None])              # (n,6,6)
        dwell_p = jnp.where(is_human, dwell_human[None], dwell_agent[None])  # (n,6,2)
        rows = jnp.arange(n)

        def step(carry, _):
            s, active, k = carry
            k, k_next, k_dwell = jax.random.split(k, 3)
            probs = T[rows, s]  # (n,6)
            nxt = jax.random.categorical(k_next, jnp.log(probs + 1e-10))
            nxt = jnp.where(active, nxt, -1)
            shape = dwell_p[rows, s, 0]
            scale = dwell_p[rows, s, 1]
            dwell = jnp.maximum(0.3, jax.random.gamma(k_dwell, shape) * scale)
            still = active & (nxt != TERM_IDX) & (nxt >= 0)
            return (nxt, still, k), (nxt, dwell)

        init = (jnp.zeros(n, dtype=jnp.int32), jnp.ones(n, dtype=jnp.bool_), scan_key)
        _, (states, dwells) = lax.scan(step, init, None, length=max_steps)
        states, dwells = states.T, dwells.T  # (n, max_steps)
        is_term = (states == -1) | (states == TERM_IDX)
        lengths = jnp.argmax(is_term, axis=1) + 1
        lengths = jnp.where(jnp.any(is_term, axis=1), lengths, max_steps)
        return states, dwells, actors, lengths


def _fallback_rng(key):
    """Build a NumPy generator from an int, a sequence of ints or a key array.

    All words of the key feed the seed, so keys that differ only in a later
    word (or plain integer seeds) no longer collapse onto the same stream.
    Keys that cannot be read as integers fall back to the fixed seed 42.
    """
    if key is None:
        return np.random.default_rng(42)
    try:
        words = [int(w) & 0xFFFFFFFF for w in np.atleast_1d(np.asarray(key)).ravel()]
    except (TypeError, ValueError):
        words = []
    return np.random.default_rng(words if words else 42)


def sample_sessions(key, trans: TransitionData, n_human: int, n_agent: int, n_products: int, max_steps: int = 40) -> SessionBatch:
    """Sample ``n_human + n_agent`` Markov-chain sessions.

    Args:
        key: JAX PRNG key when JAX is available; otherwise an int or array of
            ints used to seed NumPy.
        trans: transition matrices and dwell parameters per actor type.
        n_human: number of human sessions (placed first in the batch).
        n_agent: number of agent sessions.
        n_products: products are drawn uniformly from ``[0, n_products)``.
        max_steps: maximum recorded session length.

    Returns:
        A ``SessionBatch`` of NumPy arrays.
    """
    if JAX_AVAILABLE:
        k1, k2 = jax.random.split(key)
        states, dwells, actors, lengths = _sample_sessions_jax(
            k1, trans.human_T, trans.agent_T, trans.human_dwell, trans.agent_dwell,
            n_human, n_agent, max_steps,
        )
        products = jax.random.randint(k2, (n_human + n_agent,), 0, n_products)
        return SessionBatch(np.asarray(states), np.asarray(dwells), np.asarray(products),
                            np.asarray(actors), np.asarray(lengths))
    # numpy fallback
    rng = _fallback_rng(key)
    n = n_human + n_agent
    actors = np.concatenate([np.zeros(n_human, dtype=np.int32), np.ones(n_agent, dtype=np.int32)])
    products = rng.integers(0, n_products, size=n)
    states = np.full((n, max_steps), -1, dtype=np.int32)
    dwells = np.zeros((n, max_steps), dtype=np.float32)
    lengths = np.zeros(n, dtype=np.int32)
    for i in range(n):
        human = actors[i] == 0
        T = trans.human_T if human else trans.agent_T
        dp = trans.human_dwell if human else trans.agent_dwell
        s, t = 0, 0
        while t < max_steps and s != TERM_IDX:
            states[i, t] = s
            dwells[i, t] = max(0.3, rng.gamma(dp[s, 0], dp[s, 1]))
            s = rng.choice(N_STATES, p=T[s])
            t += 1
        lengths[i] = t
    return SessionBatch(states, dwells, products, actors, lengths)


def compute_metrics(batch: SessionBatch, prices: np.ndarray, unit_cost: np.ndarray, base_price: np.ndarray) -> SimResult:
    """Aggregate demand, revenue and diagnostic metrics for a sampled batch.

    A session counts as a purchase when it visits ``PURCHASE_IDX`` at any step;
    the sale is attributed to the session's single product.
    """
    n_products = len(prices)
    purchased = np.any(batch.states == PURCHASE_IDX, axis=1)
    human_purch = purchased & (batch.actors == 0)
    agent_purch = purchased & (batch.actors == 1)
    demand_h = np.bincount(batch.products[human_purch], minlength=n_products).astype(np.float32)
    demand_a = np.bincount(batch.products[agent_purch], minlength=n_products).astype(np.float32)
    # revenue and oracle
    purch_products = batch.products[purchased]
    revenue = float(np.sum(prices[purch_products]))
    revenue_oracle = float(np.sum(base_price[purch_products]))
    # agent loss: base_price - price_paid for agent purchases (agents gaming the system)
    agent_products = batch.products[agent_purch]
    agent_loss = float(np.sum(base_price[agent_products] - prices[agent_products]))
    # COI: margin - expected_premium*0.5 for human purchases
    human_products = batch.products[human_purch]
    if len(human_products) > 0:
        margin = float(np.mean(prices[human_products] - unit_cost[human_products]))
        premium = float(np.mean(base_price[human_products] - prices[human_products]))
        coi = max(0.0, margin - premium * 0.5)
    else:
        coi = 0.0
    # look to book: views / purchases
    views = float(np.sum(batch.states == 1))  # view_item_page = index 1
    n_purch = int(purchased.sum())
    look_to_book = views / (n_purch + 1e-6)
    mean_sale = float(np.mean(prices[purch_products])) if n_purch > 0 else 0.0
    return SimResult(demand_h, demand_a, revenue, revenue_oracle, agent_loss, coi, look_to_book,
                     mean_sale, int(human_purch.sum()), int(agent_purch.sum()), batch)
from dataclasses import dataclass
import numpy as np

try:
    import jax.numpy as jnp
    JAX_AVAILABLE = True
except ImportError:
    jnp, JAX_AVAILABLE = np, False

STATES = ["session_start", "view_item_page", "learn_more_about_item", "add_item_to_cart", "purchase_complete", "session_end"]
S2I = {s: i for i, s in enumerate(STATES)}
# Derive the special indices from the table so they cannot drift from STATES.
N_STATES = len(STATES)
TERM_IDX, PURCHASE_IDX, CART_IDX = S2I["session_end"], S2I["purchase_complete"], S2I["add_item_to_cart"]


@dataclass
class TransitionData:
    """Dense Markov-chain parameters for the human and the agent profile."""
    human_T: np.ndarray      # (6,6) transition probs
    agent_T: np.ndarray      # (6,6)
    human_dwell: np.ndarray  # (6,2) shape,scale
    agent_dwell: np.ndarray  # (6,2)

    def to_jax(self):
        """Return a copy backed by JAX arrays, or ``self`` when JAX is missing."""
        if not JAX_AVAILABLE:
            return self
        fields = (self.human_T, self.agent_T, self.human_dwell, self.agent_dwell)
        return TransitionData(*[jnp.array(x) for x in fields])


def dict_to_dense(d):
    """Convert ``{src: {dst: weight}}`` into a row-stochastic (6,6) float32 matrix.

    Unknown state names are ignored and rows are normalised to sum to one. A
    state without any outgoing weight would otherwise stay an all-zero row,
    which is not a probability distribution, so it is routed straight to
    ``session_end``. The terminal row is always made absorbing.
    """
    m = np.zeros((N_STATES, N_STATES), dtype=np.float32)
    for src, dsts in d.items():
        i = S2I.get(src)
        if i is None:
            continue
        for dst, p in dsts.items():
            j = S2I.get(dst)
            if j is not None:
                m[i, j] = p
    row_mass = m.sum(axis=1, keepdims=True)
    dead = row_mass[:, 0] <= 0
    m /= np.maximum(row_mass, 1e-8)
    m[dead] = 0.0
    m[dead, TERM_IDX] = 1.0
    m[TERM_IDX] = 0.0
    m[TERM_IDX, TERM_IDX] = 1.0
    return m


def compile_transitions(human_profile, agent_profile):
    """Build ``TransitionData`` from two profiles.

    Each profile must expose ``transitions`` (nested dict of weights) and
    ``dwell_params`` (state name -> ``(shape, scale)``); states without dwell
    parameters default to ``(2.0, 1.0)``.
    """
    def dwell_arr(params):
        rows = [params.get(s, (2.0, 1.0)) for s in STATES]
        return np.array(rows, dtype=np.float32).reshape(N_STATES, 2)

    return TransitionData(
        dict_to_dense(human_profile.transitions),
        dict_to_dense(agent_profile.transitions),
        dwell_arr(human_profile.dwell_params),
        dwell_arr(agent_profile.dwell_params),
    )


def fallback_transitions():
    """Return hard-coded default transitions with dwell ``(2.0, 1.0)`` everywhere."""
    H = {
        "session_start": {"view_item_page": .85, "session_end": .15},
        "view_item_page": {"learn_more_about_item": .4, "add_item_to_cart": .3, "view_item_page": .2, "session_end": .1},
        "learn_more_about_item": {"add_item_to_cart": .5, "view_item_page": .3, "session_end": .2},
        "add_item_to_cart": {"purchase_complete": .6, "view_item_page": .25, "session_end": .15},
        "purchase_complete": {"session_end": 1.0},
    }
    A = {
        "session_start": {"view_item_page": .9, "session_end": .1},
        "view_item_page": {"learn_more_about_item": .5, "add_item_to_cart": .25, "view_item_page": .15, "session_end": .1},
        "learn_more_about_item": {"add_item_to_cart": .4, "view_item_page": .4, "session_end": .2},
        "add_item_to_cart": {"purchase_complete": .5, "view_item_page": .3, "session_end": .2},
        "purchase_complete": {"session_end": 1.0},
    }
    dwell = np.full((N_STATES, 2), [2.0, 1.0], dtype=np.float32)
    return TransitionData(dict_to_dense(H), dict_to_dense(A), dwell.copy(), dwell.copy())
logging +from pathlib import Path +from typing import Dict, Type, Optional +import pickle +from torch.utils.tensorboard import SummaryWriter +from sim.rl.environment import PHANTOMEnv, BusinessLogicConstraints + +logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s') +logger = logging.getLogger(__name__) + +try: + from sim.rl.engine import (BasePricingEngine, WildPricingEngine, StaticPricingEngine, + SimpleDemandEngine, RandomWalkEngine, ThompsonSamplingEngine) +except ImportError as e: + BasePricingEngine = None # engines not required for basic usage + print(e) + + +""" +Target training loop: +have base prices p0 from env reset and run the env step, collect reward and metrics +pass this to the pricing engine which computes the price action to take based on previous reward by learning +the new action gets passed to the step +so we alternate, step -> reward -> engine (produces price delta) -> step with price delta -> reward +to make sure the reinforcement learning inside the engine can learn we need to have trajectory of prices +CURRENT SOLUTION BELOW does not implement correct learning or updates. 
class EngineTrainer:
    """Run a pricing engine through environment episodes and collect metrics.

    The engine must provide ``reset()``, ``compute_prices(prev_prices, obs)``
    and ``update(obs, reward, done, info)``; the environment follows the
    five-value ``step`` convention (obs, reward, terminated, truncated, info).
    """

    # Metric names reported by earlier environment versions; kept in the
    # evaluation output (as 0.0 when absent) so existing consumers keep working.
    LEGACY_METRICS = ('total_reward', 'revenue_observed', 'revenue_oracle',
                      'agent_loss', 'ux_volatility', 'look_to_book')

    def __init__(self, engine, env: "PHANTOMEnv", tb_writer: Optional["SummaryWriter"] = None):
        self.engine = engine
        self.env = env
        self.episode_metrics = []
        self.tb_writer = tb_writer
        self.global_step = 0

    def _log_step(self, reward, info) -> None:
        """Write per-step scalars to TensorBoard when a writer is attached."""
        if not self.tb_writer:
            return
        self.tb_writer.add_scalar("reward/step", reward, self.global_step)
        for key, value in info.items():
            # The environment reports COI diagnostics as "coi" or "coi_<name>".
            if key == "coi" or key.startswith("coi_") or key == "alpha_hat":
                self.tb_writer.add_scalar(f"diagnostics/{key}", value, self.global_step)

    def train(self, n_episodes: int, seed: int = 42):
        """Run ``n_episodes`` training episodes seeded ``seed, seed + 1, ...``.

        Appends the last ``info`` of every episode (plus ``episode_reward`` and
        ``episode``) to ``episode_metrics`` and returns ``self``.
        """
        for ep in range(n_episodes):
            obs, _ = self.env.reset(seed=seed + ep)
            self.engine.reset()
            prev_prices = obs["elasticity"]["price"]
            episode_reward = 0.0
            last_info: Dict[str, float] = {}
            done = False
            while not done:
                action_prices = self.engine.compute_prices(prev_prices, obs)
                obs, reward, terminated, truncated, info = self.env.step(action_prices)
                # A truncated episode is over as well; ignoring it would loop forever.
                done = bool(terminated or truncated)
                self.engine.update(obs, reward, done, info)
                episode_reward += reward
                prev_prices = obs["elasticity"]["price"]
                last_info = info
                self._log_step(reward, info)
                self.global_step += 1
            summary = dict(last_info)
            summary.update({"episode_reward": episode_reward, "episode": ep})
            self.episode_metrics.append(summary)
            if self.tb_writer:
                self.tb_writer.add_scalar("reward/episode", episode_reward, ep)
        return self

    def run_episode(self, seed: int = 42) -> Dict:
        """run single evaluation episode and return metrics

        The result holds the final ``info`` values plus ``total_reward``; the
        engine is not updated.
        """
        obs, _ = self.env.reset(seed=seed)
        self.engine.reset()
        total_reward = 0.0
        prev_prices = obs["elasticity"]["price"]
        ep_metrics = {'total_reward': 0.0}
        done = False
        while not done:
            action_prices = self.engine.compute_prices(prev_prices, obs)
            obs, reward, terminated, truncated, info = self.env.step(action_prices)
            done = bool(terminated or truncated)
            total_reward += reward
            ep_metrics.update(info)
            prev_prices = obs["elasticity"]["price"]
        ep_metrics['total_reward'] = total_reward
        return ep_metrics

    def evaluate(self, n_episodes: int = 10, seed: int = 100, metrics: Optional[list] = None) -> Dict:
        """evaluate trained engine

        Args:
            n_episodes: number of evaluation episodes.
            seed: seed of the first episode; later episodes use ``seed + i``.
            metrics: names to aggregate. By default the legacy names plus every
                numeric metric the episodes actually report.

        Returns:
            ``{name: (mean, std)}`` across episodes; a metric missing from an
            episode counts as 0.0.
        """
        episodes = [self.run_episode(seed=seed + ep) for ep in range(n_episodes)]
        if metrics is None:
            names = list(self.LEGACY_METRICS)
            for ep_metrics in episodes:
                for key, value in ep_metrics.items():
                    numeric = isinstance(value, (int, float, np.number)) and not isinstance(value, bool)
                    if numeric and key not in names:
                        names.append(key)
        else:
            names = list(metrics)
        results = {name: [ep_metrics.get(name, 0.0) for ep_metrics in episodes] for name in names}
        return {k: (np.mean(v), np.std(v)) for k, v in results.items()}


def make_env():
    """Create a PHANTOM environment with default business constraints."""
    return PHANTOMEnv(constraints=BusinessLogicConstraints())


def train_engine(engine_cls, env: "PHANTOMEnv", n_episodes: int, seed: int = 42,
                 tb_writer: Optional["SummaryWriter"] = None) -> EngineTrainer:
    """Instantiate ``engine_cls`` with the env's constraints and train it."""
    engine = engine_cls(constraints=env.constraints, seed=seed)
    trainer = EngineTrainer(engine, env, tb_writer=tb_writer)
    trainer.train(n_episodes, seed=seed)
    return trainer


def save_trainer(trainer: EngineTrainer, path: Path):
    """save engine state and metrics"""
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump({'engine': trainer.engine, 'metrics': trainer.episode_metrics}, f)
    logger.info(f"Saved trainer to {path}")


def load_trainer(path: Path, env: "PHANTOMEnv", tb_writer: Optional["SummaryWriter"] = None) -> EngineTrainer:
    """load saved engine"""
    # SECURITY: pickle executes arbitrary code on load -- only open files this
    # project wrote itself, never files from untrusted sources.
    with open(path, 'rb') as f:
        data = pickle.load(f)
    trainer = EngineTrainer(data['engine'], env, tb_writer=tb_writer)
    trainer.episode_metrics = data['metrics']
    return trainer


if __name__ == "__main__":
    if BasePricingEngine is None:
        logger.error("Engines not available, cannot run training")
        raise SystemExit(1)

    base_dir = Path("./sim/rl/runs")
    # parents=True: the run directory may be created from a fresh checkout.
    base_dir.mkdir(parents=True, exist_ok=True)

    engines = {
        "Wild": WildPricingEngine,
        "Static": StaticPricingEngine,
        "RandomWalk": RandomWalkEngine,
        "ThompsonSampling": ThompsonSamplingEngine,
    }
    n_train_episodes = 50
    n_eval_episodes = 10
    seed = 42

    logger.info(f"Training config: {n_train_episodes} episodes per engine")

    trained_trainers = {}

    for engine_name, engine_cls in engines.items():
        run_name = engine_name
        log_dir = base_dir / run_name
        log_dir.mkdir(parents=True, exist_ok=True)

        logger.info(f"Training {engine_name}")
        logger.info(f"Log directory: {log_dir}")

        env = make_env()
        tb_writer = SummaryWriter(log_dir=str(log_dir))
        trainer = train_engine(engine_cls, env, n_train_episodes, seed, tb_writer=tb_writer)
        tb_writer.close()

        save_path = log_dir / "trainer.pkl"
        save_trainer(trainer, save_path)

        trained_trainers[run_name] = (trainer, env)

    logger.info("Starting evaluation")

    for run_name, (trainer, env) in trained_trainers.items():
        logger.info(f"Evaluating {run_name}")
        results = trainer.evaluate(n_episodes=n_eval_episodes, seed=seed + 1000)
        for metric, (mean, std) in results.items():
            logger.info(f"  {metric:20s}: {mean:10.2f} ± {std:6.2f}")

    logger.info(f"Results saved to: {base_dir}")
class YooChooseLoader(Loader):
    """Load YooChoose click/buy logs as per-session interaction lists.

    The dataset is downloaded and unpacked into ``root_dir`` on first use
    (requires ``py7zr``). At most ``max_sessions`` sessions are kept in memory.
    """
    URL = "https://s3-eu-west-1.amazonaws.com/yc-rdata/yoochoose-data.7z"
    CLICK_COLS = ['session_id', 'ts', 'item_id', 'category']
    BUY_COLS = ['session_id', 'ts', 'item_id', 'price', 'quantity']

    def __init__(self, root_dir: str = "data/yoochoose", chunk_size: int = 500_000, max_sessions: int = 1000):
        self.root = root_dir
        self.chunk_size = chunk_size
        self.max_sessions = max_sessions
        self.click_path = f"{root_dir}/yoochoose-clicks.dat"
        self.buy_path = f"{root_dir}/yoochoose-buys.dat"
        if not os.path.exists(self.click_path):
            self._setup()
        self.data = self._load_sessions(max_sessions)
        self.entries = list(self.data.keys())

    def _setup(self):
        """Download the archive and extract it into ``self.root``.

        Raises:
            RuntimeError: if ``py7zr`` is not installed.
            requests.HTTPError: if the download answers with an error status.
        """
        if py7zr is None:
            raise RuntimeError("py7zr is required to unpack YooChoose dataset. Install py7zr first.")
        os.makedirs(self.root, exist_ok=True)
        zip_path = f"{self.root}/temp.7z"
        try:
            with requests.get(self.URL, stream=True, timeout=60) as r:
                # Fail loudly instead of saving an HTML error page as the archive.
                r.raise_for_status()
                with open(zip_path, 'wb') as f:
                    for chunk in r.iter_content(8192):
                        f.write(chunk)
            with py7zr.SevenZipFile(zip_path, 'r') as z:
                z.extractall(self.root)
        finally:
            # Never leave a partial or already-extracted archive behind.
            if os.path.exists(zip_path):
                os.remove(zip_path)

    def _make_interaction(self, sid: str, ts: str, item_id: str, event: str, page: str, meta: dict) -> InteractionModel:
        """Wrap one dataset row in the interaction envelope used by the loaders."""
        payload = PayloadModel(
            sessionId=sid, experimentId=None, eventName=event,
            page=page, productId=item_id, metadata=meta,
            storeMode="yoochoose", userAgent="dataset", ts=ts
        )
        return InteractionModel(
            partitionID=0, offset=0, timestamp=0, compression="",
            isTransactional=False, headers=[], key={},
            value=ValueModel(payload=payload, encoding="json", isPayloadNull=False, schemaId=1, size=0)
        )

    def _parse_category(self, cat) -> str:
        """Map a raw category value to a readable label.

        Missing values and 0 become ``"unknown"``, ``"S"`` becomes
        ``"special_offer"``, 1-12 become ``"category_<n>"`` and every other
        number ``"brand_<n>"``; anything else is returned as a string. Numeric
        and string forms are treated alike because pandas may parse a chunk of
        this column as either.
        """
        if pd.isna(cat):
            return "unknown"
        if isinstance(cat, str):
            cat = cat.strip()
            if cat == "S":
                return "special_offer"
        try:
            n = int(cat)
        except (TypeError, ValueError):
            return str(cat)
        if n == 0:
            return "unknown"
        return f"category_{n}" if 1 <= n <= 12 else f"brand_{n}"

    def stream_clicks(self) -> Generator[InteractionModel, None, None]:
        """Yield one ``view_item_page`` interaction per row of the click log."""
        with pd.read_csv(self.click_path, names=self.CLICK_COLS, chunksize=self.chunk_size, header=None) as reader:
            for chunk in reader:
                for r in chunk.itertuples(index=False):
                    yield self._make_interaction(
                        str(r.session_id), r.ts, str(r.item_id),
                        "view_item_page", self._parse_category(r.category), {}
                    )

    def stream_buys(self) -> Generator[InteractionModel, None, None]:
        """Yield one ``purchase_complete`` interaction per row of the buy log."""
        with pd.read_csv(self.buy_path, names=self.BUY_COLS, chunksize=self.chunk_size, header=None) as reader:
            for chunk in reader:
                for r in chunk.itertuples(index=False):
                    yield self._make_interaction(
                        str(r.session_id), r.ts, str(r.item_id),
                        "purchase_complete", "/checkout", {"price": r.price, "quantity": r.quantity}
                    )

    def stream(self) -> Generator[InteractionModel, None, None]:
        """Yield all clicks followed by all buys."""
        yield from self.stream_clicks()
        yield from self.stream_buys()

    def _load_sessions(self, max_sessions: int | None = None) -> dict:
        """Group interactions by session id, each list sorted by timestamp.

        Once ``max_sessions`` distinct sessions exist, events of new sessions
        are skipped, but the scan continues so that later events (including
        buys) of the kept sessions are still collected.
        """
        sessions = {}
        for interaction in self.stream():
            sid = interaction.value.payload.sessionId
            if sid not in sessions:
                if max_sessions and len(sessions) >= max_sessions:
                    continue
                sessions[sid] = []
            sessions[sid].append(interaction)
        for events in sessions.values():
            events.sort(key=lambda x: x.value.payload.ts)
        return sessions

    def get_data(self) -> dict:
        """Return the ``{session_id: [interactions]}`` mapping."""
        return self.data

    def get_entries(self) -> tuple[list[str], int]:
        """Return the session ids together with their count."""
        return self.entries, len(self.entries)


if __name__ == "__main__":
    loader = YooChooseLoader(max_sessions=100)
    views, purchases = 0, 0
    for sid, evts in loader.get_data().items():
        for e in evts:
            if e.value.payload.eventName == "view_item_page":
                views += 1
            elif e.value.payload.eventName == "purchase_complete":
                purchases += 1
    print(f"Loaded {len(loader.entries)} sessions: {views} view_item_page, {purchases} purchase_complete")
'admin'}:${process.env.AIRFLOW_PASS || 'admin'}`).toString('base64'); + +const req = (path: string, opts: any = {}) => { + const headers = { Authorization: AUTH, ...opts.headers }; + return fetch(`${AIRFLOW_URL}${path}`, { ...opts, headers }); +}; + +export const triggerDag = async (dagId: string, conf = {}) => { + const r = await req(`/api/v1/dags/${dagId}/dagRuns`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ conf }), + }); + if (!r.ok) throw new Error(`Trigger DAG failed: ${r.status}`); + return (await r.json()).dag_run_id; +}; + +export const getDagStatus = async (dagId: string, runId: string) => { + const r = await req(`/api/v1/dags/${dagId}/dagRuns/${runId}`); + if (!r.ok) throw new Error(`Get status failed: ${r.status}`); + return (await r.json()).state; +}; + +export const cancelDag = async (dagId: string, runId: string) => { + const r = await req(`/api/v1/dags/${dagId}/dagRuns/${runId}`, { + method: 'PATCH', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ state: 'failed' }), + }); + if (!r.ok) console.warn(`Failed to cancel DAG ${runId}: ${r.status}`); +}; + +export const waitForDag = async (dagId: string, runId: string, maxMs = 30000, pollMs = 1000) => { + const t0 = Date.now(); + while (Date.now() - t0 < maxMs) { + const state = await getDagStatus(dagId, runId); + if (state === 'success') return; + if (state === 'failed') throw new Error(`DAG ${runId} failed`); + await new Promise(r => setTimeout(r, pollMs)); + } + await cancelDag(dagId, runId); + throw new Error(`DAG ${runId} timeout`); +}; + +export const runDag = async (dagId: string, conf = {}, maxMs = 60000) => { + const runId = await triggerDag(dagId, conf); + await waitForDag(dagId, runId, maxMs); +}; + +export const runSessionPricing = (mode = 'hotel') => + runDag('session_pricing_pipeline', { store_mode: mode, session_limit: 10 }, 90000); + +export const runSurgePricing = (mode = 'hotel', highThresh = 10, lowThresh = 
2) => + runDag('surge_pricing_pipeline', { + store_mode: mode, + high_threshold: highThresh, + low_threshold: lowThresh, + surge_multiplier: 1.2, + discount_multiplier: 0.9 + }, 90000); diff --git a/tests/e2e/helpers/kafka.ts b/tests/e2e/helpers/kafka.ts index c0a95dd..18b977d 100644 --- a/tests/e2e/helpers/kafka.ts +++ b/tests/e2e/helpers/kafka.ts @@ -9,8 +9,8 @@ interface InteractionEvent { const dumpKafkaTopic = async (backendUrl: string, topic: string) => { const resp = await fetch(`${backendUrl}/api/kafka/dump?topic=${topic}`); if (!resp.ok) throw new Error(`Kafka dump failed: ${resp.status}`); - const { messages = [] } = await resp.json(); - return messages as any[]; + const { data = [] } = await resp.json(); + return data as any[]; }; export const waitForInteractionEvent = async ( diff --git a/tests/e2e/playwright.config.ts b/tests/e2e/playwright.config.ts index 54a5561..dc3c815 100644 --- a/tests/e2e/playwright.config.ts +++ b/tests/e2e/playwright.config.ts @@ -5,14 +5,14 @@ export default defineConfig({ fullyParallel: true, forbidOnly: !!process.env.CI, retries: 0, - workers: 5, + workers: 1, reporter: 'list', use: { baseURL: process.env.WEB_URL || 'http://localhost:3000', trace: 'retain-on-failure', screenshot: 'only-on-failure', }, - timeout: 60000, + timeout: 180000, expect: { timeout: 10000, }, diff --git a/tests/e2e/scenarios/session-aware.spec.ts b/tests/e2e/scenarios/session-aware.spec.ts index b204984..5c27747 100644 --- a/tests/e2e/scenarios/session-aware.spec.ts +++ b/tests/e2e/scenarios/session-aware.spec.ts @@ -9,6 +9,7 @@ import { addToCart, } from '../helpers/interactions'; import { getSessionEvents } from '../helpers/kafka'; +import { runSessionPricing } from '../helpers/airflow'; test.describe('SessionAwarePricer E2E', () => { const STORE_TYPE = 'hotel'; @@ -23,6 +24,9 @@ test.describe('SessionAwarePricer E2E', () => { await page.waitForTimeout(1500); const productId2 = await humanLikeViewProduct(page, STORE_TYPE); + + await 
runSessionPricing(STORE_TYPE); + const secondPrice = await getPriceFromDOM(page); expect(await verifySessionConsistency(page, sessionId)).toBeTruthy(); @@ -40,11 +44,13 @@ test.describe('SessionAwarePricer E2E', () => { await rapidViewProductViaFlow(page, 8, 100, STORE_TYPE); expect(await verifySessionConsistency(page, sessionId)).toBeTruthy(); - await page.waitForTimeout(2500); + await page.waitForTimeout(1000); const events = await getSessionEvents(backendUrl, sessionId); expect(events.length).toBeGreaterThanOrEqual(8); + await runSessionPricing(STORE_TYPE); + await page.goto(`/products/${productId}`); await page.waitForLoadState('networkidle'); const agentPrice = await getPriceFromDOM(page); @@ -59,14 +65,12 @@ test.describe('SessionAwarePricer E2E', () => { const productId = await viewProductViaFlow(page, STORE_TYPE); const baselinePrice = await getPriceFromDOM(page); - const startTime = Date.now(); await rapidViewProductViaFlow(page, 10, 80, STORE_TYPE); - const duration = (Date.now() - startTime) / 1000; - const eventsPerSec = 10 / duration; - expect(eventsPerSec).toBeGreaterThan(2.0); + const events = await getSessionEvents(backendUrl, sessionId); + expect(events.length).toBeGreaterThanOrEqual(10); - await page.waitForTimeout(2000); + await runSessionPricing(STORE_TYPE); await page.goto(`/products/${productId}`); await page.waitForLoadState('networkidle'); @@ -105,8 +109,11 @@ test.describe('SessionAwarePricer E2E', () => { await rapidViewProductViaFlow(page, 2, 150, STORE_TYPE); - await page.waitForTimeout(1500); + await page.waitForTimeout(1000); await humanLikeViewProduct(page, STORE_TYPE); + + await runSessionPricing(STORE_TYPE); + const finalPrice = await getPriceFromDOM(page); expect(Math.abs(finalPrice - baselinePrice) / baselinePrice).toBeLessThan(0.3); diff --git a/tests/e2e/scenarios/surge-pricing.spec.ts b/tests/e2e/scenarios/surge-pricing.spec.ts index e3e2f8d..26d29d3 100644 --- a/tests/e2e/scenarios/surge-pricing.spec.ts +++ 
b/tests/e2e/scenarios/surge-pricing.spec.ts @@ -7,6 +7,7 @@ import { verifySessionConsistency, } from '../helpers/interactions'; import { waitForInteractionEvent, countProductViews } from '../helpers/kafka'; +import { runSurgePricing } from '../helpers/airflow'; test.describe('SimpleSurgePricer E2E', () => { const STORE_TYPE = 'hotel'; @@ -29,7 +30,7 @@ test.describe('SimpleSurgePricer E2E', () => { await rapidViewProductViaFlow(page, 5, 200, STORE_TYPE); - await page.waitForTimeout(2000); + await page.waitForTimeout(1000); const evt = await waitForInteractionEvent(backendUrl, sessionId, 'view_item_page'); expect(evt).not.toBeNull(); @@ -37,6 +38,8 @@ test.describe('SimpleSurgePricer E2E', () => { const viewCount = await countProductViews(backendUrl, productId); expect(viewCount).toBeGreaterThanOrEqual(5); + await runSurgePricing(STORE_TYPE, 3, 1); + await page.goto(`/products/${productId}`); await page.waitForLoadState('networkidle'); const surgedPrice = await getPriceFromDOM(page); @@ -72,7 +75,9 @@ test.describe('SimpleSurgePricer E2E', () => { await rapidViewProductViaFlow(page, 5, 150, STORE_TYPE); - await page.waitForTimeout(1500); + await page.waitForTimeout(1000); + + await runSurgePricing(STORE_TYPE, 3, 1); await page.goto(`/products/${productId}`); await page.waitForLoadState('networkidle'); @@ -81,6 +86,8 @@ test.describe('SimpleSurgePricer E2E', () => { await page.waitForTimeout(12000); + await runSurgePricing(STORE_TYPE, 3, 1); + await page.goto(`/products/${productId}`); await page.waitForLoadState('networkidle'); const decayedPrice = await getPriceFromDOM(page); diff --git a/web/src/app/api/pricing/route.ts b/web/src/app/api/pricing/route.ts index 1aec75b..6532131 100644 --- a/web/src/app/api/pricing/route.ts +++ b/web/src/app/api/pricing/route.ts @@ -30,6 +30,8 @@ export async function GET(req: NextRequest) { const providerUrl = process.env.PRICING_PROVIDER_URL || 'http://localhost:5001'; try { const queryParams = new URLSearchParams(); + // THIS is 
our entry point into the dynamic pricing where we reference the context of the session and experiment and ask for a price to assign to the trajectory which is expressed + // The whole pipeline gets triggered from here. if (sessionId) queryParams.append('sessionId', sessionId); if (experimentId) queryParams.append('experimentId', experimentId); @@ -55,25 +57,26 @@ export async function GET(req: NextRequest) { price = Math.round(randomBase * 100) / 100; } - // log price to kafka for elasticity computation + // log price to kafka asynchronously (non-blocking) if (sessionId) { const backendUrl = process.env.BACKEND_URL || 'http://localhost:5000'; - try { - await fetch(`${backendUrl}/api/kafka/price-log`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - productId, - price, - sessionId, - experimentId: experimentId || undefined, - storeMode, - ts: timestamp, - }), - }); - } catch (err) { - console.error('[price-log-error]', err); - } + // fire and forget - don't await to avoid blocking response + fetch(`${backendUrl}/api/kafka/price-log`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + productId, + price, + sessionId, + experimentId: experimentId || undefined, + storeMode, + ts: timestamp, + }), + }).catch(err => { + if (process.env.NODE_ENV === 'development') { + console.error('[price-log-error]', err); + } + }); } if (process.env.NODE_ENV === 'development') { diff --git a/web/src/app/cart/page.tsx b/web/src/app/cart/page.tsx index 30ac3f2..dbcb30b 100644 --- a/web/src/app/cart/page.tsx +++ b/web/src/app/cart/page.tsx @@ -32,7 +32,8 @@ export default function CartPage() { {itemCount > 0 && ( @@ -42,7 +43,7 @@ export default function CartPage() { {itemCount === 0 ? (

Your cart is empty

- Browse our selection + Browse our selection
) : ( <> @@ -54,15 +55,11 @@ export default function CartPage() { >
- - {item.type} -

{item.name}

{item.type === 'hotel' && (
-

{String(item.metadata.roomType)}

{String(item.metadata.checkIn)} - {String(item.metadata.checkOut)}

{String(item.metadata.nights)} night{Number(item.metadata.nights) > 1 ? 's' : ''}

@@ -81,7 +78,8 @@ export default function CartPage() {

${item.price}

@@ -100,7 +98,7 @@ export default function CartPage() { dispatchInteraction('checkout_start', undefined, { total, itemCount }); window.location.href = '/checkout'; }} - className="w-full py-3 bg-blue-600 hover:bg-blue-700 text-white rounded-lg font-medium transition-colors" + className="btn-primary w-full" > Proceed to Checkout diff --git a/web/src/app/globals.css b/web/src/app/globals.css index 4a5b0c9..457b974 100644 --- a/web/src/app/globals.css +++ b/web/src/app/globals.css @@ -8,6 +8,9 @@ --bg-secondary: #f5f5f5; --text-primary: #333333; --text-secondary: #666666; + --accent-primary: #007aff; + --accent-primary-hover: #0051d5; + --accent-primary-light: #e6f2ff; --spacing-sm: 8px; --spacing-md: 16px; --spacing-lg: 32px; diff --git a/web/src/app/layout.tsx b/web/src/app/layout.tsx index e9f9b63..5ff49ae 100644 --- a/web/src/app/layout.tsx +++ b/web/src/app/layout.tsx @@ -15,8 +15,8 @@ const geistMono = Geist_Mono({ }); export const metadata: Metadata = { - title: "Create Next App", - description: "Generated by create next app", + title: "Travel Booking Platform", + description: "Book flights and hotels with dynamic pricing", }; export default function RootLayout({ diff --git a/web/src/app/page.tsx b/web/src/app/page.tsx index 295f8fd..c97c8ed 100644 --- a/web/src/app/page.tsx +++ b/web/src/app/page.tsx @@ -1,65 +1,5 @@ -import Image from "next/image"; +import { redirect } from 'next/navigation'; export default function Home() { - return ( -
-
- Next.js logo -
-

- To get started, edit the page.tsx file. -

-

- Looking for a starting point or more instructions? Head over to{" "} - - Templates - {" "} - or the{" "} - - Learning - {" "} - center. -

-
-
- - Vercel logomark - Deploy Now - - - Documentation - -
-
-
- ); + redirect('/hotel'); } diff --git a/web/src/components/feats/hotel/HotelCard.tsx b/web/src/components/feats/hotel/HotelCard.tsx index 5bf234d..847e1b2 100644 --- a/web/src/components/feats/hotel/HotelCard.tsx +++ b/web/src/components/feats/hotel/HotelCard.tsx @@ -2,6 +2,7 @@ import type { EventName } from '@/lib/events'; import type { Hotel } from '@/lib/hotel-utils'; +import { getHotelImageUrl } from '@/lib/hotel-utils'; import { useHoverTracking } from '@/hooks/useHoverTracking'; import PriceDisplay from '@/components/ui/PriceDisplay'; @@ -47,8 +48,6 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) { window.location.href = `/hotel/products/${hotel.id}`; }; - const imageUrl = `https://images.unsplash.com/photo-1551882547-ff40c63fe5fa?w=400&h=300&fit=crop`; - return (
{hotel.name} { diff --git a/web/src/components/feats/hotel/HotelDetails.tsx b/web/src/components/feats/hotel/HotelDetails.tsx index 6cdbbdd..030769f 100644 --- a/web/src/components/feats/hotel/HotelDetails.tsx +++ b/web/src/components/feats/hotel/HotelDetails.tsx @@ -2,6 +2,7 @@ import { useState, useEffect } from 'react'; import type { Hotel } from '@/lib/hotel-utils'; +import { getHotelImageUrl } from '@/lib/hotel-utils'; import PriceDisplay from '@/components/ui/PriceDisplay'; interface HotelDetailsProps { @@ -43,13 +44,11 @@ const PriceTotalDisplay = ({ productId, nights }: { productId: string; nights: n }; export default function HotelDetails({ product, onAddToCart, addedToCart }: HotelDetailsProps) { - const imageUrl = `https://images.unsplash.com/photo-1566073771259-6a8506099945?w=800&h=600&fit=crop`; - return (
{product.name} { diff --git a/web/src/components/ui/Navigation.tsx b/web/src/components/ui/Navigation.tsx index 9d9d4cf..6f0ecbb 100644 --- a/web/src/components/ui/Navigation.tsx +++ b/web/src/components/ui/Navigation.tsx @@ -20,7 +20,7 @@ const NavLink = ({ href, children }: { href: string; children: React.ReactNode } href={href} className={`px-4 py-2 rounded-md transition-colors ${ isActive - ? 'bg-[var(--accent-primary)] font-semibold' + ? 'bg-[var(--accent-primary)] text-white font-semibold' : 'hover:bg-[var(--accent-primary-light)] text-[var(--text-primary)]' }`} > diff --git a/web/src/lib/airline-utils.ts b/web/src/lib/airline-utils.ts index 74a1916..b801e14 100644 --- a/web/src/lib/airline-utils.ts +++ b/web/src/lib/airline-utils.ts @@ -31,7 +31,7 @@ export interface Flight { availability: number; } -const EPOCH = new Date(0); +import { dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils'; export const transformProduct = (p: AirlineProduct): Flight => { const { id, flight_type, date_index, metadata, availability } = p; @@ -52,24 +52,4 @@ export const transformProduct = (p: AirlineProduct): Flight => { }; }; -// convert date string to days from today -export const dateToDaysFromToday = (dateStr: string): number => { - const target = new Date(dateStr); - target.setHours(0, 0, 0, 0); - const today = new Date(); - today.setHours(0, 0, 0, 0); - return Math.floor((target.getTime() - today.getTime()) / 86400000); -}; - -// convert date string to date_index (days since epoch) -export const dateToIndex = (dateStr: string): number => { - const d = new Date(dateStr); - return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000); -}; - -// get current date_index -export const todayIndex = (): number => { - const now = new Date(); - now.setHours(0, 0, 0, 0); - return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000); -}; +export { dateToDaysFromToday, dateToIndex, todayIndex }; diff --git a/web/src/lib/date-utils.ts b/web/src/lib/date-utils.ts new 
file mode 100644 index 0000000..bad1a90 --- /dev/null +++ b/web/src/lib/date-utils.ts @@ -0,0 +1,23 @@ +const EPOCH = new Date(0); +const MS_PER_DAY = 86400000; + +export const dateToDaysFromToday = (dateStr: string): number => { + const target = new Date(dateStr); + target.setHours(0, 0, 0, 0); + const today = new Date(); + today.setHours(0, 0, 0, 0); + return Math.floor((target.getTime() - today.getTime()) / MS_PER_DAY); +}; + +export const dateToIndex = (dateStr: string): number => { + const d = new Date(dateStr); + return Math.floor((d.getTime() - EPOCH.getTime()) / MS_PER_DAY); +}; + +export const todayIndex = (): number => { + const now = new Date(); + now.setHours(0, 0, 0, 0); + return Math.floor((now.getTime() - EPOCH.getTime()) / MS_PER_DAY); +}; + +export { EPOCH, MS_PER_DAY }; diff --git a/web/src/lib/hotel-utils.ts b/web/src/lib/hotel-utils.ts index b59994a..e5ba5c2 100644 --- a/web/src/lib/hotel-utils.ts +++ b/web/src/lib/hotel-utils.ts @@ -25,7 +25,7 @@ export interface Hotel { nights: number; } -const EPOCH = new Date(0); +import { EPOCH, MS_PER_DAY, dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils'; export const transformProduct = (p: HotelProduct): Hotel => { const { id, room_type, date_index, metadata } = p; @@ -37,14 +37,14 @@ export const transformProduct = (p: HotelProduct): Hotel => { // legacy: treat as offset from today const today = new Date(); today.setHours(0, 0, 0, 0); - checkIn = new Date(today.getTime() + date_index * 86400000); + checkIn = new Date(today.getTime() + date_index * MS_PER_DAY); } else { // proper: days since epoch - checkIn = new Date(EPOCH.getTime() + date_index * 86400000); + checkIn = new Date(EPOCH.getTime() + date_index * MS_PER_DAY); } const nights = 1; - const checkOut = new Date(checkIn.getTime() + nights * 86400000); + const checkOut = new Date(checkIn.getTime() + nights * MS_PER_DAY); const formatOpts: Intl.DateTimeFormatOptions = { month: 'short', @@ -65,24 +65,34 @@ export const 
transformProduct = (p: HotelProduct): Hotel => { }; }; -// convert date string to days from today -export const dateToDaysFromToday = (dateStr: string): number => { - const target = new Date(dateStr); - target.setHours(0, 0, 0, 0); - const today = new Date(); - today.setHours(0, 0, 0, 0); - return Math.floor((target.getTime() - today.getTime()) / 86400000); +const hotelImagePool = [ + 'photo-1566073771259-6a8506099945', + 'photo-1551882547-ff40c63fe5fa', + 'photo-1590490360182-c33d57733427', + 'photo-1582719478250-c89cae4dc85b', + 'photo-1596701062351-8c2c14d1fdd0', + 'photo-1631049307264-da0ec9d70304', + 'photo-1578683010236-d716f9a3f461', + 'photo-1540518614846-7eded433c457', + 'photo-1505693416388-ac5ce068fe85', + 'photo-1522771739844-6a9f6d5f14af', + 'photo-1562438668-bcf0ca6578f0', + 'photo-1595576508898-0ad5c879a061', +]; + +const hashString = (s: string): number => { + let h = 0; + for (let i = 0; i < s.length; i++) { + h = ((h << 5) - h) + s.charCodeAt(i); + h = h & h; + } + return Math.abs(h); }; -// convert date string to date_index (days since epoch) -export const dateToIndex = (dateStr: string): number => { - const d = new Date(dateStr); - return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000); +export const getHotelImageUrl = (hotelId: string, size: { w: number; h: number } = { w: 400, h: 300 }): string => { + const idx = hashString(hotelId) % hotelImagePool.length; + const photoId = hotelImagePool[idx]; + return `https://images.unsplash.com/${photoId}?w=${size.w}&h=${size.h}&fit=crop`; }; -// get current date_index -export const todayIndex = (): number => { - const now = new Date(); - now.setHours(0, 0, 0, 0); - return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000); -}; +export { dateToDaysFromToday, dateToIndex, todayIndex };