mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
Merge pull request #44 from velocitatem/agent-behavior-loader-developemen
Agent behavior loader development + RL loop definition and e2e tests.
This commit is contained in:
30
.gitignore
vendored
30
.gitignore
vendored
@@ -5,18 +5,28 @@
|
||||
**/.virtual_documents/
|
||||
**/session_*.svg
|
||||
**/*graph.svg
|
||||
paper/src/bib/auto
|
||||
**/auto/*.el
|
||||
*.old
|
||||
**/package-lock.json
|
||||
**/*.parquet
|
||||
**/_build/
|
||||
|
||||
# Airflow logs - exclude DAG run logs
|
||||
paper/src/bib/auto
|
||||
=======
|
||||
**/_build/
|
||||
paper/src/auto/*
|
||||
paper/src/bib/auto
|
||||
docs/goals/*.md
|
||||
PHANTOM.wiki/
|
||||
experiments/airflow/logs/*
|
||||
experiments/airflow/logs/scheduler/
|
||||
experiments/airflow/logs/dag_processor_manager/
|
||||
experiments/collected_data/*
|
||||
|
||||
paper/src/auto/*
|
||||
lib/
|
||||
docs/goals/*.md
|
||||
PHANTOM.wiki/
|
||||
experiments/collected_data/
|
||||
experiments/agents/collected_data/
|
||||
sim/rl/behavior_loader/*.dot
|
||||
sim/rl/behavior_loader/*.png
|
||||
sim/rl/behavior_loader/*.svg
|
||||
sim/rl/behavior_loader/*.pdf
|
||||
tests/e2e/node_modules/**
|
||||
**/auto/*.el
|
||||
*.old
|
||||
lab/case/thesis/runs*/
|
||||
sim/case/thesis_simplified/runs*/
|
||||
|
||||
2
Makefile
2
Makefile
@@ -49,8 +49,10 @@ test.backend: $(VENV)
|
||||
test.e2e:
|
||||
@cd tests/e2e && npm install
|
||||
@cd tests/e2e && npx playwright install chromium
|
||||
@test -f tests/e2e/.env || cp tests/e2e/.env.example tests/e2e/.env
|
||||
@timeout 30 bash -c 'until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done' || (echo "Backend not ready" && exit 1)
|
||||
@timeout 30 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done' || (echo "Web app not ready" && exit 1)
|
||||
@timeout 30 bash -c 'until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done' || (echo "Airflow not ready" && exit 1)
|
||||
@cd tests/e2e && npm test
|
||||
|
||||
.PHONY: test.all
|
||||
|
||||
@@ -47,53 +47,52 @@ def health() -> dict:
|
||||
|
||||
@app.get("/api/{mode}/price/{productId}", response_model=PriceResponse)
|
||||
def get_price(mode: Literal['hotel', 'airline'], productId: str, sessionId: Optional[str] = Query(None), experimentId: Optional[str] = Query(None)):
|
||||
"""
|
||||
THIS is the fast lookup service (mechanism).
|
||||
Priority: session-keyed price > global optimal price > base price
|
||||
"""
|
||||
product = supabase.table(f'{mode}_products').select("metadata").eq('id', productId).execute().data[0]
|
||||
if not product: raise HTTPException(404, f"Product {productId} not found")
|
||||
|
||||
metadata = product['metadata']
|
||||
base_price = metadata.get('base_price', 100.0)
|
||||
|
||||
# fetch pre-computed prices from registry
|
||||
# PRIORITY 1: session-aware price (computed by Airflow worker)
|
||||
if sessionId:
|
||||
session_price = registry.get_session_price(sessionId, productId)
|
||||
if session_price is not None:
|
||||
return PriceResponse(
|
||||
productId=productId,
|
||||
price=session_price,
|
||||
base_price=base_price,
|
||||
markup=session_price/base_price,
|
||||
elasticity=None,
|
||||
model_version='session-aware'
|
||||
)
|
||||
|
||||
# PRIORITY 2: global pre-computed prices (surge pricing)
|
||||
prices_df = registry.get_prices('latest')
|
||||
elasticity_df = registry.get_elasticity('latest')
|
||||
|
||||
if prices_df is None:
|
||||
# fallback: no pre-computed prices available
|
||||
return PriceResponse(
|
||||
productId=productId,
|
||||
price=base_price,
|
||||
base_price=base_price,
|
||||
markup=1.0,
|
||||
elasticity=None
|
||||
)
|
||||
|
||||
# lookup pre-computed price for this product
|
||||
product_price_row = prices_df[prices_df['productId'] == productId]
|
||||
if product_price_row.empty:
|
||||
# product not in pre-computed prices, fallback to base
|
||||
return PriceResponse(
|
||||
productId=productId,
|
||||
price=base_price,
|
||||
base_price=base_price,
|
||||
markup=1.0,
|
||||
elasticity=None
|
||||
)
|
||||
|
||||
optimal_price = float(product_price_row['optimal_price'].iloc[0]) # TODO: use optimal_price everywhere as aresult
|
||||
|
||||
# get elasticity if available
|
||||
product_elasticity = None
|
||||
if elasticity_df is not None:
|
||||
product_elasticity_row = elasticity_df[elasticity_df['productId'] == productId]
|
||||
if not product_elasticity_row.empty:
|
||||
product_elasticity = float(product_elasticity_row['elasticity'].iloc[0])
|
||||
if prices_df is not None:
|
||||
product_price_row = prices_df[prices_df['productId'] == productId]
|
||||
if not product_price_row.empty:
|
||||
optimal_price = float(product_price_row['optimal_price'].iloc[0])
|
||||
return PriceResponse(
|
||||
productId=productId,
|
||||
price=optimal_price,
|
||||
base_price=base_price,
|
||||
markup=optimal_price/base_price,
|
||||
elasticity=None,
|
||||
model_version='surge'
|
||||
)
|
||||
|
||||
# PRIORITY 3: fallback to base price
|
||||
return PriceResponse(
|
||||
productId=productId,
|
||||
price=optimal_price,
|
||||
price=base_price,
|
||||
base_price=base_price,
|
||||
markup=optimal_price/base_price,
|
||||
elasticity=product_elasticity
|
||||
markup=1.0,
|
||||
elasticity=None,
|
||||
model_version='base'
|
||||
)
|
||||
|
||||
@app.get("/models")
|
||||
|
||||
@@ -198,12 +198,16 @@ def dump_logs(
|
||||
auto_offset_reset='earliest',
|
||||
enable_auto_commit=False,
|
||||
value_deserializer=lambda x: json.loads(x.decode('utf-8')),
|
||||
consumer_timeout_ms=5000
|
||||
consumer_timeout_ms=30000,
|
||||
fetch_max_wait_ms=10000,
|
||||
max_poll_records=1000
|
||||
)
|
||||
|
||||
events = []
|
||||
for msg in consumer:
|
||||
events.append(msg.value)
|
||||
if last_n and len(events) >= last_n * 2:
|
||||
break
|
||||
|
||||
consumer.close()
|
||||
|
||||
|
||||
@@ -112,11 +112,14 @@ services:
|
||||
depends_on:
|
||||
- postgres
|
||||
environment:
|
||||
- AIRFLOW__CORE__EXECUTOR=SequentialExecutor
|
||||
- AIRFLOW__CORE__EXECUTOR=LocalExecutor
|
||||
- AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
|
||||
- AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
|
||||
- AIRFLOW__CORE__LOAD_EXAMPLES=false
|
||||
- AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
|
||||
- AIRFLOW__CORE__PARALLELISM=16
|
||||
- AIRFLOW__CORE__DAG_CONCURRENCY=8
|
||||
- AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
|
||||
- _AIRFLOW_DB_MIGRATE=true
|
||||
- _AIRFLOW_WWW_USER_CREATE=true
|
||||
- _AIRFLOW_WWW_USER_USERNAME=admin
|
||||
@@ -136,14 +139,20 @@ services:
|
||||
- airflow-init
|
||||
- redis
|
||||
environment:
|
||||
- AIRFLOW__CORE__EXECUTOR=SequentialExecutor
|
||||
- AIRFLOW__CORE__EXECUTOR=LocalExecutor
|
||||
- AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
|
||||
- AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
|
||||
- AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true
|
||||
- AIRFLOW__CORE__LOAD_EXAMPLES=false
|
||||
- AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
|
||||
- AIRFLOW__CORE__PARALLELISM=16
|
||||
- AIRFLOW__CORE__DAG_CONCURRENCY=8
|
||||
- AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
|
||||
- AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30
|
||||
- AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60
|
||||
- AIRFLOW__WEBSERVER__EXPOSE_CONFIG=true
|
||||
- AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY}
|
||||
- AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth
|
||||
- KAFKA_HOST=kafka
|
||||
- KAFKA_PORT=29092
|
||||
- BACKEND_URL=http://backend:5000
|
||||
@@ -173,13 +182,20 @@ services:
|
||||
redis:
|
||||
condition: service_started
|
||||
environment:
|
||||
- AIRFLOW__CORE__EXECUTOR=SequentialExecutor
|
||||
- AIRFLOW__CORE__EXECUTOR=LocalExecutor
|
||||
- AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
|
||||
- AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
|
||||
- AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true
|
||||
- AIRFLOW__CORE__LOAD_EXAMPLES=false
|
||||
- AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
|
||||
- AIRFLOW__CORE__PARALLELISM=16
|
||||
- AIRFLOW__CORE__DAG_CONCURRENCY=8
|
||||
- AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
|
||||
- AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30
|
||||
- AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60
|
||||
- AIRFLOW__SCHEDULER__PARSING_PROCESSES=2
|
||||
- AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY}
|
||||
- AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth
|
||||
- KAFKA_HOST=kafka
|
||||
- KAFKA_PORT=29092
|
||||
- BACKEND_URL=http://backend:5000
|
||||
|
||||
66
engine/engine.py
Normal file
66
engine/engine.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from sys import platform
|
||||
import numpy as np
|
||||
from .lib.demand import generate_demand, estimate_demand
|
||||
from .lib.behavior import sample_behavior
|
||||
from logging import INFO, getLogger
|
||||
logger = getLogger(__name__)
|
||||
logger.setLevel(INFO)
|
||||
|
||||
|
||||
|
||||
class MarketEngine():
    """Simulated market: a mix of human and agent sessions reacting to prices.

    Args:
        alpha: share of the N sessions that are agents (the rest are humans).
        N: total number of sessions sampled per `act` call.
        demand_distribution: positional parameters forwarded to the sampler.
        demand_sampling_function: callable used by `generate_demand` to draw
            per-product valuations; called as fn(*params, size=...).
    """

    def __init__(self,
                 alpha = 0.5,
                 N = 100,
                 demand_distribution = (50, 10),
                 demand_sampling_function = np.random.normal):
        # Truncating N*alpha and N*(1-alpha) independently can lose a session
        # to floating-point error (alpha=0.9, N=100 gave 90 + 9). Round the
        # agent share and hand humans the remainder so the two always sum to N.
        self.Nagents = int(round(N * alpha))
        self.Nhumans = int(N) - self.Nagents
        self.demand = (demand_sampling_function, demand_distribution)

    def act(self, prices):
        """Sample one trajectory per session for `prices` and return the demand estimate."""
        demand = generate_demand(prices, *self.demand)
        # Humans are sampled before agents, matching the order of the combined list.
        human_t = [sample_behavior(demand, human=True) for _ in range(self.Nhumans)]
        agent_t = [sample_behavior(demand, human=False) for _ in range(self.Nagents)]
        trajectories = human_t + agent_t
        return estimate_demand(trajectories)

    def measure(self):
        # Placeholder; no measurement is implemented yet.
        pass
|
||||
|
||||
class PricingEngine():
    """Stub pricing platform: posts random prices regardless of observed demand."""

    def __init__(self) -> None:
        pass

    def act(self, demand):
        """Return ten prices drawn uniformly from [25, 100); `demand` is not used."""
        prices = np.random.uniform(25, 100, 10)
        return prices
|
||||
|
||||
|
||||
|
||||
class Limbo():
    """Turn-based loop that alternates between a platform and a market.

    Each actor exposes `act(x)`; whatever one actor returns is stored in
    `self.output` and passed to the other actor on the following step.
    """

    def __init__(self,
                 platform,
                 market
                 ) -> None:
        # The platform moves first; its first input is None.
        self.platform_turn = True
        self.platform = platform
        self.market = market
        self.output = None

    def step(self):
        """Let the current actor act on the previous output, then switch turns."""
        # we could code golf this a little bit
        if self.platform_turn:
            self.output = self.platform.act(self.output)
        else:
            self.output = self.market.act(self.output)
        # NOTE(review): indentation was lost in this view; the print is placed
        # at method level (runs on every step) — confirm against the repository.
        print(self.output)
        self.platform_turn = not self.platform_turn
|
||||
|
||||
if __name__ == "__main__":
    # Demo run: ten alternating turns (five platform, five market) with default engines.
    platform, market = PricingEngine(), MarketEngine()
    limbo = Limbo(platform, market)
    remaining = 10
    while remaining > 0:
        limbo.step()
        remaining -= 1
|
||||
3
engine/lib/__init__.py
Normal file
3
engine/lib/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .demand import generate_demand, estimate_demand
|
||||
from .behavior import sample_behavior
|
||||
from .render import DashboardRenderer, style_axis
|
||||
47
engine/lib/behavior.py
Normal file
47
engine/lib/behavior.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from sim.rl.behavior_loader.models import BehaviorModel, AgentBehaviorModel, aggregate_event_transitions
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from .demand import generate_demand
|
||||
|
||||
import os

# Root of the collected session data. The default is the original author's
# checkout; set PHANTOM_EXPERIMENTS_DIR to point at another location without
# editing this file.
base_dir = os.environ.get(
    "PHANTOM_EXPERIMENTS_DIR",
    "/home/velocitatem/Documents/Projects/PHANTOM/experiments",
)
# Human sessions and agent sessions are read from separate sub-directories.
human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/"

_cache = {}  # lazy cache for models and base pivots
|
||||
|
||||
def _get_base_pivot(human: bool):
    """Return the cached event-transition table for humans or for agents.

    On first use the matching behavior model is built from its data
    directory, its MDP is aggregated into a DataFrame (missing cells
    filled with 0.0), and the result is stored in the module cache.
    """
    key = 'human' if human else 'agent'
    try:
        return _cache[key]
    except KeyError:
        pass

    if human:
        model = BehaviorModel(human_dir)
    else:
        model = AgentBehaviorModel(agent_dir)
    transitions = aggregate_event_transitions(model.build_MDP())
    _cache[key] = pd.DataFrame(transitions).fillna(0.0)
    return _cache[key]
|
||||
|
||||
def adjust_behavior_to_condition(condition, transition_matrix):
    """Expand an event-transition table so every event is split per product.

    Each cell of `transition_matrix` becomes a P x P block (P = number of
    products) equal to the cell value times outer(w, w), where w is
    `condition` normalised to sum to 1. Row and column labels become
    "<event>_product<p>".

    Args:
        condition: per-product demand weights (array-like of length P).
        transition_matrix: pandas DataFrame of event-to-event weights.

    Returns:
        DataFrame of shape (rows*P, cols*P).
    """
    weights = np.asarray(condition, dtype=float)
    n_products = len(weights)
    total = np.sum(weights)
    if total == 0 and n_products:
        # No demand anywhere: dividing would turn the whole matrix into NaN,
        # so weight every product equally instead.
        cond_norm = np.full(n_products, 1.0 / n_products)
    else:
        cond_norm = weights / total

    base_vals = transition_matrix.values
    base_cols, base_rows = transition_matrix.columns.tolist(), transition_matrix.index.tolist()

    # kron tiles each base cell with the P x P product-weight block
    expanded = np.kron(base_vals, np.outer(cond_norm, cond_norm))
    new_cols = [f"{c}_product{p}" for c in base_cols for p in range(n_products)]
    new_rows = [f"{r}_product{p}" for r in base_rows for p in range(n_products)]
    return pd.DataFrame(expanded, index=new_rows, columns=new_cols)
|
||||
|
||||
def sample_behavior(condition, human=True, max_len=40):
    """Sample one event trajectory from the demand-adjusted transition table.

    Args:
        condition: per-product demand weights passed to
            `adjust_behavior_to_condition`.
        human: use the human base table when True, the agent table otherwise.
        max_len: upper bound on the number of events in the trajectory.

    Returns:
        List of "<event>_product<p>" labels. The walk starts from a uniformly
        chosen row label and ends at the first label containing 'checkout'
        or once `max_len` events have been collected, whichever comes first.
    """
    base_pivot = _get_base_pivot(human)
    adjusted_transitions = adjust_behavior_to_condition(condition, base_pivot)
    next_states = adjusted_transitions.columns

    trajectory = [np.random.choice(adjusted_transitions.index)]
    # The previous condition (`< max_len or 'checkout' in last`) never stopped
    # at checkout and kept sampling past max_len whenever the last event was a
    # checkout; both limits must hold for the walk to continue.
    while len(trajectory) < max_len and 'checkout' not in trajectory[-1]:
        probs = adjusted_transitions.loc[trajectory[-1]].values
        mass = np.sum(probs)
        # A row with no outgoing weight falls back to a uniform draw (p=None).
        sample = np.random.choice(next_states, p=probs / mass if mass > 0 else None)
        trajectory.append(sample)
    return trajectory
|
||||
|
||||
if __name__ == "__main__":
    # Smoke run: print one human and then one agent trajectory for three prices.
    for is_human in (True, False):
        t = sample_behavior(generate_demand(np.array([10,20,30])), human=is_human)
        print(t)
|
||||
45
engine/lib/demand.py
Normal file
45
engine/lib/demand.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import logging
|
||||
import numpy as np
|
||||
from logging import getLogger
|
||||
logger = getLogger(__name__)
|
||||
|
||||
def generate_demand(prices, distribution_method = np.random.normal, distribution_params = (50.0, 10.0)):
    """Turn a price vector into per-product demand shares.

    One valuation per product is drawn via
    distribution_method(*distribution_params, size=len(prices)); demand is
    the non-negative part of (valuation - price), rescaled so the entries
    sum to 100 whenever any demand is positive.
    """
    n_products = len(prices)
    # each product gets its own intrinsic valuation draw
    product_valuations = distribution_method(*distribution_params, size=n_products)
    # the higher the price relative to the valuation, the lower the demand; floor at zero
    surplus = product_valuations - prices
    demand = np.maximum(0, surplus)
    total = np.sum(demand)
    if total > 0:
        # express as percentages; skipped when everything is zero to avoid 0/0
        demand = demand / total * 100
    logger.info(f"Generated demand for prices {prices}: {demand} with valuations from distribution {distribution_params}")
    return demand
|
||||
|
||||
def estimate_demand(trajectories):
    """Estimate per-product demand shares from product-view events.

    Every event whose label contains 'view_product' counts as one view of
    the product whose integer id follows the final underscore (after
    stripping the 'product' prefix). Returns {product_id: percentage of
    all views}; products never viewed are absent, and the result is empty
    when there are no views at all.
    """
    views = {}
    view_events = (ev for path in trajectories for ev in path if 'view_product' in ev)
    for ev in view_events:
        suffix = ev.split('_')[-1]
        pid = int(suffix.replace('product', ''))
        views[pid] = views.get(pid, 0) + 1

    total_views = sum(views.values())
    # convert raw counts to percentages of all views
    return {pid: (count / total_views) * 100 for pid, count in views.items()}
|
||||
|
||||
# Example usage
|
||||
if __name__ == "__main__":
    # Round trip: generate demand, simulate 200 sessions (10% agents), and
    # compare the demand recovered from their views with the generated one.
    np.random.seed(42)
    prices = np.array([20.0, 35.0, 50.0, 65.0])
    demand = generate_demand(prices)
    print("Generated Demand:", demand)
    from .behavior import sample_behavior
    N, alphat =200, 0.1
    n_humans, n_agents = int(N*(1 - alphat)), int(N*alphat)
    # humans are sampled first, then agents
    trajectories = [sample_behavior(demand, human=True) for _ in range(n_humans)]
    trajectories += [sample_behavior(demand, human=False) for _ in range(n_agents)]
    demand_estimate = estimate_demand(trajectories)
    print("Estimated Demand from Behavior:", demand_estimate)
    # mean absolute gap between estimated and generated share, per product
    gaps = [np.abs(demand_estimate.get(k, 0) - demand[k]) for k in range(len(prices))]
    delta = np.mean(gaps)
    print("Demand Delta:", delta)
|
||||
126
engine/lib/render.py
Normal file
126
engine/lib/render.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""rendering logic for PHANTOM environment dashboard"""
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.gridspec import GridSpec
|
||||
|
||||
|
||||
def style_axis(ax, title: str = None, xlabel: str = None, ylabel: str = None):
    """Hide the top/right spines of `ax` and set whichever labels are given.

    Falsy `title`, `xlabel` or `ylabel` values leave that label untouched.
    """
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    if title:
        ax.set_title(title, fontsize=11, fontweight='bold', pad=8)
    if xlabel:
        ax.set_xlabel(xlabel, fontsize=9)
    if ylabel:
        ax.set_ylabel(ylabel, fontsize=9)
|
||||
|
||||
|
||||
class DashboardRenderer:
    """stateful renderer for PHANTOM market dynamics visualization

    Keeps one matplotlib figure and a 3x3 GridSpec between calls. Every
    `render` call clears the figure and redraws seven panels from the
    histories stored on the environment object passed in.
    """

    def __init__(self):
        # Figure and grid are created lazily on the first render() call.
        self.fig = None
        self.gs = None

    def render(self, env) -> None:
        """Redraw the full dashboard from `env`.

        Reads env._step_count, env.alpha, env.n_products, env._demand_history
        and env._price_history, and calls env._compute_elasticity(); the panel
        helpers also read env.market and env._revenue_history.
        """
        if self.fig is None:
            # First call: switch to interactive mode and open a non-blocking window.
            plt.ion()
            self.fig = plt.figure(figsize=(14, 10))
            self.gs = GridSpec(3, 3, figure=self.fig, hspace=0.35, wspace=0.3,
                               left=0.07, right=0.95, top=0.92, bottom=0.08)
            plt.show(block=False)

        # Panels are rebuilt from scratch on every call.
        self.fig.clear()
        self.fig.suptitle(f'PHANTOM Market Dynamics [t={env._step_count}, a={env.alpha:.2f}]',
                          fontsize=14, fontweight='bold')

        # Transposed so rows index products and columns index steps
        # (assumes each history entry is one per-product vector — TODO confirm).
        demand_mat = np.array(env._demand_history).T
        price_mat = np.array(env._price_history).T
        elasticity = env._compute_elasticity()

        self._render_scatter(env)
        self._render_elasticity_bar(env, elasticity)
        self._render_session_pie(env)
        self._render_price_heatmap(price_mat)
        self._render_demand_heatmap(demand_mat)
        self._render_correlation(env.n_products, price_mat, demand_mat)
        self._render_revenue(env)

        # Push the redraw to the open window.
        self.fig.canvas.draw_idle()
        self.fig.canvas.flush_events()

    def _render_scatter(self, env):
        """Grid cell [0, 0]: price vs demand scatter with a least-squares line."""
        ax = self.fig.add_subplot(self.gs[0, 0])
        prices_flat = np.array(env._price_history).flatten()
        demands_flat = np.array(env._demand_history).flatten()
        # Colour index per point: 0..n_products-1 repeated once per recorded step.
        product_ids = np.tile(np.arange(env.n_products), len(env._price_history))
        ax.scatter(prices_flat, demands_flat, c=product_ids, cmap='plasma', alpha=0.6, s=15, edgecolors='none')
        if len(prices_flat) > 1:
            # Degree-1 polynomial fit drawn across the observed price range.
            z = np.polyfit(prices_flat, demands_flat, 1)
            p_line = np.linspace(prices_flat.min(), prices_flat.max(), 50)
            ax.plot(p_line, np.polyval(z, p_line), '--', lw=1.5, alpha=0.8)
        style_axis(ax, "Price-Demand Relationship", "Price ($)", "Demand")

    def _render_elasticity_bar(self, env, elasticity):
        """Grid cell [0, 1]: horizontal bar per product showing `elasticity`."""
        ax = self.fig.add_subplot(self.gs[0, 1])
        ax.barh(range(env.n_products), elasticity, alpha=0.8)
        # Reference lines at 0 (solid) and -1 (dashed).
        ax.axvline(0, lw=0.8, alpha=0.5)
        ax.axvline(-1, lw=1, ls='--', alpha=0.5)
        ax.set_yticks(range(env.n_products))
        ax.set_yticklabels([f'P{i}' for i in range(env.n_products)], fontsize=7)
        style_axis(ax, "Price Elasticity", "(dQ/dP)(P/Q)", None)

    def _render_session_pie(self, env):
        """Grid cell [0, 2]: pie of human vs agent session counts from env.market."""
        ax = self.fig.add_subplot(self.gs[0, 2])
        n_h, n_a = env.market.Nhumans, env.market.Nagents
        wedges, _ = ax.pie([n_h, n_a], startangle=90, wedgeprops={'linewidth': 2, 'edgecolor': 'white'})
        ax.legend(wedges, [f'H ({n_h})', f'A ({n_a})'], loc='lower center', fontsize=8,
                  frameon=False, bbox_to_anchor=(0.5, -0.05))
        ax.set_title("Session Mix", fontsize=11, fontweight='bold')

    def _render_price_heatmap(self, price_mat):
        """Grid row 1, columns 0-1: heatmap of `price_mat` with a colorbar."""
        ax = self.fig.add_subplot(self.gs[1, :2])
        im = ax.imshow(price_mat, aspect='auto', cmap='viridis', origin='lower')
        style_axis(ax, "Price Heatmap P(product, t)", "Step", "Product")
        cbar = self.fig.colorbar(im, ax=ax, fraction=0.03, pad=0.02)
        cbar.set_label('$', fontsize=8)

    def _render_demand_heatmap(self, demand_mat):
        """Grid cell [1, 2]: heatmap of `demand_mat` with a colorbar."""
        ax = self.fig.add_subplot(self.gs[1, 2])
        im = ax.imshow(demand_mat, aspect='auto', cmap='Blues', origin='lower')
        style_axis(ax, "Demand Q(product, t)", "Step", None)
        self.fig.colorbar(im, ax=ax, fraction=0.046, pad=0.02)

    def _render_correlation(self, n_products, price_mat, demand_mat):
        """Grid cell [2, 0]: price-row vs demand-row correlation block.

        The heatmap is drawn only when `price_mat` has more than two columns;
        otherwise the axis is left empty apart from its title.
        """
        ax = self.fig.add_subplot(self.gs[2, 0])
        if price_mat.shape[1] > 2:
            # corrcoef stacks both matrices; keep the price-rows x demand-rows quadrant.
            corr = np.corrcoef(price_mat, demand_mat)[:n_products, n_products:]
            im = ax.imshow(corr, cmap='RdBu', vmin=-1, vmax=1, aspect='auto')
            ax.set_xticks(range(n_products))
            ax.set_yticks(range(n_products))
            ax.set_xticklabels([f'Q{i}' for i in range(n_products)], fontsize=6)
            ax.set_yticklabels([f'P{i}' for i in range(n_products)], fontsize=6)
            self.fig.colorbar(im, ax=ax, fraction=0.046, pad=0.02)
        style_axis(ax, "Price-Demand Correlation", None, None)

    def _render_revenue(self, env):
        """Grid row 2, columns 1-2: revenue over steps plus demand std on a twin axis."""
        ax = self.fig.add_subplot(self.gs[2, 1:])
        n_steps = len(env._revenue_history)
        # One standard deviation per recorded demand entry.
        demand_std = [np.std(d) for d in env._demand_history]
        ax.fill_between(range(n_steps), env._revenue_history, alpha=0.3)
        ax.plot(env._revenue_history, linewidth=2, label='Revenue')
        ax.set_xlim(0, max(n_steps, 1))
        ax.set_ylim(0, max(env._revenue_history) * 1.1 if env._revenue_history else 1)

        ax2 = ax.twinx()
        # NOTE(review): plotted against range(n_steps), so this presumably expects the
        # demand and revenue histories to have equal length — confirm against the env.
        ax2.plot(range(n_steps), demand_std, linewidth=2, ls='-', alpha=0.9, label='sigma(Demand)')
        # min()/max() raise ValueError when env._demand_history is empty.
        d_min, d_max = min(demand_std), max(demand_std)
        # Pad the y-range by 20% of the spread, or by 0.5 when the series is flat.
        margin = (d_max - d_min) * 0.2 if d_max > d_min else 0.5
        ax2.set_ylim(max(0, d_min - margin), d_max + margin)
        ax2.set_ylabel('Demand sigma', fontsize=9)

        style_axis(ax, "Revenue & Demand Dispersion", "Step", "Revenue ($)")
        ax.legend(loc='upper left', fontsize=7, frameon=False)
        ax2.legend(loc='upper right', fontsize=7, frameon=False)

    def close(self):
        """Close the figure, if one is open, so the next render() creates a new one."""
        if self.fig:
            plt.close(self.fig)
            self.fig = None
|
||||
34
engine/studies/factors.py
Normal file
34
engine/studies/factors.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""shared factor definitions for experimental designs"""
|
||||
import numpy as np
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Callable, Any
|
||||
|
||||
@dataclass
class Factor:
    """One experimental factor: a config key and the values it may take."""
    # key under which the chosen level is stored in a config
    name: str
    # candidate values for this factor
    levels: list
    # True -> fully crossed; False -> sampled
    primary: bool = True
|
||||
|
||||
# demand functions with compatible signatures
|
||||
def demand_linear(mu, sigma, size):
    """Draw `size` values from Normal(mu, sigma) and clip negatives to zero."""
    draws = np.random.normal(mu, sigma, size)
    return np.maximum(0, draws)
|
||||
def demand_uniform(mu, sigma, size):
    """Draw `size` values uniformly from [mu - sigma, mu + sigma)."""
    low, high = mu - sigma, mu + sigma
    return np.random.uniform(low, high, size)
|
||||
def demand_exponential(mu, sigma, size):
    """Draw `size` exponential values with scale `mu`.

    `sigma` is accepted only so the signature matches the other samplers;
    it is not used.
    """
    scale = mu
    return np.random.exponential(scale, size)
|
||||
def demand_logistic(mu, sigma, size):
    """Draw `size` values from a logistic distribution (loc=mu, scale=sigma)."""
    draws = np.random.logistic(mu, sigma, size)
    return draws
|
||||
|
||||
# Lookup from the "demand_fn" level name to the sampler it selects.
DEMAND_FUNCTIONS = dict(
    linear=demand_linear,
    uniform=demand_uniform,
    exponential=demand_exponential,
    logistic=demand_logistic,
)

# Primary factors are marked for full crossing; the rest are marked as sampled.
FACTORS = [
    Factor(name="demand_fn", levels=list(DEMAND_FUNCTIONS), primary=True),
    Factor(name="alpha", levels=[0.1, 0.3, 0.5, 0.7], primary=True),
    Factor(name="n_products", levels=[5, 15, 30, 50], primary=True),
    Factor(name="demand_mu", levels=[30.0, 50.0, 70.0], primary=False),
    Factor(name="demand_sigma", levels=[5.0, 10.0, 20.0], primary=False),
    Factor(name="N", levels=[100, 500, 1000], primary=False),
]

# Number of seeds run for each factor combination.
SEEDS_PER_CONFIG = 5
|
||||
89
engine/studies/full_factorial.py
Normal file
89
engine/studies/full_factorial.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""full factorial design - all factor combinations"""
|
||||
import sys
|
||||
sys.path.insert(0, "..")
|
||||
import logging
|
||||
from itertools import product
|
||||
import json
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from .factors import FACTORS, DEMAND_FUNCTIONS, SEEDS_PER_CONFIG
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
def generate_configs():
    """generate all factor combinations with seeds

    Crosses every level of every entry in FACTORS, repeats each combination
    for seeds 0..SEEDS_PER_CONFIG-1, and tags each config with an 8-character
    md5 prefix of its sorted-key JSON form under "id".
    """
    names = [f.name for f in FACTORS]
    level_sets = [f.levels for f in FACTORS]

    def _tagged(cfg):
        # the id is derived from the config before the id key itself is added
        digest = hashlib.md5(json.dumps(cfg, sort_keys=True).encode()).hexdigest()
        cfg["id"] = digest[:8]
        return cfg

    return [
        _tagged({**dict(zip(names, combo)), "seed": seed})
        for combo in product(*level_sets)
        for seed in range(SEEDS_PER_CONFIG)
    ]
|
||||
|
||||
def run_single(cfg: dict) -> dict:
    """execute one experiment config, return metrics

    Builds a PHANTOM environment from `cfg`, swaps in the configured demand
    sampler, and plays up to 100 uniformly random actions.

    Args:
        cfg: config dict with keys "id", "seed", "demand_fn", "n_products",
            "alpha", "N", "demand_mu" and "demand_sigma".

    Returns:
        Dict with "id", "config", "total_reward", "avg_reward" and "steps".
    """
    from engine.wrapper import PHANTOM
    import numpy as np

    np.random.seed(cfg["seed"])
    demand_fn = DEMAND_FUNCTIONS[cfg["demand_fn"]]

    env = PHANTOM(
        n_products=cfg["n_products"],
        alpha=cfg["alpha"],
        N=cfg["N"],
    )
    env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"]))
    # The action space samples from its own generator, which np.random.seed
    # does not reach; seed it too so a config replays the same actions.
    env.action_space.seed(cfg["seed"])

    total_reward, steps = 0.0, 0
    try:
        obs, _ = env.reset()
        for _ in range(100):
            action = env.action_space.sample()
            obs, reward, term, trunc, _ = env.step(action)
            # float() keeps the total a plain Python number for json.dumps
            total_reward += float(reward)
            steps += 1
            # an episode is over when it is terminated OR truncated
            if term or trunc: break
    finally:
        # release the environment even when a step raises
        env.close()

    return {
        "id": cfg["id"],
        "config": cfg,
        "total_reward": total_reward,
        "avg_reward": total_reward / steps if steps > 0 else 0.0,
        "steps": steps,
    }
|
||||
|
||||
def run_study(max_workers: int = None, output: str = "results_full.jsonl"):
    """Run every full-factorial config in a process pool and write JSON lines.

    Results are collected in config order, written to `output` in a single
    call once all runs have finished, and also returned as a list.
    """
    configs = generate_configs()
    log.info(f"full factorial: {len(configs)} configs ({len(configs)//SEEDS_PER_CONFIG} unique × {SEEDS_PER_CONFIG} seeds)")

    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        i = -1
        for result in ex.map(run_single, configs):
            i += 1
            results.append(result)
            # progress line after every 100th finished run
            if (i+1) % 100 != 0:
                continue
            log.info(f"progress: {i+1}/{len(configs)}")

    lines = [json.dumps(r) for r in results]
    Path(output).write_text("\n".join(lines))
    log.info(f"wrote {len(results)} results to {output}")
    return results
|
||||
|
||||
if __name__ == "__main__":
    # CLI: log the design size, then run the study unless --dry-run is given.
    import argparse
    p = argparse.ArgumentParser()
    for flag, options in (
        ("--workers", dict(type=int, default=None)),
        ("--output", dict(default="results_full.jsonl")),
        ("--dry-run", dict(action="store_true", help="only show design size")),
    ):
        p.add_argument(flag, **options)
    args = p.parse_args()

    configs = generate_configs()
    log.info(f"design: {len(configs)} runs | factors: {[f.name for f in FACTORS]} | levels: {[len(f.levels) for f in FACTORS]}")

    if not args.dry_run:
        run_study(args.workers, args.output)
|
||||
106
engine/studies/mixed_lh.py
Normal file
106
engine/studies/mixed_lh.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""mixed design: full factorial on primary factors, latin hypercube on secondary"""
|
||||
import sys
|
||||
sys.path.insert(0, "..")
|
||||
import logging
|
||||
from itertools import product
|
||||
import json
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
import numpy as np
|
||||
from scipy.stats.qmc import LatinHypercube
|
||||
from factors import FACTORS, DEMAND_FUNCTIONS, SEEDS_PER_CONFIG
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
LH_SAMPLES = 10
|
||||
|
||||
def generate_configs(lh_samples: int = LH_SAMPLES):
    """Build the mixed design: primary factors crossed, secondary factors sampled.

    For every combination of primary-factor levels, `lh_samples` points are
    drawn from one shared Latin hypercube sampler (seed 42); each coordinate
    u in [0, 1) picks level int(u * n_levels) of the matching secondary
    factor. Every resulting config is repeated for seeds
    0..SEEDS_PER_CONFIG-1 and tagged with an 8-character md5 "id".
    """
    primary = [f for f in FACTORS if f.primary]
    secondary = [f for f in FACTORS if not f.primary]
    lhs = LatinHypercube(d=len(secondary), seed=42)

    configs = []
    for p_combo in list(product(*[f.levels for f in primary])):
        primary_vals = {fac.name: level for fac, level in zip(primary, p_combo)}
        for s in lhs.random(n=lh_samples):
            base = dict(primary_vals)
            for idx, fac in enumerate(secondary):
                # map the unit-interval coordinate onto a discrete level
                base[fac.name] = fac.levels[int(s[idx] * len(fac.levels))]

            for seed in range(SEEDS_PER_CONFIG):
                cfg = {**base, "seed": seed}
                cfg["id"] = hashlib.md5(json.dumps(cfg, sort_keys=True).encode()).hexdigest()[:8]
                configs.append(cfg)
    return configs
|
||||
|
||||
def run_single(cfg: dict) -> dict:
    """Execute one experiment config and return its reward metrics.

    Builds a PHANTOM environment from `cfg`, swaps in the configured demand
    sampler, and plays up to 100 uniformly random actions.

    Args:
        cfg: config dict with keys "id", "seed", "demand_fn", "n_products",
            "alpha", "N", "demand_mu" and "demand_sigma".

    Returns:
        Dict with "id", "config", "total_reward", "avg_reward" and "steps".
    """
    from engine.wrapper import PHANTOM
    import numpy as np

    np.random.seed(cfg["seed"])
    demand_fn = DEMAND_FUNCTIONS[cfg["demand_fn"]]

    env = PHANTOM(
        n_products=cfg["n_products"],
        alpha=cfg["alpha"],
        N=cfg["N"],
    )
    env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"]))
    # The action space samples from its own generator, which np.random.seed
    # does not reach; seed it too so a config replays the same actions.
    env.action_space.seed(cfg["seed"])

    total_reward, steps = 0.0, 0
    try:
        obs, _ = env.reset()
        for _ in range(100):
            action = env.action_space.sample()
            obs, reward, term, trunc, _ = env.step(action)
            # float() keeps the total a plain Python number for json.dumps
            total_reward += float(reward)
            steps += 1
            # an episode is over when it is terminated OR truncated
            if term or trunc: break
    finally:
        # release the environment even when a step raises
        env.close()

    return {
        "id": cfg["id"],
        "config": cfg,
        "total_reward": total_reward,
        # guarded so a run with no completed steps reports 0.0 instead of dividing by zero
        "avg_reward": total_reward / steps if steps > 0 else 0.0,
        "steps": steps,
    }
|
||||
|
||||
def run_study(max_workers: int = None, output: str = "results_mixed.jsonl", lh_samples: int = LH_SAMPLES):
    """Run every mixed-design config in a process pool and write JSON lines.

    Results are collected in config order, written to `output` in a single
    call once all runs have finished, and also returned as a list.
    """
    configs = generate_configs(lh_samples)
    primary_sizes = [len(f.levels) for f in FACTORS if f.primary]
    n_primary_cells = int(np.prod(primary_sizes))
    log.info(f"mixed LH: {len(configs)} configs ({n_primary_cells} primary × {lh_samples} LH × {SEEDS_PER_CONFIG} seeds)")

    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        i = -1
        for result in ex.map(run_single, configs):
            i += 1
            results.append(result)
            # progress line after every 100th finished run
            if (i+1) % 100 != 0:
                continue
            log.info(f"progress: {i+1}/{len(configs)}")

    lines = [json.dumps(r) for r in results]
    Path(output).write_text("\n".join(lines))
    log.info(f"wrote {len(results)} results to {output}")
    return results
|
||||
|
||||
if __name__ == "__main__":
    # CLI: log the design size, then run the study unless --dry-run is given.
    import argparse
    p = argparse.ArgumentParser()
    for flag, options in (
        ("--workers", dict(type=int, default=None)),
        ("--output", dict(default="results_mixed.jsonl")),
        ("--lh-samples", dict(type=int, default=10)),
        ("--dry-run", dict(action="store_true", help="only show design size")),
    ):
        p.add_argument(flag, **options)
    args = p.parse_args()

    primary, secondary = [], []
    for f in FACTORS:
        (primary if f.primary else secondary).append(f)
    configs = generate_configs(args.lh_samples)
    log.info(f"design: {len(configs)} runs | primary: {[f.name for f in primary]} | secondary (LH): {[f.name for f in secondary]}")

    if not args.dry_run:
        run_study(args.workers, args.output, args.lh_samples)
|
||||
45
engine/train.py
Normal file
45
engine/train.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from stable_baselines3 import SAC
|
||||
from stable_baselines3.common.callbacks import EvalCallback, BaseCallback
|
||||
from .wrapper import PHANTOM
|
||||
|
||||
|
||||
class RenderCallback(BaseCallback):
    """Training callback that calls `render()` on a given environment at every step."""

    def __init__(self, env: PHANTOM):
        super().__init__()
        # environment whose dashboard is refreshed during training
        self.env = env

    def _on_step(self) -> bool:
        """Render once and return True."""
        keep_training = True
        self.env.render()
        return keep_training
|
||||
|
||||
|
||||
# Training environment renders live; the evaluation environment stays headless.
env = PHANTOM(n_products=10, alpha=0.3, render_mode="human")
eval_env = PHANTOM(n_products=10, alpha=0.3, render_mode=None)

model = SAC(
    "MultiInputPolicy",
    env,
    verbose=1,
    learning_rate=3e-4,
    buffer_size=50000,
    batch_size=256,
    tau=0.005,
    gamma=0.99,
)

render_cb = RenderCallback(env)
eval_cb = EvalCallback(eval_env, eval_freq=1000, n_eval_episodes=5, verbose=1)

try:
    model.learn(total_timesteps=50000, callback=[render_cb, eval_cb])
    model.save("phantom_sac")
finally:
    # `env` is rebound below; close both training-time environments first so
    # they are not left open for the rest of the run.
    env.close()
    eval_env.close()

# test trained policy
env = PHANTOM(n_products=10, alpha=0.3, render_mode="human")
try:
    obs, _ = env.reset()
    for _ in range(100):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, term, trunc, _ = env.step(action)
        env.render()
        if term or trunc: break
finally:
    env.close()
|
||||
118
engine/wrapper.py
Normal file
118
engine/wrapper.py
Normal file
@@ -0,0 +1,118 @@
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
import numpy as np
|
||||
from .engine import Limbo, MarketEngine, PricingEngine
|
||||
from .lib.render import DashboardRenderer
|
||||
|
||||
|
||||
class PHANTOM(gym.Env):
    """Gymnasium wrapper for the Limbo pricing-market simulation. Platform sets prices, market responds with demand.

    Action:       one price per product, a ``Box`` bounded by ``price_bounds``.
    Observation:  ``Dict`` with the latest per-product ``demand`` and ``prices``.
    Reward:       revenue, i.e. sum over products of ``price * demand``.
    Episode end:  ``terminated`` becomes True once ``max_steps`` steps were taken.
    """

    metadata = {"render_modes": ["human", "ansi"]}

    def __init__(self,
                 n_products: int = 10,
                 alpha: float = 0.3,
                 N: int = 100,
                 price_bounds: tuple = (10.0, 150.0),
                 lambda_coi: float = 0.1,
                 render_mode: str = None,
                 max_steps: int = 100):
        """Build the market engine and declare the action/observation spaces.

        Args:
            n_products: number of products priced each step.
            alpha: forwarded to ``MarketEngine``.
            N: forwarded to ``MarketEngine``.
            price_bounds: ``(low, high)`` bounds applied to every price.
            lambda_coi: stored on the instance; not used by the reward yet
                (see the TODO in ``_compute_reward``).
            render_mode: ``"human"``, ``"ansi"`` or ``None``.
            max_steps: number of steps after which ``step`` reports termination.
        """
        super().__init__()
        self.n_products = n_products
        self.price_bounds = price_bounds
        self.lambda_coi = lambda_coi
        self.render_mode = render_mode
        self.alpha = alpha
        self.N = N
        self.max_steps = max_steps

        self.market = MarketEngine(alpha=alpha, N=N)
        # The stub platform and Limbo instance are constructed here but not
        # referenced by any other method of this class.
        self._platform_stub = PricingEngine()
        self._limbo = Limbo(self._platform_stub, self.market)

        self.action_space = spaces.Box(
            low=price_bounds[0], high=price_bounds[1],
            shape=(n_products,), dtype=np.float32
        )
        self.observation_space = spaces.Dict({
            "demand": spaces.Box(low=0.0, high=100.0, shape=(n_products,), dtype=np.float32),
            "prices": spaces.Box(low=price_bounds[0], high=price_bounds[1], shape=(n_products,), dtype=np.float32),
        })

        self._prices = None
        self._demand = None
        self._step_count = 0
        self._demand_history = []
        self._price_history = []
        self._revenue_history = []
        self._renderer = None

    def _demand_to_array(self, demand: dict) -> np.ndarray:
        """Flatten a demand mapping into an array indexed by product id (missing ids -> 0.0)."""
        return np.array([demand.get(i, 0.0) for i in range(self.n_products)])

    def _get_obs(self) -> dict:
        """Return the current observation as float32 arrays."""
        return {
            "demand": self._demand_to_array(self._demand).astype(np.float32),
            "prices": self._prices.astype(np.float32),
        }

    def _compute_reward(self, prices: np.ndarray, demand: dict) -> float:
        """Revenue for the given prices and demand mapping."""
        revenue = np.sum(prices * self._demand_to_array(demand))
        # TODO: implement supra-competitive price punishment
        return float(revenue)

    def _record_history(self):
        """Append the current demand, prices and revenue to the history lists."""
        demand_arr = self._demand_to_array(self._demand)
        self._demand_history.append(demand_arr)
        self._price_history.append(self._prices.copy())
        self._revenue_history.append(np.sum(self._prices * demand_arr))

    def reset(self, seed=None, options=None):
        """Start a new episode with uniformly random initial prices.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        # Draw from the environment's own seeded generator so reset(seed=...)
        # reproduces the same initial prices; the global np.random ignored the seed.
        # NOTE(review): any randomness inside MarketEngine.act is not covered by this seed.
        low, high = self.price_bounds
        self._prices = self.np_random.uniform(low, high, size=self.n_products)
        self._demand = self.market.act(self._prices)
        self._step_count = 0
        self._demand_history, self._price_history, self._revenue_history = [], [], []
        self._record_history()
        return self._get_obs(), {}

    def step(self, action: np.ndarray):
        """Apply the (clipped) prices, query the market and return the Gymnasium 5-tuple."""
        self._prices = np.clip(action, *self.price_bounds)
        self._demand = self.market.act(self._prices)
        self._step_count += 1
        self._record_history()

        reward = self._compute_reward(self._prices, self._demand)
        # NOTE(review): the step limit is reported as `terminated`, not `truncated`;
        # kept as-is because callers in this repo check `term`.
        terminated = self._step_count >= self.max_steps

        return self._get_obs(), reward, terminated, False, {"step": self._step_count}

    def _compute_elasticity(self) -> np.ndarray:
        """point elasticity: e = (dQ/dP) * (P/Q) via finite differences, clipped to [-5, 5]

        Returns zeros until at least two history entries exist. Price changes
        of at most 0.5 in absolute value contribute 0, and Q is floored at 1.0
        in the P/Q ratio. The result is the mean over recorded steps.
        """
        if len(self._price_history) < 2:
            return np.zeros(self.n_products)
        p, q = np.array(self._price_history), np.array(self._demand_history)
        dp, dq = np.diff(p, axis=0), np.diff(q, axis=0)
        valid = np.abs(dp) > 0.5
        with np.errstate(divide='ignore', invalid='ignore'):
            elasticity = np.where(valid, (dq / dp) * (p[:-1] / np.maximum(q[:-1], 1.0)), 0.0)
        elasticity = np.nan_to_num(np.clip(elasticity, -5.0, 5.0), nan=0.0)
        return np.mean(elasticity, axis=0) if len(elasticity) > 0 else np.zeros(self.n_products)

    def render(self):
        """Render according to ``render_mode``.

        "human" lazily creates a ``DashboardRenderer`` and draws this env;
        "ansi" returns a one-line text summary; any other mode returns None.
        """
        if self.render_mode == "human":
            if self._renderer is None:
                self._renderer = DashboardRenderer()
            self._renderer.render(self)
        elif self.render_mode == "ansi":
            return f"step={self._step_count}, prices={self._prices}, demand={self._demand}"
        return None

    def close(self):
        """Close and drop the dashboard renderer, if one was created."""
        if self._renderer is not None:
            self._renderer.close()
            self._renderer = None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: drive the environment with random prices and render each step.
    env = PHANTOM(n_products=15, alpha=0.3, N=100, render_mode="human")
    try:
        obs, _ = env.reset()
        for step in range(100):
            action = env.action_space.sample()
            obs, reward, term, trunc, info = env.step(action)
            env.render()
            # Stop on either end-of-episode flag, matching the loop in train.py.
            if term or trunc:
                break
    finally:
        # Always release the renderer, even if a step or render call raises.
        env.close()
|
||||
117
experiments/agents/run.py
Normal file
117
experiments/agents/run.py
Normal file
@@ -0,0 +1,117 @@
|
||||
from supabase import create_client, Client
|
||||
import os
|
||||
import random
|
||||
import asyncio
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from experiments.agents.agent import get_agent, AgentTypes
|
||||
from lib.kafka_client import get_interactions
|
||||
|
||||
load_dotenv()

# Directory that receives one sub-folder of exported data per experiment.
# Resolved next to this file (experiments/agents/collected_data/) instead of a
# hard-coded absolute home-directory path, so the script works from any checkout.
# The trailing separator is kept because callers concatenate onto this string.
RESULTS = os.path.join(os.path.dirname(os.path.abspath(__file__)), "collected_data", "")

# Supabase client configured from the environment loaded above.
client = create_client(
    os.getenv("NEXT_PUBLIC_SUPABASE_URL"),
    os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY")
)
|
||||
def pick_random_task(mode: str = 'hotel'):
    """Fetch all rows of the ``tasks`` table and return one at random.

    Args:
        mode: when ``'hotel'`` (the default), tasks whose description mentions
            "flight" (case-insensitive) are excluded. Any other value disables
            the filter.

    Returns:
        A randomly chosen task row, or ``None`` when no task is left.
    """
    # `.data` may be empty/None when the query returns nothing.
    tasks = client.table("tasks").select("*").execute().data or []
    if mode == 'hotel':
        # drop all that have 'flight' in the description; a missing or null
        # description is treated as empty text instead of raising.
        tasks = [
            task for task in tasks
            if 'flight' not in (task.get('task_description') or '').lower()
        ]
    return random.choice(tasks) if tasks else None
|
||||
|
||||
def clear_kafka_data():
    """Wipe the experiment topics by deleting them and creating them again.

    The broker address is read from ``KAFKA_HOST`` / ``KAFKA_PORT``
    (defaults ``localhost:9092``). Failures in either phase are printed, not
    raised; the admin client is closed after the create attempt.
    """
    import time
    from kafka.admin import KafkaAdminClient, NewTopic
    from kafka.errors import UnknownTopicOrPartitionError

    host = os.getenv('KAFKA_HOST', 'localhost')
    port = os.getenv('KAFKA_PORT', '9092')
    admin = KafkaAdminClient(bootstrap_servers=f'{host}:{port}')

    topics = ['user-interactions', 'price-logs']

    # Phase 1: delete, then pause briefly before recreating.
    try:
        admin.delete_topics(topics, timeout_ms=5000)
        print(f"Deleted topics: {topics}")
        time.sleep(2)
    except UnknownTopicOrPartitionError:
        print("Topics don't exist, skipping delete")
    except Exception as e:
        print(f"Error deleting topics: {e}")

    # Phase 2: recreate each topic with 3 partitions and replication factor 1.
    replacements = [
        NewTopic(name=topic_name, num_partitions=3, replication_factor=1)
        for topic_name in topics
    ]

    try:
        admin.create_topics(new_topics=replacements, validate_only=False)
        print(f"Recreated topics: {topics}")
    except Exception as e:
        print(f"Error creating topics: {e}")
    finally:
        admin.close()
|
||||
|
||||
def create_new_experiment(task_id):
    """Insert an agent experiment row for ``task_id`` and return it.

    The subject name is ``agent_`` followed by the first 8 characters of a
    random UUID. Returns the first inserted row, or ``None`` when the insert
    response carries no data.
    """
    import uuid

    short_id = str(uuid.uuid4())[:8]
    row = {
        "subject_name": f"agent_{short_id}",
        "xp_human_only": False,
        "xp_market_mode": "hotel",
        "xp_task_id": task_id,
    }
    response = client.table("experiments").insert(row).execute()
    if not response.data:
        return None
    return response.data[0]
|
||||
|
||||
if __name__ == "__main__":
    # Start from empty topics so the export below only contains this run.
    clear_kafka_data()

    task = pick_random_task()
    if not task:
        print("No tasks available")
        raise SystemExit(1)

    experiment = create_new_experiment(task['id'])
    if not experiment:
        # create_new_experiment returns None when the insert yields no row;
        # previously this crashed with a TypeError on experiment['id'].
        print(f"Failed to create experiment for task {task['id']}")
        raise SystemExit(1)

    exp_id = experiment['id']
    exp_dir = os.path.join(RESULTS, str(exp_id))
    os.makedirs(exp_dir, exist_ok=True)

    # construct experiment URL with uuid param
    base_url = os.getenv('NEXT_PUBLIC_API_BASE', 'http://localhost:3000')
    agent_url = f"{base_url}/start-task?uuid={exp_id}"

    print(f"Created experiment {exp_id} for task {task['id']}")
    print(f"Agent will interact with: {agent_url}")

    # instantiate and run agent
    agent = get_agent(
        AgentTypes.GENERIC_BROWSER_USE_AGENT,
        goal=task['task_description'],
        url=agent_url,
        timeout=300,
        headless=True
    )

    result = asyncio.run(agent.act())
    print(f"Agent result: {result}")

    # export interaction and price data from kafka
    interactions = get_interactions(topic='user-interactions', timeout_ms=3000)
    prices = get_interactions(topic='price-logs', timeout_ms=3000)

    with open(os.path.join(exp_dir, "int.json"), 'w') as f:
        json.dump(interactions, f, indent=2)

    with open(os.path.join(exp_dir, "price.json"), 'w') as f:
        json.dump(prices, f, indent=2)

    print(f"Experiment {exp_id} completed.")
    print(f"Exported {len(interactions)} interactions and {len(prices)} price logs to {exp_dir}")
|
||||
@@ -1,3 +1,4 @@
|
||||
from pandas.core.algorithms import factorize_array
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
@@ -208,3 +209,12 @@ def create_surge_pricing_dag(store_mode: str) -> DAG:
|
||||
# instantiate DAGs for Airflow to discover
|
||||
dag_airline = create_surge_pricing_dag('airline')
|
||||
dag_hotel = create_surge_pricing_dag('hotel')
|
||||
|
||||
# TODO: Refactor this factory from a surge pricing factory to a general pricing factory
|
||||
# We will do this by passing a pricing strategy class to the factory, since the generic pipeline is:
|
||||
# take all interaction data, group by sessionId and assign a new price vector to each session
|
||||
# in the grouping we get a subset of the interactions per sessionId and we can map that to some Features
|
||||
# we define a custom _get_features(interactions, ...) method in the strategy class
|
||||
# we then run only the inference which is the .predict(trajectory) per-session which will give us a new price vector
|
||||
# this we then publish for each sessionId group
|
||||
# this might include no deleting most of the pricers we have defined and starting with a super simple surge-pricing algorithm that is no-fit only predict. This we can then test end-to-end and observe changes to prices according to a desired strategy - we have to define this one as a very short term strategy because we run sessions that take only a few minutes.
|
||||
|
||||
@@ -120,15 +120,31 @@ def apply_surge_pricing(**kwargs):
|
||||
# rename demand_score to demand for pricer compatibility
|
||||
data = product_features.rename(columns={'demand_score': 'demand'})
|
||||
|
||||
high_thresh = dag_conf.get('high_threshold', 10)
|
||||
low_thresh = dag_conf.get('low_threshold', 2)
|
||||
surge_mult = dag_conf.get('surge_multiplier', 1.2)
|
||||
discount_mult = dag_conf.get('discount_multiplier', 0.9)
|
||||
|
||||
logging.info(f"Surge pricing config: high_thresh={high_thresh}, low_thresh={low_thresh}, surge_mult={surge_mult}, discount_mult={discount_mult}")
|
||||
logging.info(f"Demand stats: min={data['demand'].min():.2f}, max={data['demand'].max():.2f}, mean={data['demand'].mean():.2f}")
|
||||
logging.info(f"Products with high demand (>={high_thresh}): {(data['demand'] >= high_thresh).sum()}")
|
||||
logging.info(f"Products with low demand (<={low_thresh}): {(data['demand'] <= low_thresh).sum()}")
|
||||
|
||||
surge_pricer = SimpleSurgePricer(
|
||||
high_threshold=dag_conf.get('high_threshold', 10),
|
||||
low_threshold=dag_conf.get('low_threshold', 2),
|
||||
surge_multiplier=dag_conf.get('surge_multiplier', 1.2),
|
||||
discount_multiplier=dag_conf.get('discount_multiplier', 0.9)
|
||||
high_threshold=high_thresh,
|
||||
low_threshold=low_thresh,
|
||||
surge_multiplier=surge_mult,
|
||||
discount_multiplier=discount_mult
|
||||
)
|
||||
surge_pricer.fit(data)
|
||||
data['optimal_price'] = surge_pricer.predict()
|
||||
|
||||
base_avg = data['base_price'].mean()
|
||||
optimal_avg = data['optimal_price'].mean()
|
||||
price_change_pct = ((optimal_avg - base_avg) / base_avg) * 100
|
||||
|
||||
logging.info(f"Price adjustment: base_avg={base_avg:.2f}, optimal_avg={optimal_avg:.2f}, change={price_change_pct:+.1f}%")
|
||||
|
||||
prices_df = data[['productId', 'price', 'base_price', 'optimal_price', 'demand']].rename(columns={
|
||||
'price': 'current_price',
|
||||
'demand': 'demand_score'
|
||||
|
||||
@@ -1,11 +1,21 @@
|
||||
from .evals import evaluate
|
||||
from .arch import (
|
||||
XGBoostAgentClassifier,
|
||||
LightGBMAgentClassifier
|
||||
LightGBMAgentClassifier,
|
||||
ContrastiveWeakClassifier,
|
||||
TrajectoryEncoder,
|
||||
WeakClassifier,
|
||||
contrastive_loss,
|
||||
featurize_trajectory,
|
||||
)
|
||||
|
||||
__all__ =[
|
||||
__all__ = [
|
||||
'evaluate',
|
||||
'XGBoostAgentClassifier',
|
||||
'LightGBMAgentClassifier'
|
||||
'LightGBMAgentClassifier',
|
||||
'ContrastiveWeakClassifier',
|
||||
'TrajectoryEncoder',
|
||||
'WeakClassifier',
|
||||
'contrastive_loss',
|
||||
'featurize_trajectory',
|
||||
]
|
||||
|
||||
@@ -1,122 +1,212 @@
|
||||
# sklearn compatible models for agent detection
|
||||
from sklearn.base import BaseEstimator, ClassifierMixin
|
||||
from procesing.context import PipelineContext
|
||||
from typing import Any, Optional, Tuple
|
||||
from typing import Any, Optional, Tuple, Dict, List
|
||||
from abc import ABC, abstractmethod
|
||||
import xgboost as xgb
|
||||
import lightgbm as lgb
|
||||
from collections import defaultdict
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# add lib to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'lib'))
|
||||
from lib.features import (
|
||||
transition_histogram as _lib_transition_histogram,
|
||||
temporal_signature as _lib_temporal_signature,
|
||||
state_coverage as _lib_state_coverage,
|
||||
transition_entropy as _lib_transition_entropy,
|
||||
featurize_trajectory as _lib_featurize_trajectory,
|
||||
parse_timestamp
|
||||
)
|
||||
from lib.state import event_to_state, get_event_name, get_timestamp
|
||||
|
||||
TASK = 'classification'
|
||||
LABELS = ['human', 'agent']
|
||||
|
||||
|
||||
class BaseAgentClassifier(BaseEstimator, ClassifierMixin, ABC):
|
||||
"""Base class for tree-based agent detection classifiers with common logic"""
|
||||
class WeakClassifier(BaseEstimator, ClassifierMixin, ABC):
    """Base for the weakly supervised contrastive human/agent classifiers.

    Subclasses learn to distinguish human from agent behavior using weakly
    supervised contrastive learning plus augmentation. This base only keeps
    the extra keyword arguments and a ``model`` placeholder.
    """

    def __init__(self, **kwargs):
        """Remember arbitrary keyword arguments; no model exists yet."""
        super().__init__()
        self.kwargs = kwargs
        self.model = None
|
||||
|
||||
def __init__(self, context: Optional[PipelineContext] = None, n_estimators: int = 200,
|
||||
max_depth: int = 6, learning_rate: float = 0.05,
|
||||
early_stopping_rounds: int = 20):
|
||||
self.context = context
|
||||
|
||||
class TrajectoryEncoder(nn.Module):
    """Map an event sequence to a fixed-size, L2-normalized embedding.

    Each event vector is projected linearly (ReLU), the sequence is run
    through a bidirectional LSTM, and the final hidden states of both
    directions are concatenated and projected to ``embed_dim``.
    """

    def __init__(self, input_dim: int, embed_dim: int = 32, hidden_dim: int = 64):
        """Create the per-event projection, the BiLSTM and the output projection."""
        super().__init__()
        self.event_embed = nn.Linear(input_dim, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.proj = nn.Linear(hidden_dim * 2, embed_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x`` of shape (batch, seq_len, input_dim) into (batch, embed_dim)."""
        per_event = F.relu(self.event_embed(x))
        _outputs, state = self.lstm(per_event)
        final_hidden = state[0]
        # the last two entries are the final states of the two directions
        second_last, last = final_hidden[-2], final_hidden[-1]
        summary = torch.cat((second_last, last), dim=1)
        projected = self.proj(summary)
        return F.normalize(projected, dim=1)
|
||||
|
||||
|
||||
class ContrastiveWeakClassifier(WeakClassifier):
    """Contrastive learning classifier for human/agent trajectory discrimination

    Wraps a ``TrajectoryEncoder`` and a 2-way linear head. ``margin`` is only
    stored here; the training loop reads it when computing the triplet loss.
    """

    def __init__(self, input_dim: int = 64, embed_dim: int = 32, margin: float = 1.0, **kwargs):
        """Build encoder and classification head and pick CUDA when available.

        The modules are created on CPU; call ``to_device()`` before training.
        """
        super().__init__(**kwargs)
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.margin = margin
        self.encoder = TrajectoryEncoder(input_dim, embed_dim)
        self.classifier = nn.Linear(embed_dim, 2)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self._fitted = False

    def to_device(self):
        """Move encoder and head to ``self.device``; returns ``self`` for chaining."""
        self.encoder.to(self.device)
        self.classifier.to(self.device)
        return self

    def encode(self, x: torch.Tensor) -> torch.Tensor:
        """Embed ``x`` (moved to ``self.device``) with the trajectory encoder."""
        return self.encoder(x.to(self.device))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the 2-class logits for ``x``."""
        emb = self.encode(x)
        return self.classifier(emb)

    def fit(self, X, y=None):  # sklearn interface - actual training in weak.train.py
        """Mark the estimator as fitted; no optimisation happens here."""
        self._fitted = True
        return self

    def _inference_logits(self, X: np.ndarray) -> torch.Tensor:
        """Shared inference path of ``predict`` and ``predict_proba``.

        Each row of ``X`` is treated as a length-1 sequence (``unsqueeze(1)``).
        """
        # Inputs are moved to self.device in encode(), so the modules must live
        # there too; without this, predicting on a CUDA host before to_device()
        # failed with a device mismatch.
        self.to_device()
        self.encoder.eval()
        self.classifier.eval()
        with torch.no_grad():
            x = torch.as_tensor(X, dtype=torch.float32).unsqueeze(1)
            return self.forward(x)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the arg-max class index per row of ``X``."""
        logits = self._inference_logits(X)
        return torch.argmax(logits, dim=1).cpu().numpy()

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return softmax class probabilities per row of ``X``."""
        logits = self._inference_logits(X)
        return F.softmax(logits, dim=1).cpu().numpy()
|
||||
|
||||
|
||||
def contrastive_loss(anchor: torch.Tensor, positive: torch.Tensor, negative: torch.Tensor, margin: float = 0.3) -> torch.Tensor:
    """Cosine-similarity triplet loss, averaged over the batch.

    A triplet is penalised unless the anchor is more similar to the positive
    than to the negative by at least ``margin``.
    """
    sim_to_negative = F.cosine_similarity(anchor, negative)
    sim_to_positive = F.cosine_similarity(anchor, positive)  # higher = more similar
    # hinge on (neg - pos + margin): zero once pos_sim exceeds neg_sim by the margin
    hinge = F.relu(sim_to_negative - sim_to_positive + margin)
    return hinge.mean()
|
||||
|
||||
|
||||
def nt_xent_loss(z_i: torch.Tensor, z_j: torch.Tensor, temperature: float = 0.5) -> torch.Tensor:
    """NT-Xent (SimCLR-style) loss over two batches of paired embeddings.

    Row k of ``z_i`` and row k of ``z_j`` form a positive pair; every other
    row in the combined batch acts as a negative.
    """
    n_pairs = z_i.size(0)
    stacked = torch.cat((z_i, z_j), dim=0)  # (2N, embed_dim)

    # all-pairs cosine similarity, scaled by the temperature
    pairwise = F.cosine_similarity(stacked[:, None, :], stacked[None, :, :], dim=2) / temperature

    # a sample must never be matched with itself
    self_pairs = torch.eye(2 * n_pairs, dtype=torch.bool, device=stacked.device)
    pairwise = pairwise.masked_fill(self_pairs, -float('inf'))

    # row k pairs with row k + N and vice versa
    first_half = torch.arange(n_pairs, device=stacked.device)
    targets = torch.cat((first_half + n_pairs, first_half))
    return F.cross_entropy(pairwise, targets)
|
||||
|
||||
|
||||
# feature extraction utilities - delegating to lib.features for unified implementation
|
||||
# these wrappers maintain backwards compatibility for existing imports
|
||||
|
||||
def transition_histogram(events: List, state_fn, max_states: int = 50) -> np.ndarray:
    """Normalized histogram of the state transitions found in a trajectory.

    Backwards-compatible wrapper: the work is done by
    ``lib.features.transition_histogram`` with the same arguments.
    """
    histogram = _lib_transition_histogram(events, state_fn, max_states)
    return histogram
|
||||
|
||||
|
||||
def temporal_signature(events: List, ts_fn) -> np.ndarray:
    """Temporal features (mean/std/skew of inter-event times) of a trajectory.

    Backwards-compatible wrapper around ``lib.features.temporal_signature``.
    """
    signature = _lib_temporal_signature(events, ts_fn)
    return signature
|
||||
|
||||
|
||||
def state_coverage(events: List, state_fn, mdp_states: set) -> float:
    """Share of the MDP states that the trajectory visits.

    Backwards-compatible wrapper around ``lib.features.state_coverage``.
    """
    coverage = _lib_state_coverage(events, state_fn, mdp_states)
    return coverage
|
||||
|
||||
|
||||
def transition_entropy(events: List, state_fn) -> float:
    """Entropy of the transition distribution (how random the navigation is).

    Backwards-compatible wrapper around ``lib.features.transition_entropy``.
    """
    entropy = _lib_transition_entropy(events, state_fn)
    return entropy
|
||||
|
||||
|
||||
def featurize_trajectory(events: List, mdp: Optional[Dict] = None, input_dim: int = 64) -> np.ndarray:
    """Turn a trajectory into a fixed-size feature vector via ``lib.features``.

    Args:
        events: the trajectory's events.
        mdp: optional mapping; its ``'states'`` entry (if any) supplies the
            reference state set. Without it an empty set is used.
        input_dim: forwarded to the library implementation.
    """
    if mdp:
        known_states = set(mdp.get('states', []))
    else:
        known_states = set()

    def timestamp_of(event):
        raw = get_timestamp(event)
        return parse_timestamp(raw)

    return _lib_featurize_trajectory(
        events, event_to_state, timestamp_of, get_event_name, known_states, input_dim
    )
|
||||
|
||||
|
||||
# gradient boosting classifiers for comparison baselines
|
||||
class XGBoostAgentClassifier(BaseEstimator, ClassifierMixin):
|
||||
"""XGBoost classifier for human/agent detection from session features"""
|
||||
def __init__(self, n_estimators: int = 100, max_depth: int = 6, learning_rate: float = 0.1, **kwargs):
|
||||
self.n_estimators = n_estimators
|
||||
self.max_depth = max_depth
|
||||
self.learning_rate = learning_rate
|
||||
self.early_stopping_rounds = early_stopping_rounds
|
||||
self.model_ = None
|
||||
self.feature_names_ = None
|
||||
|
||||
def _to_array(self, X):
|
||||
"""Convert pandas structures to numpy arrays"""
|
||||
return X.values if isinstance(X, (pd.DataFrame, pd.Series)) else X
|
||||
|
||||
def _compute_pos_weight(self, y_arr):
|
||||
"""Calculate scale_pos_weight for class imbalance handling"""
|
||||
n_neg, n_pos = (y_arr == 0).sum(), (y_arr == 1).sum()
|
||||
return n_neg / n_pos if n_pos > 0 else 1.0
|
||||
|
||||
def _prepare_eval_set(self, eval_set):
|
||||
"""Convert eval_set to numpy arrays if needed"""
|
||||
if not eval_set:
|
||||
return None
|
||||
X_val, y_val = eval_set[0]
|
||||
return [(self._to_array(X_val), self._to_array(y_val))]
|
||||
|
||||
@abstractmethod
|
||||
def _build_model(self, scale_pos: float):
|
||||
"""Build the underlying model instance (must be implemented by subclasses)"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def _fit_with_eval(self, X_arr, y_arr, eval_arr):
|
||||
"""Fit model with evaluation set (must be implemented by subclasses)"""
|
||||
pass
|
||||
|
||||
def fit(self, X, y, eval_set=None):
|
||||
X_arr, y_arr = self._to_array(X), self._to_array(y)
|
||||
|
||||
if isinstance(X, pd.DataFrame):
|
||||
self.feature_names_ = X.columns.tolist()
|
||||
|
||||
scale_pos = self._compute_pos_weight(y_arr)
|
||||
self.model_ = self._build_model(scale_pos)
|
||||
|
||||
eval_arr = self._prepare_eval_set(eval_set)
|
||||
if eval_arr:
|
||||
self._fit_with_eval(X_arr, y_arr, eval_arr)
|
||||
else:
|
||||
self.model_.fit(X_arr, y_arr)
|
||||
self.model = None
|
||||
self.kwargs = kwargs
|
||||
|
||||
def fit(self, X: np.ndarray, y: np.ndarray):
|
||||
try:
|
||||
import xgboost as xgb
|
||||
self.model = xgb.XGBClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth,
|
||||
learning_rate=self.learning_rate, **self.kwargs)
|
||||
self.model.fit(X, y)
|
||||
except ImportError:
|
||||
raise ImportError("xgboost required for XGBoostAgentClassifier")
|
||||
return self
|
||||
|
||||
def predict(self, X):
|
||||
return self.model_.predict(self._to_array(X))
|
||||
def predict(self, X: np.ndarray) -> np.ndarray:
|
||||
if self.model is None:
|
||||
raise ValueError("fit the model first")
|
||||
return self.model.predict(X)
|
||||
|
||||
def predict_proba(self, X):
|
||||
return self.model_.predict_proba(self._to_array(X))
|
||||
|
||||
@property
|
||||
def feature_importances_(self):
|
||||
return self.model_.feature_importances_ if self.model_ else None
|
||||
def predict_proba(self, X: np.ndarray) -> np.ndarray:
|
||||
if self.model is None:
|
||||
raise ValueError("fit the model first")
|
||||
return self.model.predict_proba(X)
|
||||
|
||||
|
||||
class XGBoostAgentClassifier(BaseAgentClassifier):
|
||||
"""XGBoost binary classifier for agent detection with class imbalance handling"""
|
||||
class LightGBMAgentClassifier(BaseEstimator, ClassifierMixin):
|
||||
"""LightGBM classifier for human/agent detection from session features"""
|
||||
def __init__(self, n_estimators: int = 100, max_depth: int = -1, learning_rate: float = 0.1, **kwargs):
|
||||
self.n_estimators = n_estimators
|
||||
self.max_depth = max_depth
|
||||
self.learning_rate = learning_rate
|
||||
self.model = None
|
||||
self.kwargs = kwargs
|
||||
|
||||
def _build_model(self, scale_pos: float):
|
||||
return xgb.XGBClassifier(
|
||||
n_estimators=self.n_estimators,
|
||||
max_depth=self.max_depth,
|
||||
learning_rate=self.learning_rate,
|
||||
scale_pos_weight=scale_pos,
|
||||
eval_metric='auc',
|
||||
early_stopping_rounds=self.early_stopping_rounds,
|
||||
random_state=42,
|
||||
tree_method='hist',
|
||||
enable_categorical=False
|
||||
)
|
||||
def fit(self, X: np.ndarray, y: np.ndarray):
|
||||
try:
|
||||
import lightgbm as lgb
|
||||
self.model = lgb.LGBMClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth,
|
||||
learning_rate=self.learning_rate, verbose=-1, **self.kwargs)
|
||||
self.model.fit(X, y)
|
||||
except ImportError:
|
||||
raise ImportError("lightgbm required for LightGBMAgentClassifier")
|
||||
return self
|
||||
|
||||
def _fit_with_eval(self, X_arr, y_arr, eval_arr):
|
||||
self.model_.fit(X_arr, y_arr, eval_set=eval_arr, verbose=False)
|
||||
def predict(self, X: np.ndarray) -> np.ndarray:
|
||||
if self.model is None:
|
||||
raise ValueError("fit the model first")
|
||||
return self.model.predict(X)
|
||||
|
||||
|
||||
class LightGBMAgentClassifier(BaseAgentClassifier):
|
||||
"""LightGBM binary classifier for agent detection with class imbalance handling"""
|
||||
|
||||
def _build_model(self, scale_pos: float):
|
||||
return lgb.LGBMClassifier(
|
||||
n_estimators=self.n_estimators,
|
||||
max_depth=self.max_depth,
|
||||
learning_rate=self.learning_rate,
|
||||
scale_pos_weight=scale_pos,
|
||||
metric='auc',
|
||||
random_state=42,
|
||||
verbosity=-1
|
||||
)
|
||||
|
||||
def _fit_with_eval(self, X_arr, y_arr, eval_arr):
|
||||
self.model_.fit(
|
||||
X_arr, y_arr,
|
||||
eval_set=eval_arr,
|
||||
callbacks=[lgb.early_stopping(self.early_stopping_rounds, verbose=False)]
|
||||
)
|
||||
def predict_proba(self, X: np.ndarray) -> np.ndarray:
|
||||
if self.model is None:
|
||||
raise ValueError("fit the model first")
|
||||
return self.model.predict_proba(X)
|
||||
|
||||
246
experiments/ml/weak_train.py
Normal file
246
experiments/ml/weak_train.py
Normal file
@@ -0,0 +1,246 @@
|
||||
import sys
|
||||
sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/sim/rl/behavior_loader")
|
||||
sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml")
|
||||
|
||||
from sim.rl.behavior_loader.loader import AgentLoader, Loader, JointLoader, PayloadModel
|
||||
from sim.rl.behavior_loader.models import JointBehaviorModel
|
||||
from arch import ContrastiveWeakClassifier, contrastive_loss, featurize_trajectory
|
||||
from typing import List, Optional, Dict
|
||||
from datetime import datetime, timedelta
|
||||
from copy import deepcopy
|
||||
import numpy as np
|
||||
import random
|
||||
import torch
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
from torch.optim import Adam
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
# Data/output locations, resolved relative to this file
# (<repo>/experiments/ml/weak_train.py) instead of hard-coded absolute
# home-directory paths, so the script works from any checkout.
from pathlib import Path as _Path

_EXPERIMENTS_DIR = _Path(__file__).resolve().parent.parent

RUNS_DIR = str(_EXPERIMENTS_DIR / "ml" / "runs")
# Trailing separators are kept to match the previous string values.
agent_dir = str(_EXPERIMENTS_DIR / "agents" / "collected_data") + "/"
human_dir = str(_EXPERIMENTS_DIR / "collected_data") + "/"
|
||||
|
||||
|
||||
def _perturb_ts(evt: PayloadModel, jitter_ms: int = 500) -> PayloadModel:
|
||||
"""Add random jitter to event timestamp"""
|
||||
new_evt = deepcopy(evt)
|
||||
try:
|
||||
ts = datetime.fromisoformat(evt.ts.replace('Z', '+00:00'))
|
||||
delta = timedelta(milliseconds=random.randint(-jitter_ms, jitter_ms))
|
||||
new_evt.ts = (ts + delta).isoformat()
|
||||
except:
|
||||
pass
|
||||
return new_evt
|
||||
|
||||
|
||||
def augment_trajectory(trajectory: List[PayloadModel], rate: float = 0.1) -> List[PayloadModel]:
    """Produce a randomly augmented view of a trajectory for contrastive learning.

    One of four strategies is picked at random:
      * window  - a contiguous slice covering 70-100% of the events
      * shuffle - neighbouring events are swapped with probability ``rate``
      * noise   - every event's timestamp is jittered
      * drop    - each event is removed with probability ``rate``
    Trajectories with fewer than two events are returned unchanged.
    """
    n_events = len(trajectory)
    if n_events < 2:
        return trajectory

    strategy = random.choice(['window', 'shuffle', 'noise', 'drop'])

    if strategy == 'window':
        shortest = max(2, int(n_events * 0.7))
        keep = random.randint(shortest, n_events)
        offset = random.randint(0, n_events - keep)
        return trajectory[offset:offset + keep]

    if strategy == 'shuffle':
        swapped = list(trajectory)
        for left in range(n_events - 1):
            if random.random() < rate:
                swapped[left], swapped[left + 1] = swapped[left + 1], swapped[left]
        return swapped

    if strategy == 'drop':
        survivors = []
        for event in trajectory:
            if random.random() > rate:
                survivors.append(event)
        # never return fewer than two events; fall back to the first two
        if len(survivors) >= 2:
            return survivors
        return trajectory[:2]

    if strategy == 'noise':
        return [_perturb_ts(event, jitter_ms=500) for event in trajectory]

    return trajectory
|
||||
|
||||
|
||||
class TripletDataset(Dataset):
    """Dataset yielding (anchor, positive, negative, label) built on demand.

    The positive is an augmented view of the anchor session; the negative is
    a random session of the opposite class. Session ids starting with
    ``human_`` / ``agent_`` decide the class.
    """

    def __init__(self, data: Dict[str, List[PayloadModel]], mdp: Optional[Dict], augment_fn, input_dim: int = 64, multiplier: int = 10):
        """Index sessions by class.

        Raises:
            ValueError: if no human or no agent session is present.
        """
        self.sessions = list(data.items())
        self.human_ids = []
        self.agent_ids = []
        for position, (session_id, _events) in enumerate(self.sessions):
            if session_id.startswith('human_'):
                self.human_ids.append(position)
            if session_id.startswith('agent_'):
                self.agent_ids.append(position)
        self.mdp = mdp
        self.augment = augment_fn
        self.input_dim = input_dim
        self.multiplier = multiplier

        if not (self.human_ids and self.agent_ids):
            raise ValueError(f"Need both human ({len(self.human_ids)}) and agent ({len(self.agent_ids)}) sessions")

    def __len__(self) -> int:
        """Each session is visited ``multiplier`` times per epoch."""
        return self.multiplier * len(self.sessions)

    def __getitem__(self, idx: int):
        """Build one triplet; indices wrap around the session list."""
        session_id, events = self.sessions[idx % len(self.sessions)]
        anchor_is_human = session_id.startswith('human_')

        anchor_vec = featurize_trajectory(events, self.mdp, self.input_dim)
        positive_vec = featurize_trajectory(self.augment(events), self.mdp, self.input_dim)

        # the negative comes from the other class
        opposite_pool = self.agent_ids if anchor_is_human else self.human_ids
        negative_events = self.sessions[random.choice(opposite_pool)][1]
        negative_vec = featurize_trajectory(negative_events, self.mdp, self.input_dim)

        # 0=human, 1=agent
        if anchor_is_human:
            label = 0
        else:
            label = 1

        anchor_t = torch.tensor(anchor_vec, dtype=torch.float32)
        positive_t = torch.tensor(positive_vec, dtype=torch.float32)
        negative_t = torch.tensor(negative_vec, dtype=torch.float32)
        label_t = torch.tensor(label, dtype=torch.long)
        return (anchor_t, positive_t, negative_t, label_t)
|
||||
|
||||
|
||||
def train(epochs: int = 100, lr: float = 1e-3, batch_size: int = 4, input_dim: int = 64,
          embed_dim: int = 32, margin: float = 0.3, verbose: bool = True, run_name: Optional[str] = None):
    """Train contrastive weak classifier on human/agent trajectories.

    Args:
        epochs: number of passes over the triplet dataset.
        lr: Adam learning rate.
        batch_size: triplets per optimisation step (incomplete batches are dropped).
        input_dim: length of the feature vector produced per trajectory.
        embed_dim: size of the encoder embedding.
        margin: margin passed to the triplet/contrastive loss.
        verbose: print progress every 10 epochs and a final summary.
        run_name: TensorBoard run name; generated from the hyper-parameters
            and the current time when omitted.

    Returns:
        (model, ref_mdp): the trained classifier and the reference MDP used
        for featurisation.
    """
    joint = JointLoader(human_dir, agent_dir)
    data = joint.get_data()
    if verbose:
        print(f"Loaded {len(data)} sessions")

    joint_model = JointBehaviorModel(human_dir, agent_dir)
    ref_mdp = joint_model.build_MDP()

    dataset = TripletDataset(data, ref_mdp, augment_trajectory, input_dim=input_dim)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

    model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin)
    model.to_device()

    run_name = run_name or f"d{input_dim}_e{embed_dim}_lr{lr}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}"
    writer = SummaryWriter(f"{RUNS_DIR}/train/{run_name}")

    optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr)
    ce_loss_fn = torch.nn.CrossEntropyLoss()

    best_loss = float('inf')
    # try/finally guarantees the TensorBoard writer is flushed and closed even
    # when an epoch raises (previously the writer leaked on failure).
    try:
        for epoch in range(epochs):
            model.encoder.train()
            model.classifier.train()
            total_loss, n_batches = 0.0, 0

            for anchor, positive, negative, labels in loader:
                anchor, positive, negative, labels = [t.to(model.device) for t in [anchor, positive, negative, labels]]
                z_a, z_p, z_n = [model.encoder(t.unsqueeze(1)) for t in [anchor, positive, negative]]

                # Joint objective: triplet term shapes the embedding, the
                # cross-entropy term trains the classifier head on the anchor.
                trip_loss = contrastive_loss(z_a, z_p, z_n, margin=model.margin)
                ce = ce_loss_fn(model.classifier(z_a), labels)
                loss = trip_loss + 0.5 * ce

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
                n_batches += 1

            # max(..., 1) avoids dividing by zero when drop_last leaves no batch.
            avg_loss = total_loss / max(n_batches, 1)
            writer.add_scalar('loss', avg_loss, epoch)

            if verbose and (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}")
            if avg_loss < best_loss:
                best_loss = avg_loss
    finally:
        writer.close()

    if verbose:
        print(f"Done. Best={best_loss:.4f} TB:{RUNS_DIR}/train/{run_name}")

    return model, ref_mdp
|
||||
|
||||
|
||||
def evaluate_loocv(input_dim: int = 64, embed_dim: int = 32, epochs_per_fold: int = 50,
                   lr: float = 1e-3, margin: float = 0.3, run_name: Optional[str] = None):
    """Leave-one-out cross-validation given limited samples.

    For every session a fresh model is trained on all other sessions and then
    asked to classify the held-out one (0=human, 1=agent). Folds whose
    training split lacks one of the two classes are skipped; folds that raise
    are reported and skipped.

    Args:
        input_dim: length of the feature vector produced per trajectory.
        embed_dim: size of the encoder embedding.
        epochs_per_fold: training epochs for each fold's model.
        lr: Adam learning rate.
        margin: margin passed to the triplet/contrastive loss.
        run_name: TensorBoard run name; generated when omitted.

    Returns:
        (accuracy, predictions, actuals); (0.0, [], []) when no fold produced
        a prediction.
    """
    joint = JointLoader(human_dir, agent_dir)
    data = joint.get_data()
    session_ids = list(data.keys())

    # NOTE(review): the reference MDP is built from the data directories, not
    # from each fold's training split -- it presumably also sees the held-out
    # session. Confirm this is acceptable for the reported numbers.
    joint_model = JointBehaviorModel(human_dir, agent_dir)
    ref_mdp = joint_model.build_MDP()

    run_name = run_name or f"loocv_d{input_dim}_e{embed_dim}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}"
    writer = SummaryWriter(f"{RUNS_DIR}/eval/{run_name}")
    ce_loss_fn = torch.nn.CrossEntropyLoss()

    predictions, actuals = [], []

    # try/finally closes the TensorBoard writer on every exit path.
    try:
        for test_sid in session_ids:
            train_data = {k: v for k, v in data.items() if k != test_sid}
            test_events = data[test_sid]
            test_label = 0 if test_sid.startswith('human_') else 1

            n_human = sum(1 for k in train_data if k.startswith('human_'))
            n_agent = sum(1 for k in train_data if k.startswith('agent_'))
            if n_human == 0 or n_agent == 0:
                continue

            try:
                dataset = TripletDataset(train_data, ref_mdp, augment_trajectory, input_dim=input_dim, multiplier=5)
                loader = DataLoader(dataset, batch_size=2, shuffle=True, drop_last=True)

                model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin)
                model.to_device()
                optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr)

                model.encoder.train()
                model.classifier.train()
                for _ in range(epochs_per_fold):
                    for anchor, positive, negative, labels in loader:
                        z_a, z_p, z_n = [model.encoder(t.unsqueeze(1).to(model.device)) for t in [anchor, positive, negative]]
                        # Same objective as train(): without the cross-entropy
                        # term the classifier head (whose parameters are in the
                        # optimizer) would never receive a gradient.
                        trip_loss = contrastive_loss(z_a, z_p, z_n, margin=margin)
                        ce = ce_loss_fn(model.classifier(z_a), labels.to(model.device))
                        loss = trip_loss + 0.5 * ce
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                test_feat = featurize_trajectory(test_events, ref_mdp, input_dim)
                pred = model.predict(test_feat.reshape(1, -1))[0]
                predictions.append(pred)
                actuals.append(test_label)
                print(f"  {test_sid[:12]}...: pred={pred}, actual={test_label}, {'OK' if pred == test_label else 'MISS'}")

            except Exception as e:
                # Best-effort: one failing fold must not abort the whole evaluation.
                print(f"Error: {e}")

        if not predictions:
            return 0.0, [], []

        pairs = list(zip(predictions, actuals))
        acc = sum(p == a for p, a in pairs) / len(predictions)
        # "agent" (label 1) is treated as the positive class.
        tp = sum(1 for p, a in pairs if p == 1 and a == 1)
        fp = sum(1 for p, a in pairs if p == 1 and a == 0)
        fn = sum(1 for p, a in pairs if p == 0 and a == 1)
        prec, rec = tp / max(tp + fp, 1), tp / max(tp + fn, 1)
        f1 = 2 * prec * rec / max(prec + rec, 1e-10)
        writer.add_scalar('accuracy', acc, 0)
        writer.add_scalar('f1', f1, 0)
        writer.add_scalar('precision', prec, 0)
        writer.add_scalar('recall', rec, 0)
        print(f"\nAccuracy: {acc:.2%}  F1: {f1:.3f}  TB:{RUNS_DIR}/eval/{run_name}")
        return acc, predictions, actuals
    finally:
        writer.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import argparse

    # Command-line entry point: train a model or run leave-one-out evaluation.
    cli = argparse.ArgumentParser()
    cli.add_argument('--mode', choices=['train', 'eval'], default='train')
    for flag, caster, fallback in (
        ('--epochs', int, 100),
        ('--lr', float, 1e-3),
        ('--margin', float, 0.3),
        ('--input-dim', int, 64),
        ('--embed-dim', int, 32),
        ('--run-name', str, None),
    ):
        cli.add_argument(flag, type=caster, default=fallback)
    args = cli.parse_args()

    # Hyper-parameters shared by both modes.
    shared = dict(lr=args.lr, input_dim=args.input_dim, embed_dim=args.embed_dim,
                  margin=args.margin, run_name=args.run_name)

    if args.mode != 'train':
        # In eval mode --epochs is the number of epochs per fold.
        evaluate_loocv(epochs_per_fold=args.epochs, **shared)
    else:
        model, mdp = train(epochs=args.epochs, **shared)
|
||||
114
experiments/procesing/contaminator.py
Normal file
114
experiments/procesing/contaminator.py
Normal file
@@ -0,0 +1,114 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import random
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from lib.separability import estimate_alpha, load_artifacts, score_session
|
||||
|
||||
|
||||
# use relative import when in package context, fallback for standalone
|
||||
try:
|
||||
from sim.rl.behavior_loader.models import AgentBehaviorModel
|
||||
except ImportError:
|
||||
import sys
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "sim" / "rl" / "behavior_loader"))
|
||||
from models import AgentBehaviorModel
|
||||
|
||||
# Locations are derived from the project root; the agent data directory can be
# overridden with the PHANTOM_AGENT_DATA_DIR environment variable.
PROJECT_ROOT = Path(__file__).parent.parent.parent
AGENT_DATA_DIR = Path(
    os.environ.get(
        'PHANTOM_AGENT_DATA_DIR',
        PROJECT_ROOT / "experiments" / "agents" / "collected_data",
    )
)

# Separability artifacts are optional: when they are not on disk the module
# still imports and scoring is disabled (the constant stays None).
try:
    SEPARABILITY_ARTIFACTS = load_artifacts()
except FileNotFoundError:
    SEPARABILITY_ARTIFACTS = None
|
||||
|
||||
|
||||
def remap_schema(df: pd.DataFrame, mapping: dict, on: str = "event_type") -> pd.DataFrame:
    """Return a copy of `df` whose `on` column is translated through `mapping`.

    Values without an entry in `mapping` are kept as they were; the input
    frame is not modified.
    """
    remapped = df.copy()
    original_values = remapped[on]
    translated = original_values.map(mapping)
    # map() yields NaN for unmapped values; fall back to the original there.
    remapped[on] = translated.fillna(original_values)
    return remapped
|
||||
|
||||
|
||||
def _states_to_events(states: list[str]) -> list[SimpleNamespace]:
|
||||
events: list[SimpleNamespace] = []
|
||||
for idx, state in enumerate(states):
|
||||
parts = state.split("|") if isinstance(state, str) else ["page", "product", str(state)]
|
||||
page = f"/{parts[0]}" if parts else "/"
|
||||
product = parts[1] if len(parts) > 1 else "unknown"
|
||||
event_name = parts[2] if len(parts) > 2 else parts[-1]
|
||||
events.append(
|
||||
SimpleNamespace(
|
||||
eventName=event_name,
|
||||
page=page,
|
||||
productId=product,
|
||||
ts=float(idx),
|
||||
)
|
||||
)
|
||||
return events
|
||||
|
||||
def contaminate_dataset(df: pd.DataFrame, on: str = "event_type",
                        contamination_rate: float = 0.1,
                        agent_data_dir: Path = None) -> pd.DataFrame:
    """Inject synthetic agent trajectories into a dataset.

    Args:
        df: original event rows; must contain the `on` column.
        on: column holding the event type.
        contamination_rate: fraction of the FINAL dataset that should be
            synthetic agent rows (0.1 = 10% agents). Must satisfy 0 <= rate < 1.
        agent_data_dir: directory handed to AgentBehaviorModel; defaults to
            the module-level AGENT_DATA_DIR.

    Returns:
        `df` itself when nothing is added, otherwise a new frame with the
        synthetic rows appended ('source' == 'synthetic_agent', plus
        prob_agent/delta_h/delta_a when separability artifacts are loaded).
        When scores are available, 'estimated_alpha' holds their mean alpha
        on every row.

    Raises:
        ValueError: if contamination_rate is outside [0, 1).
    """
    if not 0 <= contamination_rate < 1:
        raise ValueError(f"contamination_rate must be in [0, 1), got {contamination_rate}")

    # Number of synthetic ROWS needed so they make up `contamination_rate`
    # of the final frame: N_final = N / (1 - rate).
    n_original = len(df)
    n_target = int(n_original / (1 - contamination_rate) - n_original)

    # value_counts(normalize=True) already sums to 1; NaN values are dropped.
    event_dist = df[on].value_counts(normalize=True).to_dict()
    if n_target <= 0 or not event_dist:
        return df

    data_dir = agent_data_dir or AGENT_DATA_DIR
    model = AgentBehaviorModel(str(data_dir))
    model.build_MDP()  # ensure MDP is built before sampling
    mdp_states = list(model.mdp.get('states', [])) if model.mdp else []

    # One candidate start event per target row is always enough, because every
    # non-empty trajectory contributes at least one row.
    start_events = random.choices(list(event_dist.keys()), weights=list(event_dist.values()), k=n_target)

    new_rows = []
    alpha_estimates = []
    starts_by_event = {}  # cache: event type -> MDP states containing it

    for start_event in start_events:
        remaining = n_target - len(new_rows)
        if remaining <= 0:
            break

        if start_event not in starts_by_event:
            starts_by_event[start_event] = [s for s in mdp_states if start_event in s]
        matching_starts = starts_by_event[start_event]
        if not matching_starts:
            continue  # skip if no matching start state

        start_state = random.choice(matching_starts)
        # Truncate so the total number of synthetic rows never exceeds the
        # target; previously each start event added a whole trajectory (up to
        # 20 rows), overshooting the requested contamination rate.
        trajectory = list(model.sample_traj(start_state, max_len=20))[:remaining]

        score = {}
        if SEPARABILITY_ARTIFACTS:
            score = score_session(_states_to_events(trajectory), SEPARABILITY_ARTIFACTS)
            alpha_estimates.append(
                estimate_alpha(score["prob_agent"], score["delta_h"], score["delta_a"], temperature=2.0)
            )

        for state in trajectory:
            # The event type is the last "|"-separated field of a string state.
            event_value = state.split('|')[-1] if isinstance(state, str) else start_event
            new_rows.append({
                on: event_value,
                'source': 'synthetic_agent',
                'prob_agent': score.get('prob_agent'),
                'delta_h': score.get('delta_h'),
                'delta_a': score.get('delta_a'),
            })

    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
        if alpha_estimates:
            df['estimated_alpha'] = sum(alpha_estimates) / len(alpha_estimates)
    return df
|
||||
@@ -7,15 +7,6 @@ import pandas as pd
|
||||
class PricingFunction(ABC):
|
||||
"""
|
||||
Abstract base for pricing functions.
|
||||
|
||||
Defines mapping: f(Q_t, P_t, S_t, H_t) -> P_{t+1}
|
||||
|
||||
Where:
|
||||
Q_t ∈ R^n: demand vector at time t
|
||||
P_t ∈ R^n: price vector at time t
|
||||
S_t: session features (behavioral signals, interactions)
|
||||
H_t = {Q_{t-k}, P_{t-k}, S_{t-k}}: historical state trajectory
|
||||
|
||||
Objective:
|
||||
maximize E[R_T] = E[Σ P_t^T · Q_t]
|
||||
subject to:
|
||||
@@ -28,10 +19,10 @@ class PricingFunction(ABC):
|
||||
def fit(self, *kwargs):
|
||||
"""
|
||||
Offline training on historical data.
|
||||
This is where we can think about some maximization of expected revenue
|
||||
over historical trajectories to learn parameters of the pricing function.
|
||||
(We cover this in more detail on the RL side of things.)
|
||||
|
||||
Args:
|
||||
historical_data: DataFrame with elasticity, prices, demand signals
|
||||
**kwargs: additional training parameters
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -39,12 +30,18 @@ class PricingFunction(ABC):
|
||||
def predict(self, *kwargs) -> np.ndarray:
|
||||
"""
|
||||
Generate optimal prices given current state.
|
||||
This is an abstract method that transitions from τ -> P*
|
||||
which is the mapping from the trajectory to optimal prices under
|
||||
some subset of session grouping (so, per sessionId)
|
||||
"""
|
||||
pass
|
||||
|
||||
Args:
|
||||
state_space: StateSpace object containing Q_t, P_t, S_t, H_t
|
||||
|
||||
@abstractmethod
|
||||
def _get_features(self, *kwargs) -> np.ndarray:
|
||||
"""
|
||||
Extract features from trajectory for pricing decision.
|
||||
Returns:
|
||||
P_{t+1}: price vector in R^n
|
||||
np.ndarray of shape (n_products, n_features)
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@@ -57,3 +57,13 @@ class ElasticityBasedPricer(PricingFunction):
|
||||
# enforce bounds
|
||||
prices = np.clip(prices, self.price_floor, self.price_ceil)
|
||||
return prices
|
||||
|
||||
def _get_features(self, state_space=None) -> np.ndarray:
|
||||
"""Extract elasticity, demand, and demand deviation for each product"""
|
||||
if state_space is None or self.elasticity is None:
|
||||
n = len(self.elasticity) if self.elasticity is not None else 0
|
||||
return np.zeros((n, 3))
|
||||
|
||||
demand = np.asarray(state_space.demand)
|
||||
demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
|
||||
return np.column_stack([self.elasticity, demand, demand_dev])
|
||||
|
||||
@@ -107,6 +107,36 @@ class SessionAwarePricer(PricingFunction):
|
||||
|
||||
return prices
|
||||
|
||||
def _get_features(self, state_space=None) -> np.ndarray:
|
||||
"""Extract elasticity, demand, and session features"""
|
||||
if state_space is None or self.elasticity is None:
|
||||
n = len(self.elasticity) if self.elasticity is not None else 0
|
||||
return np.zeros((n, 5))
|
||||
|
||||
demand = np.asarray(state_space.demand)
|
||||
n_products = len(demand)
|
||||
|
||||
# extract session features
|
||||
velocity = 0.0
|
||||
view_depth = 0.0
|
||||
cart_to_view = 0.0
|
||||
|
||||
if not state_space.session_features.empty:
|
||||
sf = state_space.session_features.iloc[0]
|
||||
velocity = sf.get('interaction_velocity', 0.0)
|
||||
view_depth = sf.get('product_view_depth', 0.0)
|
||||
cart_to_view = sf.get('cart_to_view_ratio', 0.0)
|
||||
|
||||
# broadcast session features to all products
|
||||
features = np.column_stack([
|
||||
self.elasticity,
|
||||
demand,
|
||||
np.full(n_products, velocity),
|
||||
np.full(n_products, view_depth),
|
||||
np.full(n_products, cart_to_view)
|
||||
])
|
||||
return features
|
||||
|
||||
|
||||
class ProductSpecificSessionPricer(PricingFunction):
|
||||
"""
|
||||
@@ -170,3 +200,12 @@ class ProductSpecificSessionPricer(PricingFunction):
|
||||
|
||||
prices = np.clip(base_prices, self.price_floor, self.price_ceil)
|
||||
return prices
|
||||
|
||||
def _get_features(self, state_space=None) -> np.ndarray:
|
||||
"""Extract elasticity and demand features for product-specific pricing"""
|
||||
if state_space is None or self.elasticity is None:
|
||||
n = len(self.elasticity) if self.elasticity is not None else 0
|
||||
return np.zeros((n, 2))
|
||||
|
||||
demand = np.asarray(state_space.demand)
|
||||
return np.column_stack([self.elasticity, demand])
|
||||
|
||||
@@ -3,6 +3,46 @@ import pandas as pd
|
||||
from procesing.pricers.base import PricingFunction
|
||||
|
||||
|
||||
def session_features_to_demand(session_features: pd.DataFrame) -> float:
    """
    Map session behavioral features to a demand proxy.
    THIS is the critical θ̂ → D transformation for rule-based pricing.

    Only the first row of `session_features` is used. Starting from a
    baseline of 1.0, the rules below add to the score:
      - interaction_velocity > 2.0            → +10 (agent-like behavior, price up)
      - cart_to_view_ratio > 0.1 or cart_adds → +5  (purchase intent, price up)
      - item_views > 3                        → +min(item_views, 5) (interest)

    No rule lowers the score: a low-activity session simply keeps the
    baseline, and an empty frame returns 1.0.

    Returns: demand proxy score in the 1-20 range (higher = more demand)
    """
    if session_features.empty:
        return 1.0

    # A non-empty frame always has a first row, so no fallback is needed here.
    feat = session_features.iloc[0]

    velocity = feat.get('interaction_velocity', 0)
    cart_ratio = feat.get('cart_to_view_ratio', 0)
    item_views = feat.get('item_views', 0)
    cart_adds = feat.get('cart_adds', 0)

    # baseline demand
    demand = 1.0

    # agent detection: high velocity → treat as high "demand" to price up
    if velocity > 2.0:
        demand += 10.0  # strong agent signal

    # conversion intent: cart interaction → price up
    if cart_ratio > 0.1 or cart_adds > 0:
        demand += 5.0

    # browsing depth: many views → interest signal (contribution capped at 5)
    if item_views > 3:
        demand += min(item_views, 5.0)

    return min(demand, 20.0)  # cap at 20
|
||||
|
||||
|
||||
class StaticPricer(PricingFunction):
|
||||
"""Static pricing: always return fixed base prices"""
|
||||
|
||||
@@ -25,6 +65,11 @@ class StaticPricer(PricingFunction):
|
||||
raise ValueError("Must call fit() or provide base_prices in constructor")
|
||||
return self.base_prices.copy()
|
||||
|
||||
def _get_features(self, state_space=None) -> np.ndarray:
|
||||
"""Static pricer uses no features, returns empty array"""
|
||||
n = len(self.base_prices) if self.base_prices is not None else 0
|
||||
return np.zeros((n, 0))
|
||||
|
||||
|
||||
class RandomPricer(PricingFunction):
|
||||
"""Random pricing within bounds (for baseline comparison)"""
|
||||
@@ -47,6 +92,11 @@ class RandomPricer(PricingFunction):
|
||||
self.n_products = len(state_space.demand)
|
||||
return self.rng.uniform(self.price_min, self.price_max, size=self.n_products)
|
||||
|
||||
def _get_features(self, state_space=None) -> np.ndarray:
|
||||
"""Random pricer uses no features"""
|
||||
n = self.n_products if self.n_products else 0
|
||||
return np.zeros((n, 0))
|
||||
|
||||
|
||||
class SimpleSurgePricer(PricingFunction):
|
||||
"""
|
||||
@@ -67,21 +117,25 @@ class SimpleSurgePricer(PricingFunction):
|
||||
self.surge_multiplier = surge_multiplier
|
||||
self.discount_multiplier = discount_multiplier
|
||||
|
||||
def fit(self, market_data : pd.DataFrame):
|
||||
def fit(self, market_data: pd.DataFrame):
|
||||
"""Extract base prices from product catalog or historical averages"""
|
||||
self.base_prices = market_data['base_price'].to_numpy() if 'base_price' in market_data.columns else market_data['price'].values
|
||||
self.demand_history = market_data['demand'].to_numpy() if 'demand' in market_data.columns else np.zeros_like(self.base_prices)
|
||||
return self
|
||||
|
||||
def predict(self) -> np.ndarray:
|
||||
def predict(self, state_space) -> np.ndarray:
|
||||
"""
|
||||
Adjust prices based on current demand using surge rules.
|
||||
state_space.demand: demand counts per product
|
||||
state_space.prices: current prices (fallback if base_prices not set)
|
||||
state_space.demand: demand proxy per product (from session features)
|
||||
state_space.prices: base prices
|
||||
"""
|
||||
current_prices = self.base_prices if self.base_prices is not None else np.ones_like(demand_vector) * 99.99
|
||||
demand = self.demand_history if self.demand_history is not None else np.zeros_like(current_prices)
|
||||
new_prices = current_prices.copy()
|
||||
demand = np.asarray(state_space.demand) if state_space and hasattr(state_space, 'demand') else np.array([0])
|
||||
base = np.asarray(state_space.prices) if state_space and hasattr(state_space, 'prices') else self.base_prices
|
||||
|
||||
if base is None:
|
||||
base = np.ones(len(demand)) * 99.99
|
||||
|
||||
# ensure float dtype to allow multiplication by float multipliers
|
||||
new_prices = base.astype(np.float64).copy()
|
||||
high_mask = demand >= self.high_threshold
|
||||
new_prices[high_mask] *= self.surge_multiplier
|
||||
|
||||
@@ -89,3 +143,16 @@ class SimpleSurgePricer(PricingFunction):
|
||||
new_prices[low_mask] *= self.discount_multiplier
|
||||
|
||||
return new_prices
|
||||
|
||||
def _get_features(self, state_space=None) -> np.ndarray:
|
||||
"""Extract demand and base price features for each product"""
|
||||
if state_space is None:
|
||||
n = len(self.base_prices) if self.base_prices is not None else 0
|
||||
return np.zeros((n, 2))
|
||||
|
||||
demand = np.asarray(state_space.demand) if hasattr(state_space, 'demand') else np.array([0])
|
||||
base = np.asarray(state_space.prices) if hasattr(state_space, 'prices') else self.base_prices
|
||||
if base is None:
|
||||
base = np.ones(len(demand)) * 99.99
|
||||
|
||||
return np.column_stack([demand, base])
|
||||
|
||||
@@ -135,6 +135,7 @@ class ExtractSessionFeaturesStep(BaseContextStep):
|
||||
Vectorized session feature extraction - replaces O(n^2) per-row loop.
|
||||
Input: interactions_df
|
||||
Output: session-level feature matrix
|
||||
THIS is our main mapping from tau (trajectory) to a feature vector theta - we need to do this very well. This is what will go into demand estimation.
|
||||
"""
|
||||
|
||||
def transform(self, X: pd.DataFrame) -> pd.DataFrame:
|
||||
|
||||
@@ -6,6 +6,7 @@ from procesing.steps import (
|
||||
)
|
||||
|
||||
def test_compute_demand(pipeline_context):
|
||||
random.seed(42) # deterministic test
|
||||
step = ComputeDemandStep(context=pipeline_context)
|
||||
|
||||
# Test with normal interaction data
|
||||
@@ -26,6 +27,7 @@ def test_compute_demand(pipeline_context):
|
||||
|
||||
|
||||
def test_compute_demand_skewed(pipeline_context):
|
||||
random.seed(42) # deterministic test
|
||||
step = ComputeDemandStep(context=pipeline_context)
|
||||
|
||||
# Test with normal interaction data
|
||||
|
||||
41
lib/__init__.py
Normal file
41
lib/__init__.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""PHANTOM shared library
|
||||
Exports unified utilities for features, state, config, kafka, and model registry
|
||||
"""
|
||||
from .config import (
|
||||
PROJECT_ROOT, DATA_DIR, EXPERIMENTS_DIR,
|
||||
AGENT_DATA_DIR, HUMAN_DATA_DIR, SIM_RUNS_DIR, MODEL_REGISTRY_DIR,
|
||||
COLLECTED_DATA_DIR, NOTEBOOK_OUTPUT_DIR,
|
||||
ensure_dir, get_data_path, get_experiments_path, get_sim_path,
|
||||
KAFKA_HOST, KAFKA_PORT, KAFKA_BROKER,
|
||||
REDIS_HOST, REDIS_PORT,
|
||||
SUPABASE_URL, SUPABASE_ANON_KEY,
|
||||
BACKEND_PORT, PROVIDER_PORT
|
||||
)
|
||||
from .state import (
|
||||
make_state_repr, event_to_state, parse_state,
|
||||
get_event_name, get_timestamp,
|
||||
create_state_fn, create_event_name_fn, create_timestamp_fn
|
||||
)
|
||||
from .features import (
|
||||
transition_histogram, temporal_signature, state_coverage, transition_entropy,
|
||||
event_type_distribution, featurize_trajectory, parse_timestamp
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# config
|
||||
'PROJECT_ROOT', 'DATA_DIR', 'EXPERIMENTS_DIR',
|
||||
'AGENT_DATA_DIR', 'HUMAN_DATA_DIR', 'SIM_RUNS_DIR', 'MODEL_REGISTRY_DIR',
|
||||
'COLLECTED_DATA_DIR', 'NOTEBOOK_OUTPUT_DIR',
|
||||
'ensure_dir', 'get_data_path', 'get_experiments_path', 'get_sim_path',
|
||||
'KAFKA_HOST', 'KAFKA_PORT', 'KAFKA_BROKER',
|
||||
'REDIS_HOST', 'REDIS_PORT',
|
||||
'SUPABASE_URL', 'SUPABASE_ANON_KEY',
|
||||
'BACKEND_PORT', 'PROVIDER_PORT',
|
||||
# state
|
||||
'make_state_repr', 'event_to_state', 'parse_state',
|
||||
'get_event_name', 'get_timestamp',
|
||||
'create_state_fn', 'create_event_name_fn', 'create_timestamp_fn',
|
||||
# features
|
||||
'transition_histogram', 'temporal_signature', 'state_coverage', 'transition_entropy',
|
||||
'event_type_distribution', 'featurize_trajectory', 'parse_timestamp',
|
||||
]
|
||||
65
lib/config.py
Normal file
65
lib/config.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""Unified path configuration for PHANTOM project
|
||||
All hardcoded paths should reference this module
|
||||
Paths can be overridden via environment variables
|
||||
"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Repository root: the directory that holds lib/, experiments/, sim/, web/, backend/.
PROJECT_ROOT = Path(__file__).parent.parent.resolve()

# Every location below can be overridden through the named environment
# variable; otherwise it is derived from the root or a parent setting.

# Top-level data and experiment folders.
DATA_DIR = Path(os.environ.get('PHANTOM_DATA_DIR', PROJECT_ROOT / 'data'))
EXPERIMENTS_DIR = Path(os.environ.get('PHANTOM_EXPERIMENTS_DIR', PROJECT_ROOT / 'experiments'))

# Recorded agent / human interaction data.
AGENT_DATA_DIR = Path(os.environ.get('PHANTOM_AGENT_DATA_DIR', DATA_DIR / 'agents'))
HUMAN_DATA_DIR = Path(os.environ.get('PHANTOM_HUMAN_DATA_DIR', DATA_DIR / 'humans'))

# Output of RL simulation runs.
SIM_RUNS_DIR = Path(os.environ.get('PHANTOM_SIM_RUNS_DIR', PROJECT_ROOT / 'sim' / 'rl' / 'runs'))

# Stored model artifacts.
MODEL_REGISTRY_DIR = Path(os.environ.get('PHANTOM_MODEL_REGISTRY_DIR', DATA_DIR / 'models'))

# Data collected by the agent experiments.
COLLECTED_DATA_DIR = Path(
    os.environ.get('PHANTOM_COLLECTED_DATA_DIR', EXPERIMENTS_DIR / 'agents' / 'collected_data')
)

# Files written by notebooks.
NOTEBOOK_OUTPUT_DIR = Path(
    os.environ.get('PHANTOM_NOTEBOOK_OUTPUT_DIR', EXPERIMENTS_DIR / 'notebooks' / 'outputs')
)
|
||||
|
||||
|
||||
def ensure_dir(path: "str | os.PathLike[str]") -> Path:
    """ensure directory exists, create if needed

    Accepts a Path as well as a plain string / os.PathLike. Missing parent
    directories are created; an already existing directory is not an error.
    Returns the directory as a Path (the same object when a Path was given).
    """
    directory = path if isinstance(path, Path) else Path(path)
    directory.mkdir(parents=True, exist_ok=True)
    return directory
|
||||
|
||||
|
||||
def get_data_path(*parts: str) -> Path:
    """Join the given path components onto DATA_DIR and return the result."""
    return Path(DATA_DIR, *parts)
|
||||
|
||||
|
||||
def get_experiments_path(*parts: str) -> Path:
    """Join the given path components onto EXPERIMENTS_DIR and return the result."""
    return Path(EXPERIMENTS_DIR, *parts)
|
||||
|
||||
|
||||
def get_sim_path(*parts: str) -> Path:
    """Join the given path components onto SIM_RUNS_DIR and return the result."""
    return Path(SIM_RUNS_DIR, *parts)
|
||||
|
||||
|
||||
# Service endpoints, read from the environment with local-development defaults.

# Kafka: host and port stay strings and are combined into "host:port".
KAFKA_HOST = os.environ.get('KAFKA_HOST', 'localhost')
KAFKA_PORT = os.environ.get('KAFKA_PORT', '9092')
KAFKA_BROKER = f"{KAFKA_HOST}:{KAFKA_PORT}"

# Redis: the port is converted to an integer.
REDIS_HOST = os.environ.get('REDIS_HOST', 'localhost')
REDIS_PORT = int(os.environ.get('REDIS_PORT', '6379'))

# Supabase: shares the variable names used by the web app; empty when unset.
SUPABASE_URL = os.environ.get('NEXT_PUBLIC_SUPABASE_URL', '')
SUPABASE_ANON_KEY = os.environ.get('NEXT_PUBLIC_SUPABASE_ANON_KEY', '')

# HTTP ports of the backend and provider services, as integers.
BACKEND_PORT = int(os.environ.get('BACKEND_PORT', '5000'))
PROVIDER_PORT = int(os.environ.get('PROVIDER_PORT', '5001'))
|
||||
125
lib/features.py
Normal file
125
lib/features.py
Normal file
@@ -0,0 +1,125 @@
|
||||
"""Unified featurization utilities for trajectory -> feature vector conversion
|
||||
Used by both experiments/ml/ and sim/rl/ components
|
||||
"""
|
||||
import numpy as np
|
||||
from collections import defaultdict
|
||||
from typing import List, Dict, Callable, Optional, Any, Set
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def transition_histogram(events: List, state_fn: Callable, max_states: int = 50) -> np.ndarray:
    """Normalised counts of consecutive state pairs, as a fixed-length vector.

    events: list of event objects/dicts
    state_fn: function mapping event -> state string
    max_states: length of the returned vector

    Distinct transitions are laid out in the order they first occur; only the
    first `max_states` of them are kept, the rest of the vector is zero.
    Counts are divided by the total number of transitions (dropped ones
    included). Fewer than two events give an all-zero vector.
    """
    if len(events) < 2:
        return np.zeros(max_states, dtype=np.float32)

    visited = list(map(state_fn, events))
    pair_counts = {}
    for pair in zip(visited[:-1], visited[1:]):
        pair_counts[pair] = pair_counts.get(pair, 0) + 1

    n_transitions = sum(pair_counts.values())
    leading = list(pair_counts.values())[:max_states]

    histogram = np.zeros(max_states, dtype=np.float32)
    histogram[:len(leading)] = leading
    # Epsilon keeps the division well-defined.
    return histogram / (n_transitions + 1e-10)
|
||||
|
||||
|
||||
def temporal_signature(events: List, ts_fn: Callable) -> np.ndarray:
    """Summarise the gaps between consecutive event timestamps.

    events: list of event objects/dicts
    ts_fn: function mapping event -> timestamp (float seconds)
    returns: float32 array [mean_gap, std_gap, skewness, number_of_gaps]

    Timestamps are sorted before differencing. Fewer than two events give an
    all-zero vector; skewness is 0 when the gaps have (near) zero spread.
    """
    empty_signature = np.zeros(4, dtype=np.float32)
    if len(events) < 2:
        return empty_signature

    ordered = sorted(ts_fn(e) for e in events)
    gaps = np.diff(ordered).astype(np.float32)
    if len(gaps) == 0:
        return empty_signature

    gap_mean = np.mean(gaps)
    gap_std = np.std(gaps) + 1e-10  # epsilon avoids dividing by zero below
    if gap_std > 1e-8:
        skewness = np.mean(((gaps - gap_mean) / gap_std) ** 3)
    else:
        skewness = 0.0
    return np.array([gap_mean, gap_std, skewness, len(gaps)], dtype=np.float32)
|
||||
|
||||
|
||||
def state_coverage(events: List, state_fn: Callable, mdp_states: Set[str]) -> float:
    """Share of the known MDP states that the trajectory touches.

    events: list of event objects/dicts
    state_fn: function mapping event -> state string
    mdp_states: set of all possible MDP states

    Returns 0.0 when `mdp_states` is empty.
    """
    if not mdp_states:
        return 0.0
    seen = {state_fn(e) for e in events}
    hits = seen & mdp_states
    return len(hits) / len(mdp_states)
|
||||
|
||||
|
||||
def transition_entropy(events: List, state_fn: Callable) -> float:
    """Shannon entropy (natural log) of the distribution of state transitions.

    A higher value means the consecutive state pairs are spread more evenly,
    i.e. a less repetitive navigation pattern. Fewer than two events give 0.0.
    """
    if len(events) < 2:
        return 0.0

    path = [state_fn(e) for e in events]
    tallies = {}
    for pair in zip(path, path[1:]):
        tallies[pair] = tallies.get(pair, 0) + 1

    n_transitions = sum(tallies.values())
    shares = [count / n_transitions for count in tallies.values()]
    # The epsilon inside the log keeps it finite.
    return -sum(share * np.log(share + 1e-10) for share in shares)
|
||||
|
||||
|
||||
def event_type_distribution(events: List, event_name_fn: Callable) -> np.ndarray:
    """Fraction of events whose (lower-cased) name matches each keyword group.

    returns: float32 array [page_view_ratio, hover_ratio, cart_ratio, purchase_ratio]

    Matching is by substring, so one event can count towards several groups
    (e.g. a name containing both "view" and "cart"). No events give zeros.
    """
    if not events:
        return np.zeros(4, dtype=np.float32)

    keyword_groups = (
        ('page', 'view'),
        ('hover',),
        ('cart',),
        ('purchase', 'checkout'),
    )
    lowered = [event_name_fn(e).lower() for e in events]
    n_events = len(events)

    ratios = []
    for keywords in keyword_groups:
        matches = sum(1 for name in lowered if any(word in name for word in keywords))
        ratios.append(matches / n_events)
    return np.array(ratios, dtype=np.float32)
|
||||
|
||||
|
||||
def featurize_trajectory(events: List, state_fn: Callable, ts_fn: Callable,
                         event_name_fn: Callable, mdp_states: Optional[Set[str]] = None,
                         output_dim: int = 64) -> np.ndarray:
    """Turn a trajectory into a float32 vector of exactly `output_dim` values.

    events: list of event objects/dicts
    state_fn: function mapping event -> state string
    ts_fn: function mapping event -> timestamp (float)
    event_name_fn: function mapping event -> event name string
    mdp_states: optional set of all MDP states for coverage calculation
    output_dim: desired output dimension (zero-padded or truncated)

    Layout before padding/truncation: 40 transition-histogram values,
    4 temporal values, state coverage, transition entropy, trajectory length,
    number of unique states, 4 event-type ratios.
    """
    pieces = [
        transition_histogram(events, state_fn, max_states=40),        # 40 dims
        temporal_signature(events, ts_fn),                            # 4 dims
        [state_coverage(events, state_fn, mdp_states or set())],      # 1 dim
        [transition_entropy(events, state_fn)],                       # 1 dim
        [float(len(events))],                                         # trajectory length
        [float(len({state_fn(e) for e in events}))],                  # unique states
        event_type_distribution(events, event_name_fn),               # 4 dims
    ]
    flat = [value for piece in pieces for value in piece]

    clipped = np.array(flat[:output_dim], dtype=np.float32)
    if len(clipped) >= output_dim:
        return clipped

    # Shorter than requested: fill the tail with zeros.
    padded = np.zeros(output_dim, dtype=np.float32)
    padded[:len(clipped)] = clipped
    return padded
|
||||
|
||||
|
||||
def parse_timestamp(ts: Any) -> float:
    """parse various timestamp formats to float seconds

    accepted inputs:
      - None                      -> 0.0
      - int / float               -> returned as float, unchanged
      - datetime                  -> its POSIX timestamp
      - ISO-8601 string           -> POSIX timestamp (a trailing 'Z' is read as UTC)
      - numeric string ("12.5")   -> that number, when it is finite
    anything else, or a string that cannot be parsed, yields 0.0
    """
    if ts is None:
        return 0.0
    if isinstance(ts, (int, float)):
        return float(ts)
    if isinstance(ts, datetime):
        # previously fell through to the 0.0 fallback
        return ts.timestamp()
    if isinstance(ts, str):
        # ISO parsing is tried first so every string that parsed before still
        # produces the same value
        try:
            return datetime.fromisoformat(ts.replace('Z', '+00:00')).timestamp()
        except ValueError:
            pass
        try:
            value = float(ts)
        except ValueError:
            return 0.0
        # reject "nan"/"inf" so callers never see a non-finite time
        if value != value or value in (float('inf'), float('-inf')):
            return 0.0
        return value
    return 0.0
|
||||
54
lib/kafka_client.py
Executable file
54
lib/kafka_client.py
Executable file
@@ -0,0 +1,54 @@
|
||||
from kafka import KafkaConsumer
|
||||
import json
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
def get_interactions(
    topic='user-interactions',
    bootstrap_servers=None,
    from_beginning=True,
    max_records=None,
    timeout_ms=5000
):
    """Read interaction events from a Kafka topic into a list.

    Args:
        topic: Kafka topic name
        bootstrap_servers: broker address; when falsy it is built from the
            KAFKA_HOST / KAFKA_PORT environment variables
            (defaults 'localhost' and '9092')
        from_beginning: True -> auto_offset_reset='earliest', else 'latest'
        max_records: stop after this many records (None or 0 = no cap)
        timeout_ms: forwarded to KafkaConsumer as consumer_timeout_ms

    Returns:
        List of JSON-decoded message values, in the order they were consumed
    """
    if not bootstrap_servers:
        host = os.getenv('KAFKA_HOST', 'localhost')
        port = os.getenv('KAFKA_PORT', '9092')
        bootstrap_servers = f'{host}:{port}'

    def _decode(raw):
        # message payloads are UTF-8 encoded JSON documents
        return json.loads(raw.decode('utf-8'))

    offset_policy = 'earliest' if from_beginning else 'latest'
    consumer = KafkaConsumer(
        topic,
        bootstrap_servers=bootstrap_servers,
        auto_offset_reset=offset_policy,
        enable_auto_commit=False,
        value_deserializer=_decode,
        consumer_timeout_ms=timeout_ms
    )

    collected = []
    try:
        for record in consumer:
            collected.append(record.value)
            if max_records and len(collected) >= max_records:
                break
    finally:
        # always release the consumer, including when decoding raises
        consumer.close()

    return collected
|
||||
|
||||
if __name__ == '__main__':
    # manual smoke check: print up to ten events from the default topic
    for event in get_interactions(max_records=10):
        print(event)
|
||||
@@ -178,3 +178,49 @@ class ModelRegistry:
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
def set_session_prices(self, session_id: str, prices: Dict[str, float], ttl: int = 1800):
    """
    Write the per-product prices of one session (write path for session-aware pricing).

    Prices are stored as strings in a hash under "session:<session_id>:prices",
    and the key's expiry is (re)set to `ttl` on every call. An empty `prices`
    mapping is a no-op.

    Args:
        session_id: session identifier
        prices: dict of {productId: price}
        ttl: time-to-live in seconds (default 30min)
    """
    if prices:
        redis_key = f"session:{session_id}:prices"
        encoded = {}
        for product_id, price in prices.items():
            encoded[product_id] = str(price)
        # hash storage keeps single-product lookups cheap
        self.redis_client.hset(redis_key, mapping=encoded)
        self.redis_client.expire(redis_key, ttl)
|
||||
|
||||
def get_session_price(self, session_id: str, product_id: str) -> Optional[float]:
    """
    Read one product's price for a session (read path for provider lookup).

    Looks up field `product_id` in the hash "session:<session_id>:prices".

    Returns: the stored price as float, or None when the field is absent
    """
    raw = self.redis_client.hget(f"session:{session_id}:prices", product_id)
    if raw is None:
        return None
    # the client may hand back bytes or str depending on its configuration
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8')
    return float(raw)
|
||||
|
||||
def get_session_all_prices(self, session_id: str) -> Dict[str, float]:
    """Return every stored {productId: price} for a session ({} when none exist)."""
    stored = self.redis_client.hgetall(f"session:{session_id}:prices")
    if not stored:
        return {}

    def _text(value):
        # fields and values may arrive as bytes or str
        return value.decode('utf-8') if isinstance(value, bytes) else value

    result = {}
    for field, raw_price in stored.items():
        result[_text(field)] = float(_text(raw_price))
    return result
|
||||
|
||||
72
lib/state.py
Normal file
72
lib/state.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Unified state representation utilities for MDP state encoding
|
||||
Used by both experiments/ and sim/ components for consistent state handling
|
||||
"""
|
||||
from typing import Any, Callable
|
||||
|
||||
|
||||
def make_state_repr(page: str = None, product_id: str = None, event_name: str = None) -> str:
    """build the canonical state string from its three components

    format: page|productId|eventName
    a missing or falsy component is replaced by its placeholder:
    'unk' (page), 'none' (product id), 'unknown' (event name)
    """
    components = (page or 'unk', product_id or 'none', event_name or 'unknown')
    return '|'.join(f"{part}" for part in components)
|
||||
|
||||
|
||||
def event_to_state(evt: Any) -> str:
    """map an event (dict or attribute-style object) to its state string

    reads 'page', 'productId' and 'eventName' (falling back to 'event_type'
    when 'eventName' is missing or falsy); absent fields become None
    """
    if isinstance(evt, dict):
        read = evt.get
    else:
        def read(field):
            return getattr(evt, field, None)

    return make_state_repr(
        page=read('page'),
        product_id=read('productId'),
        event_name=read('eventName') or read('event_type'),
    )
|
||||
|
||||
|
||||
def parse_state(state_str: str) -> dict:
    """split a 'page|productId|eventName' string back into its components

    placeholders ('unk', 'none', 'unknown') and missing segments map to None;
    segments beyond the third are ignored
    returns: {'page': str, 'productId': str, 'eventName': str}
    """
    pieces = state_str.split('|')
    layout = (('page', 'unk'), ('productId', 'none'), ('eventName', 'unknown'))
    decoded = {}
    for pos, (field, placeholder) in enumerate(layout):
        value = pieces[pos] if pos < len(pieces) else None
        decoded[field] = None if value == placeholder else value
    return decoded
|
||||
|
||||
|
||||
def get_event_name(evt: Any) -> str:
    """return the event's name: 'eventName', else 'event_type', else ''"""
    if isinstance(evt, dict):
        lookup = evt.get
    else:
        def lookup(field):
            return getattr(evt, field, None)

    # first truthy candidate wins
    for field in ('eventName', 'event_type'):
        found = lookup(field)
        if found:
            return found
    return ''
|
||||
|
||||
|
||||
def get_timestamp(evt: Any) -> Any:
    """extract the raw timestamp from an event object/dict

    prefers 'ts' and falls back to 'timestamp'; returns None when neither is set.
    only None and the empty string count as "missing" for 'ts': a numeric 0 / 0.0
    is a valid timestamp and must not be skipped (a plain `or` chain would drop it)
    """
    if isinstance(evt, dict):
        lookup = evt.get
    else:
        def lookup(field):
            return getattr(evt, field, None)

    primary = lookup('ts')
    if primary is None or (isinstance(primary, str) and not primary):
        return lookup('timestamp')
    return primary
|
||||
|
||||
|
||||
def create_state_fn() -> Callable:
    """factory: hands back the event -> state-string function (event_to_state)"""
    state_fn = event_to_state
    return state_fn
|
||||
|
||||
|
||||
def create_event_name_fn() -> Callable:
    """factory: hands back the event -> event-name function (get_event_name)"""
    name_fn = get_event_name
    return name_fn
|
||||
|
||||
|
||||
def create_timestamp_fn() -> Callable:
    """factory: hands back the event -> raw-timestamp function (get_timestamp)

    the returned function yields the value as stored on the event; use
    features.parse_timestamp to convert it to float seconds
    """
    ts_fn = get_timestamp
    return ts_fn
|
||||
2
sim/case/__init__.py
Normal file
2
sim/case/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Case-specific simulations and experiments."""
|
||||
|
||||
2
sim/case/thesis_simplified/__init__.py
Normal file
2
sim/case/thesis_simplified/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Minimal thesis-aligned pricing simulation (self-contained)."""
|
||||
|
||||
125
sim/case/thesis_simplified/coi.py
Normal file
125
sim/case/thesis_simplified/coi.py
Normal file
@@ -0,0 +1,125 @@
|
||||
"""Cost of Information (COI) computation for thesis pricing system.
|
||||
|
||||
Core KPI: COI = E[p_shown] - p_min measures pricing power from information asymmetry.
|
||||
Theorem 1 shows COI erodes as agent queries increase: as N->inf, p^(1)->p_min.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, TYPE_CHECKING
|
||||
import numpy as np
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .simplified import Session
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class COIWindow:
    """Immutable bundle of COI metrics for one window of sessions.

    Scalars:
        policy: E[p_shown] - cost, the definition-level KPI
        agent: E[p^(1)] - cost where p^(1) is min price under agent querying
        leak: max(policy - agent, 0), observable gap from reconnaissance
        survival_ratio: agent/policy, fraction of pricing power retained
    Per-product arrays:
        policy_by_product / agent_by_product: the per-product values behind
            `policy` and `agent`
        demand_weights: the weights used to aggregate them
    """
    # aggregate metrics
    policy: float
    agent: float
    leak: float
    survival_ratio: float
    # per-product breakdown
    policy_by_product: np.ndarray
    agent_by_product: np.ndarray
    demand_weights: np.ndarray
|
||||
|
||||
|
||||
def aggregate_prices(sessions: List["Session"], mode: str = "all") -> Dict[int, List[float] | float]:
    """Unified price aggregation across sessions, keyed by product index.

    mode:
        "all"             -> every price shown per product, in encounter order
        "min_per_session" -> one entry per session that saw the product:
                             that session's minimum price
        "min_across"      -> a single minimum price per product
    Any other mode value behaves like "all".

    Keys are plain Python ints in every mode. Previously "all" kept the raw
    `product_idx` (e.g. a numpy integer) while the other modes cast to int.
    """
    if mode == "min_across":
        lowest: Dict[int, float] = {}
        for s in sessions:
            for e in s.events:
                pidx, price = int(e.product_idx), float(e.price_seen)
                lowest[pidx] = min(lowest.get(pidx, price), price)
        return lowest

    if mode == "min_per_session":
        per_session: Dict[int, List[float]] = {}
        for s in sessions:
            session_min: Dict[int, float] = {}
            for e in s.events:
                pidx, price = int(e.product_idx), float(e.price_seen)
                session_min[pidx] = min(session_min.get(pidx, price), price)
            for pidx, pmin in session_min.items():
                per_session.setdefault(pidx, []).append(pmin)
        return per_session

    # "all"
    prices: Dict[int, List[float]] = {}
    for s in sessions:
        for e in s.events:
            prices.setdefault(int(e.product_idx), []).append(float(e.price_seen))
    return prices
|
||||
|
||||
|
||||
def demand_weights_by_product(sessions: List["Session"], demand_mapping: Dict[str, float], n_products: int) -> np.ndarray:
    """Compute demand-weighted importance per product.

    Each session's demand is credited to the product of its first event.
    Session ids not present in `sessions`, sessions without events, and product
    indices outside [0, n_products) are ignored. The bounds check matches
    compute_coi_window and prevents an IndexError or a negative index wrapping
    around to the wrong product.

    Returns a length-`n_products` array normalized to sum to 1, or all zeros
    when no demand could be attributed.
    """
    w = np.zeros(n_products, dtype=float)
    sessions_by_id = {s.sid: s for s in sessions}
    for sid, q in demand_mapping.items():
        sess = sessions_by_id.get(sid)
        if not sess or not sess.events:
            continue
        pidx = int(sess.events[0].product_idx)
        if 0 <= pidx < n_products:
            w[pidx] += float(q)
    total = float(np.sum(w))
    return (w / total) if total > 0 else w
|
||||
|
||||
|
||||
def compute_coi_window(sessions: List["Session"], costs: np.ndarray, demand_mapping: Dict[str, float] | None = None) -> COIWindow:
    """Compute policy-level and agent-realized COI over a window of sessions.

    Per product:
      - policy COI = mean shown price (all sessions) minus cost
      - agent COI  = lowest price seen by any actor=="A" session minus cost;
                     products seen only by non-agents inherit the policy value
    Aggregation uses demand weights when `demand_mapping` yields any, otherwise
    the plain mean over products that were seen, otherwise 0.
    """
    n_products = int(len(costs))

    exposures = aggregate_prices(sessions, mode="all")
    agent_only = [sess for sess in sessions if sess.actor == "A"]
    agent_floor = aggregate_prices(agent_only, mode="min_across") if agent_only else {}

    seen = np.array([(idx in exposures) for idx in range(n_products)], dtype=bool)
    agent_seen = np.array([(idx in agent_floor) for idx in range(n_products)], dtype=bool)

    policy_by = np.zeros(n_products, dtype=float)
    for pidx, shown in exposures.items():
        if 0 <= pidx < n_products and shown:
            policy_by[pidx] = float(np.mean(shown) - float(costs[pidx]))

    agent_by = np.zeros(n_products, dtype=float)
    for pidx, floor_price in agent_floor.items():
        if 0 <= pidx < n_products:
            agent_by[pidx] = float(floor_price - float(costs[pidx]))

    # no erosion where agents never looked
    untouched = seen & ~agent_seen
    agent_by[untouched] = policy_by[untouched]

    if demand_mapping:
        demand_w = demand_weights_by_product(sessions, demand_mapping, n_products)
    else:
        demand_w = np.zeros(n_products, dtype=float)

    if float(np.sum(demand_w)) > 0:
        policy = float(np.dot(demand_w, policy_by))
        agent = float(np.dot(demand_w, agent_by))
    elif np.any(seen):
        policy = float(np.mean(policy_by[seen]))
        agent = float(np.mean(agent_by[seen]))
    else:
        policy, agent = 0.0, 0.0

    leak = float(max(policy - agent, 0.0))
    survival = float(np.clip(agent / policy, 0.0, 1.0)) if policy > 0 else 0.0

    return COIWindow(
        policy=policy,
        agent=agent,
        leak=leak,
        survival_ratio=survival,
        policy_by_product=policy_by,
        agent_by_product=agent_by,
        demand_weights=demand_w,
    )
|
||||
|
||||
|
||||
def coi_erosion(coi_policy: float, coi_agent: float, eps: float = 1e-9) -> float:
    """Fraction of pricing power destroyed by agent queries, clipped to [0, 1].

    erosion = 1 - (COI_agent / COI_policy)
    As agents find lower prices COI_agent -> 0 and erosion -> 1. When
    COI_policy is at or below `eps` there is nothing to erode and 0.0 is returned.
    """
    if coi_policy <= eps:
        return 0.0
    # eps in the denominator keeps the ratio finite for tiny policy values
    retained = coi_agent / (coi_policy + eps)
    eroded = 1.0 - retained
    return float(np.clip(eroded, 0.0, 1.0))
|
||||
325
sim/case/thesis_simplified/experiments.py
Normal file
325
sim/case/thesis_simplified/experiments.py
Normal file
@@ -0,0 +1,325 @@
|
||||
"""COI leakage experiments and policy comparisons.
|
||||
|
||||
Demonstrates the core thesis contribution: COI erosion under agent contamination
|
||||
and recovery via robust pricing policies.
|
||||
|
||||
Generates TensorBoard logs for:
|
||||
- COI erosion curves across contamination levels
|
||||
- Policy comparison (fixed vs adaptive vs RL)
|
||||
- Revenue/margin trade-offs
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
import json
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
HAS_TB = True
|
||||
except ImportError:
|
||||
HAS_TB = False
|
||||
|
||||
from .simplified_env import PricingEnv, EnvConfig, make_env
|
||||
from .simplified import System
|
||||
|
||||
|
||||
@dataclass
class ExperimentResult:
    """Summary metrics for one (policy, contamination level) run."""
    name: str            # policy name
    alpha: float         # contamination level the run used
    reward_mean: float
    reward_std: float
    coi_erosion: float
    alpha_error: float
    revenue: float
    margin: float

    def to_dict(self) -> dict:
        """Return the fields as a plain dict, in declaration order."""
        snapshot = {}
        for field_name in self.__dataclass_fields__:
            snapshot[field_name] = getattr(self, field_name)
        return snapshot
|
||||
|
||||
|
||||
def theoretical_coi_erosion_curve(alphas: np.ndarray, n_sessions: int = 1000) -> np.ndarray:
    """Theoretical COI erosion per contamination level (Theorem 1, order-statistic model).

    For N i.i.d. uniform queries on [p_min, p_max]:
        E[p^(1)] = p_min + (p_max - p_min)/(N+1), so erosion = 1 - 2/(N+1)
    N is taken as int(alpha * n_sessions), floored at 1 (which gives erosion 0).
    """
    def _erosion(alpha):
        n_agents = max(1, int(alpha * n_sessions))
        return 1.0 - 2.0 / (n_agents + 1)

    return np.array([_erosion(a) for a in alphas])
|
||||
|
||||
|
||||
def run_policy_episode(
    env: PricingEnv,
    policy_fn,
    n_episodes: int = 10
) -> Tuple[List[float], List[float], List[float], List[float]]:
    """Roll out `policy_fn` for `n_episodes` and collect per-step metrics.

    `policy_fn(obs, env.n)` chooses the action at every step. Returns four flat
    lists pooled over all episodes: rewards, COI erosion values, absolute alpha
    estimation errors and revenues. The last three only grow on steps whose
    `info` dict carries the corresponding keys.
    """
    rewards: List[float] = []
    coi_erosions: List[float] = []
    alpha_errors: List[float] = []
    revenues: List[float] = []

    for _episode in range(n_episodes):
        obs, info = env.reset()
        while True:
            obs, reward, terminated, truncated, info = env.step(policy_fn(obs, env.n))
            rewards.append(reward)
            if 'coi_erosion' in info:
                coi_erosions.append(info['coi_erosion'])
            if 'alpha_true' in info and 'alpha_est' in info:
                alpha_errors.append(abs(info['alpha_true'] - info['alpha_est']))
            if 'revenue' in info:
                revenues.append(info['revenue'])
            if terminated or truncated:
                break

    return rewards, coi_erosions, alpha_errors, revenues
|
||||
|
||||
|
||||
class PolicyRegistry:
    """Baseline pricing policies.

    Every policy takes (obs, n) and returns n identical price multipliers.
    The alpha-aware policies read the contamination estimate from obs[2 * n]
    and fall back to 0.2 when the observation is too short to contain it.
    """

    @staticmethod
    def _uniform(n: int, multiplier) -> np.ndarray:
        # n copies of `multiplier`, built from a float32 ones-vector
        return np.ones(n, dtype=np.float32) * multiplier

    @staticmethod
    def _alpha_from_obs(obs: np.ndarray, n: int):
        # slot 2*n holds the alpha estimate when present
        if len(obs) > 2 * n:
            return obs[2 * n]
        return 0.2

    @staticmethod
    def fixed(obs: np.ndarray, n: int, margin: float = 0.15) -> np.ndarray:
        """Constant margin, independent of the observation."""
        return PolicyRegistry._uniform(n, 1.0 + margin)

    @staticmethod
    def random(obs: np.ndarray, n: int, rng: np.random.Generator = None) -> np.ndarray:
        """Independent uniform multipliers in [0.7, 1.3); a fresh generator is used when none is given."""
        rng = rng or np.random.default_rng()
        return rng.uniform(0.7, 1.3, n).astype(np.float32)

    @staticmethod
    def adaptive(obs: np.ndarray, n: int, base_margin: float = 0.15) -> np.ndarray:
        """Reduce margins when alpha estimate is high."""
        alpha_est = PolicyRegistry._alpha_from_obs(obs, n)
        margin_scale = 1.0 - 0.4 * alpha_est
        return PolicyRegistry._uniform(n, 1.0 + base_margin * margin_scale)

    @staticmethod
    def aggressive(obs: np.ndarray, n: int) -> np.ndarray:
        """High margins, ignores contamination."""
        return PolicyRegistry._uniform(n, 1.4)

    @staticmethod
    def defensive(obs: np.ndarray, n: int) -> np.ndarray:
        """Low margins, always cautious."""
        return PolicyRegistry._uniform(n, 1.05)

    @staticmethod
    def alpha_proportional(obs: np.ndarray, n: int, max_margin: float = 0.3) -> np.ndarray:
        """Margin shrinks linearly from max_margin (alpha=0) to 0 (alpha=1)."""
        alpha_est = PolicyRegistry._alpha_from_obs(obs, n)
        margin = max_margin * (1.0 - alpha_est)
        return PolicyRegistry._uniform(n, 1.0 + margin)
|
||||
|
||||
|
||||
def run_contamination_sweep(
    alphas: List[float],
    policies: Dict[str, callable],
    n_products: int = 10,
    max_steps: int = 200,
    n_episodes: int = 10,
    seed: int = 42,
    log_dir: str = None
) -> Dict[str, List[ExperimentResult]]:
    """Run policies across contamination levels.

    For every alpha one "robust"-reward environment is built and each policy is
    rolled out for `n_episodes` on it. Returns {policy name: [ExperimentResult
    per alpha, in `alphas` order]}.

    When `log_dir` is given and TensorBoard is available, per-policy scalars and
    the theoretical erosion curve are written under <log_dir>/sweep, using
    round(alpha * 100) as the step.
    """
    results = {name: [] for name in policies}
    writer = SummaryWriter(Path(log_dir) / "sweep") if log_dir and HAS_TB else None

    def _step(alpha_value) -> int:
        # round, don't truncate: e.g. 0.29 * 100 == 28.999999999999996
        return int(round(alpha_value * 100))

    try:
        for alpha in alphas:
            print(f" alpha={alpha:.2f}", end=" ")
            env_cfg = EnvConfig(
                n_products=n_products, max_steps=max_steps,
                alpha_true=alpha, reward_mode="robust", seed=seed)
            env = make_env(env_cfg)

            for name, policy_fn in policies.items():
                rewards, coi_vals, alpha_errs, revenues = run_policy_episode(env, policy_fn, n_episodes)

                # nominal margin: the policy's mean multiplier on an all-zero observation, minus 1
                nominal = policy_fn(np.zeros(3 * n_products + 3), n_products)
                result = ExperimentResult(
                    name=name, alpha=alpha,
                    reward_mean=float(np.mean(rewards)),
                    reward_std=float(np.std(rewards)),
                    coi_erosion=float(np.mean(coi_vals)) if coi_vals else 0.0,
                    alpha_error=float(np.mean(alpha_errs)) if alpha_errs else 0.0,
                    revenue=float(np.mean(revenues)) if revenues else 0.0,
                    margin=float(np.mean([nominal]) - 1.0))
                results[name].append(result)

                if writer:
                    step = _step(alpha)
                    writer.add_scalar(f'{name}/reward', result.reward_mean, step)
                    writer.add_scalar(f'{name}/coi_erosion', result.coi_erosion, step)
                    writer.add_scalar(f'{name}/alpha_error', result.alpha_error, step)
                    writer.add_scalar(f'{name}/revenue', result.revenue, step)

            print("done")

        # add theoretical curve
        if writer:
            theo = theoretical_coi_erosion_curve(np.array(alphas))
            for a, e in zip(alphas, theo):
                writer.add_scalar('theoretical/coi_erosion', e, _step(a))
    finally:
        # close the event file even when a rollout raises
        if writer:
            writer.close()

    return results
|
||||
|
||||
|
||||
def run_coi_demonstration(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
    """Main COI demonstration experiment.

    1. prints (and logs, when TensorBoard is available) the theoretical erosion curve
    2. sweeps the baseline policies over contamination levels 0.0 .. 0.5
    3. prints a per-policy summary and writes <log_dir>/coi_demo_results.json

    Returns the dict that was written: {'theoretical': ..., 'empirical': ...}.
    """
    print("=== COI Leakage Demonstration ===\n")

    Path(log_dir).mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(Path(log_dir) / "coi_demo") if HAS_TB else None

    try:
        # theoretical erosion curve
        print("1. Theoretical COI erosion (Theorem 1)")
        alphas = np.linspace(0.0, 0.6, 13)
        theo_erosion = theoretical_coi_erosion_curve(alphas, n_sessions=1000)

        for a, e in zip(alphas, theo_erosion):
            print(f" alpha={a:.2f} -> erosion={e:.3f}")
            if writer:
                # linspace values are not exact multiples of 0.05; round so the
                # step is not truncated to the integer below
                writer.add_scalar('theory/coi_erosion', e, int(round(a * 100)))

        # policy comparison
        print("\n2. Policy comparison across contamination levels")
        policies = {
            'fixed': lambda obs, n: PolicyRegistry.fixed(obs, n),
            'aggressive': PolicyRegistry.aggressive,
            'defensive': PolicyRegistry.defensive,
            'adaptive': PolicyRegistry.adaptive,
            'alpha_proportional': PolicyRegistry.alpha_proportional,
        }

        sweep_alphas = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
        results = run_contamination_sweep(
            sweep_alphas, policies, n_products=10, max_steps=100,
            n_episodes=5, seed=seed, log_dir=log_dir)

        # summarize
        print("\n3. Summary by policy")
        for name, res_list in results.items():
            avg_reward = np.mean([r.reward_mean for r in res_list])
            avg_coi = np.mean([r.coi_erosion for r in res_list])
            print(f" {name:20s}: avg_reward={avg_reward:.2f}, avg_coi={avg_coi:.3f}")

        # save results
        output = {
            'theoretical': {'alphas': alphas.tolist(), 'erosion': theo_erosion.tolist()},
            'empirical': {name: [r.to_dict() for r in res_list] for name, res_list in results.items()}}

        with open(Path(log_dir) / "coi_demo_results.json", 'w') as f:
            json.dump(output, f, indent=2)
    finally:
        # close the event file even when the sweep or the dump raises
        if writer:
            writer.close()

    print(f"\nResults saved to {log_dir}/coi_demo_results.json")
    print(f"TensorBoard: tensorboard --logdir {log_dir}")

    return output
|
||||
|
||||
|
||||
def run_reward_mode_comparison(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
    """Compare different reward modes.

    Runs the adaptive policy for 10 episodes under each reward mode at a fixed
    contamination of 0.3, logs the aggregates (TensorBoard, when available) and
    writes them to <log_dir>/reward_mode_results.json.

    Returns {mode: {'reward_mean', 'reward_std', 'coi_erosion', 'revenue'}}.
    """
    print("=== Reward Mode Comparison ===\n")

    Path(log_dir).mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(Path(log_dir) / "reward_modes") if HAS_TB else None

    reward_modes = ["revenue", "profit", "robust", "coi_aware"]
    alpha = 0.3  # moderate contamination

    results = {}
    try:
        for mode in reward_modes:
            print(f" mode={mode}", end=" ")
            env_cfg = EnvConfig(
                n_products=10, max_steps=200, alpha_true=alpha,
                reward_mode=mode, seed=seed)
            env = make_env(env_cfg)

            rewards, coi_vals, _, revenues = run_policy_episode(
                env, PolicyRegistry.adaptive, n_episodes=10)

            results[mode] = {
                'reward_mean': float(np.mean(rewards)),
                'reward_std': float(np.std(rewards)),
                'coi_erosion': float(np.mean(coi_vals)) if coi_vals else 0.0,
                'revenue': float(np.mean(revenues)) if revenues else 0.0}

            if writer:
                for k, v in results[mode].items():
                    writer.add_scalar(f'{mode}/{k}', v, 0)

            print(f"reward={results[mode]['reward_mean']:.2f}, coi={results[mode]['coi_erosion']:.3f}")
    finally:
        # close the event file even when a rollout raises
        if writer:
            writer.close()

    with open(Path(log_dir) / "reward_mode_results.json", 'w') as f:
        json.dump(results, f, indent=2)

    return results
|
||||
|
||||
|
||||
def run_alpha_drift_experiment(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
    """Test policy robustness under non-stationary contamination.

    Runs the adaptive policy for 10 episodes at each alpha drift rate (starting
    from alpha_true=0.2, "robust" reward) and logs the aggregates to TensorBoard
    when available. Nothing is written to JSON here.

    Returns {'drift_<rate>': {'reward_mean', 'coi_erosion', 'alpha_tracking_error'}}.
    """
    print("=== Alpha Drift Experiment ===\n")

    Path(log_dir).mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(Path(log_dir) / "alpha_drift") if HAS_TB else None

    drift_rates = [0.0, 0.01, 0.02, 0.05]
    results = {}

    try:
        for drift in drift_rates:
            print(f" drift={drift:.2f}", end=" ")
            env_cfg = EnvConfig(
                n_products=10, max_steps=200, alpha_true=0.2,
                alpha_drift=drift, reward_mode="robust", seed=seed)
            env = make_env(env_cfg)

            rewards, coi_vals, alpha_errs, _ = run_policy_episode(
                env, PolicyRegistry.adaptive, n_episodes=10)

            key = f'drift_{drift}'
            results[key] = {
                'reward_mean': float(np.mean(rewards)),
                'coi_erosion': float(np.mean(coi_vals)) if coi_vals else 0.0,
                'alpha_tracking_error': float(np.mean(alpha_errs)) if alpha_errs else 0.0}

            if writer:
                for k, v in results[key].items():
                    writer.add_scalar(f'drift_{drift}/{k}', v, 0)

            print(f"reward={results[key]['reward_mean']:.2f}, "
                  f"alpha_err={results[key]['alpha_tracking_error']:.3f}")
    finally:
        # close the event file even when a rollout raises
        if writer:
            writer.close()

    return results
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Run COI experiments")
    parser.add_argument("--exp", type=str, default="coi", choices=["coi", "reward", "drift", "all"])
    parser.add_argument("--log-dir", type=str, default="sim/case/thesis_simplified/runs")
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    # experiments run in this order; "all" selects every one of them
    runners = (
        ("coi", run_coi_demonstration),
        ("reward", run_reward_mode_comparison),
        ("drift", run_alpha_drift_experiment),
    )
    for key, runner in runners:
        if args.exp in (key, "all"):
            runner(args.log_dir, args.seed)
|
||||
72
sim/case/thesis_simplified/separability.py
Normal file
72
sim/case/thesis_simplified/separability.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Behavioral separability for human/agent detection.
|
||||
|
||||
Computes divergence signals delta_H, delta_A from session trajectories using
|
||||
transition kernel estimation and KL divergence to prototype behavioral profiles.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import Dict, List, Tuple, TYPE_CHECKING
|
||||
import numpy as np
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .simplified import Event, Session
|
||||
|
||||
|
||||
# Prototype transition kernels: state -> {next state: probability}.
# Every row sums to 1. Key order inside a row is significant wherever the row is
# turned into parallel key/probability lists for sampling, so keep it stable.

# human sessions
TRANS_H = dict(
    start=dict(view=0.85, end=0.15),
    view=dict(detail=0.4, cart=0.3, view=0.2, end=0.1),
    detail=dict(cart=0.5, view=0.3, end=0.2),
    cart=dict(purchase=0.6, view=0.25, end=0.15),
    purchase=dict(end=1.0),
)

# agent sessions
TRANS_A = dict(
    start=dict(view=0.95, end=0.05),
    view=dict(detail=0.6, view=0.25, cart=0.1, end=0.05),
    detail=dict(view=0.5, cart=0.15, detail=0.3, end=0.05),
    cart=dict(view=0.4, purchase=0.2, end=0.4),
    purchase=dict(end=1.0),
)
|
||||
|
||||
|
||||
def kl_div(p: Dict[str, float], q: Dict[str, float], eps: float = 1e-10) -> float:
    """Smoothed KL divergence D_KL(p || q) for discrete distributions.

    The sum runs over the union of both supports. A key missing from either
    side is given mass `eps`, and `eps` is also added to numerator and
    denominator inside the log so neither can be zero.
    """
    support = set(p.keys()) | set(q.keys())
    total = 0
    for key in support:
        p_mass = p.get(key, eps)
        total = total + p_mass * np.log((p_mass + eps) / (q.get(key, eps) + eps))
    return total
|
||||
|
||||
|
||||
def build_kernel(events: List["Event"]) -> Dict[str, Dict[str, float]]:
    """Estimate the empirical transition kernel T' from a trajectory.

    Transitions are counted between consecutive `action` values, with an
    implicit "start" state before the first event, and each row is normalized
    to probabilities. An empty trajectory yields {}.
    """
    counts: Dict[str, Dict[str, int]] = {}
    origin = "start"
    for evt in events:
        dest = evt.action
        row = counts.setdefault(origin, {})
        row[dest] = row.get(dest, 0) + 1
        origin = dest

    kernel = {}
    for src, row in counts.items():
        row_total = sum(row.values())
        if row_total > 0:
            kernel[src] = {dst: hits / row_total for dst, hits in row.items()}
    return kernel
|
||||
|
||||
|
||||
def compute_divergence(session: "Session") -> Tuple[float, float]:
    """Divergence signals (delta_H, delta_A) of a session from the two prototypes.

    delta_H = mean KL(T' || T_H) across states, measures distance to human prototype
    delta_A = mean KL(T' || T_A) across states, measures distance to agent prototype
    The mean runs over the states present in the session's empirical kernel T'.
    A session with no transitions returns (0.5, 0.5).
    """
    kernel = build_kernel(session.events)
    if not kernel:
        return 0.5, 0.5

    n_states = len(kernel)

    def _mean_kl(prototype):
        # states missing from a prototype are compared against an empty row
        return sum(kl_div(kernel.get(s, {}), prototype.get(s, {})) for s in kernel) / n_states

    delta_h = _mean_kl(TRANS_H)
    delta_a = _mean_kl(TRANS_A)
    return delta_h, delta_a
|
||||
|
||||
|
||||
def estimate_alpha(session: "Session", beta: float = 2.0) -> float:
    """Per-session contamination estimate alpha_hat = sigma(beta*(delta_H - delta_A)).

    The value is above 0.5 when the session is further from the human prototype
    than from the agent prototype, and `beta` controls how sharply it reacts.
    Returns 0.5 when both divergences sum to zero or less.
    """
    dh, da = compute_divergence(session)
    if (dh + da) <= 0:
        return 0.5
    logit = beta * (dh - da)
    return 1.0 / (1.0 + np.exp(-logit))
|
||||
219
sim/case/thesis_simplified/simplified.py
Normal file
219
sim/case/thesis_simplified/simplified.py
Normal file
@@ -0,0 +1,219 @@
|
||||
"""Minimal implementation of thesis pricing system.
|
||||
|
||||
Implements the core loop: prices -> sessions -> demand -> prices
|
||||
with behavioral separability and robust pricing objective.
|
||||
|
||||
Objects:
|
||||
- Session trajectories tau_s from mixture of H/A behavioral profiles
|
||||
- Demand proxy q_hat via weighted action aggregation
|
||||
- COI leakage penalty for agent reconnaissance
|
||||
- Limbo: alternating price/demand history for trajectory analysis
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Tuple
|
||||
import numpy as np
|
||||
|
||||
from .coi import COIWindow, compute_coi_window
|
||||
from .separability import TRANS_H, TRANS_A, kl_div, build_kernel, compute_divergence, estimate_alpha
|
||||
|
||||
# Demand-signal weight omega(a) per action; unlisted actions fall back to 0.1 at
# the lookup sites. The behavioral kernels emit "cart" (never "add_to_cart"), so
# "cart" is listed explicitly with the add-to-cart weight. Without it a cart
# event would get the 0.1 fallback and count for less than a plain "view".
ACTION_WEIGHTS = {
    "add_to_cart": 0.8,
    "cart": 0.8,
    "checkout": 0.9,
    "purchase": 1.0,
    "view": 0.15,
    "detail": 0.25,
    "hover": 0.3,
    "start": 0.05,
    "end": 0.0,
}
|
||||
|
||||
|
||||
@dataclass
class Event:
    """One step of a session trajectory."""
    action: str        # action label of this step
    product_idx: int   # index of the product involved
    price_seen: float  # price shown at this step
    ts: float          # time offset of the step within the session
|
||||
|
||||
|
||||
@dataclass
class Session:
    """A session trajectory together with its ground-truth actor label."""
    sid: str             # session identifier
    events: List[Event]  # ordered trajectory
    actor: str           # H or A (ground truth label)
    # behavioral parameters used to generate the session; empty by default
    theta: Dict[str, float] = field(default_factory=dict)
|
||||
|
||||
|
||||
def compute_demand(session: Session) -> float:
    """Demand proxy q_hat = sum_k omega(a_k): summed action weights of a session.

    Actions without an entry in ACTION_WEIGHTS contribute 0.1 each.
    """
    total = 0
    for evt in session.events:
        total = total + ACTION_WEIGHTS.get(evt.action, 0.1)
    return total
|
||||
|
||||
|
||||
def sample_trajectory(rng: np.random.Generator, trans: Dict, prices: np.ndarray, costs: np.ndarray, theta: Dict[str, float],
                      is_agent: bool, session_noise: float = 0.02, surge: float = 0.08, max_mult: float = 1.8) -> Tuple[List[Event], int]:
    """Sample one session trajectory from a behavioral kernel.

    A product is drawn uniformly, its listed price is perturbed by Gaussian
    session noise and clipped, and states are then sampled from `trans` until
    "end" is reached or 30 events have been recorded. A sampled "purchase" only
    goes through with probability base_conv * exp(-price_sens * rel), where rel
    is the non-negative relative markup over cost; otherwise the session ends.
    The shown price rises with the accumulated action signal (`surge`), capped
    at `max_mult` times the session base price.

    Returns (events, product index).
    """
    # the order of rng calls below determines the sampled stream; keep it intact
    pidx = int(rng.integers(0, len(prices)))
    cost = float(costs[pidx])
    list_price = float(prices[pidx])
    base = list_price * (1.0 + rng.normal(0.0, session_noise))
    base = float(np.clip(base, cost * 1.01, list_price * 2.0))

    price = base
    signal = 0.0
    state = "start"
    t = 0.0
    events = []

    while state != "end" and len(events) < 30:
        probs = trans.get(state, {"end": 1.0})
        nxt = rng.choice(list(probs.keys()), p=list(probs.values()))

        if nxt == "purchase":  # purchase conversion check
            rel = max((price - cost) / (cost + 1e-6), 0.0)
            base_conv = theta.get("base_conv", 0.2)
            price_sens = theta.get("price_sens", 2.0)
            p_buy = float(np.clip(base_conv * np.exp(-price_sens * rel), 0.0, 1.0))
            if rng.random() > p_buy:
                nxt = "end"

        state = nxt
        # NOTE(review): the signal/price/clock updates are assumed to belong to
        # this "record an event" branch; confirm against the upstream file.
        if state not in {"start", "end"}:
            events.append(Event(action=state, product_idx=pidx, price_seen=float(price), ts=t))
            signal += float(ACTION_WEIGHTS.get(state, 0.1))
            price = float(np.clip(base * (1.0 + surge * signal), cost * 1.01, base * max_mult))
            # agents and humans draw their dwell time from different gamma shapes; floor 0.2
            dwell = rng.gamma(1.5, 0.8) if is_agent else rng.gamma(2.0, 1.2)
            t += max(0.2, dwell)

    return events, pidx
|
||||
|
||||
|
||||
def put_prices_to_market(prices: np.ndarray, costs: np.ndarray, alpha: float = 0.2, n_sessions: int = 50,
                         seed: int | None = None) -> Tuple[List[Session], Dict[str, float]]:
    """Generate sessions from the human/agent mixture model.

    Each session is an agent with probability `alpha`; the actor type selects
    the transition kernel and the range its behavioral parameters are drawn
    from. Returns (sessions, demand mapping sid -> q_hat).
    """
    rng = np.random.default_rng(seed)
    sessions: List[Session] = []
    demand: Dict[str, float] = {}

    for i in range(n_sessions):
        sid = f"s{i:04d}"
        is_agent = rng.random() < alpha

        if is_agent:
            trans = TRANS_A
            theta = {"price_sens": rng.uniform(0.05, 0.2), "base_conv": 0.01}
        else:
            trans = TRANS_H
            theta = {"price_sens": rng.uniform(1.5, 4.0), "base_conv": rng.uniform(0.2, 0.5)}

        events, _ = sample_trajectory(rng, trans, prices, costs=costs, theta=theta, is_agent=is_agent)
        session = Session(sid=sid, events=events, actor="A" if is_agent else "H", theta=theta)
        sessions.append(session)
        demand[sid] = compute_demand(session)

    return sessions, demand
|
||||
|
||||
|
||||
@dataclass
|
||||
class LimboUpdate:
|
||||
utype: str # "prices" or "demand"
|
||||
data: np.ndarray | Dict[str, float]
|
||||
t: int
|
||||
|
||||
|
||||
class Limbo:
|
||||
"""Historical trajectory of alternating price/demand observations."""
|
||||
|
||||
def __init__(self):
|
||||
self.history: List[LimboUpdate] = []
|
||||
self._t = 0
|
||||
|
||||
def add_update(self, utype: str, data: np.ndarray | Dict[str, float]) -> Dict:
|
||||
self.history.append(LimboUpdate(utype=utype, data=data, t=self._t))
|
||||
self._t += 1
|
||||
return {"action": "observe_demand" if utype == "prices" else "set_prices"}
|
||||
|
||||
def get_prices_history(self) -> List[np.ndarray]:
|
||||
return [u.data for u in self.history if u.utype == "prices"]
|
||||
|
||||
def get_demand_history(self) -> List[Dict[str, float]]:
|
||||
return [u.data for u in self.history if u.utype == "demand"]
|
||||
|
||||
|
||||
class System:
    """Main pricing system implementing robust Stackelberg objective.

    Manages the alternating loop: set prices p_t -> observe demand Q_hat(p_t) ->
    estimate contamination alpha from behavioral signals -> compute next prices.
    """

    def __init__(self, n_products: int = 10, costs: np.ndarray | None = None, lambda_coi: float = 0.5, seed: int | None = 42):
        """Set up products, reference prices and empty history.

        Args:
            n_products: Number of products being priced.
            costs: Per-product unit costs; sampled uniformly in [10, 50) when omitted.
            lambda_coi: Weight of the COI leak penalty in the objective.
            seed: Seed for the system's random generator.
        """
        self.n = n_products
        self.rng = np.random.default_rng(seed)
        self.costs = costs if costs is not None else self.rng.uniform(10, 50, n_products)
        # Reference prices carry a 20-50% markup over cost.
        self.refs = self.costs * (1 + self.rng.uniform(0.2, 0.5, n_products))
        self.lambda_coi = lambda_coi
        self.limbo = Limbo()
        self._alpha_est = 0.2
        self._sessions: List[Session] = []        # every session observed so far
        self._last_sessions: List[Session] = []   # sessions from the most recent observation
        self._last_coi: COIWindow | None = None

    @property
    def alpha(self) -> float:
        """Current contamination estimate."""
        return self._alpha_est

    def _estimate_alpha_from_sessions(self) -> float:
        """Mean of `estimate_alpha` over the 50 most recent sessions (or the current estimate if none)."""
        if not self._sessions:
            return self._alpha_est
        return float(np.mean([estimate_alpha(s) for s in self._sessions[-50:]]))

    def _revenue_under_demand(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
        """Return sum_j prices[j] * (demand credited to product j).

        Each session's demand is credited to the product of its first event.
        Session ids repeat across market rounds, so a sid is resolved against
        the most recent batch first and only then against older history
        (newest first). Scanning the full history from the start would credit
        demand to a stale session that happens to share the id.
        """
        latest = {s.sid: s for s in self._last_sessions}
        agg = np.zeros(self.n)
        for sid, q in demand.items():
            sess = latest.get(sid)
            if sess is None:
                sess = next((s for s in reversed(self._sessions) if s.sid == sid), None)
            if sess and sess.events:
                agg[sess.events[0].product_idx] += q
        return float(np.dot(prices, agg))

    def _compute_coi_window(self, demand: Dict[str, float]) -> COIWindow:
        """COI window for the most recent sessions; an all-zero window when there are none."""
        if not self._last_sessions:
            zeros = np.zeros(self.n, dtype=float)
            return COIWindow(policy=0.0, agent=0.0, leak=0.0, survival_ratio=0.0,
                             policy_by_product=zeros, agent_by_product=zeros, demand_weights=zeros)
        return compute_coi_window(self._last_sessions, self.costs, demand_mapping=demand)

    def _objective(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
        """Robust objective: R(p,d) - lambda * COI_leak.

        Side effect: stores the computed COI window in `_last_coi`.
        """
        profit = self._revenue_under_demand(prices, demand) - float(np.sum(self.costs))
        self._last_coi = self._compute_coi_window(demand)
        return profit - self.lambda_coi * self._last_coi.leak

    def compute_prices(self, demand: Dict[str, float] | None = None) -> np.ndarray:
        """Compute next prices via heuristic margin adjustment based on alpha estimate.

        `demand` is accepted for interface symmetry but is not used by the heuristic.
        The resulting prices are recorded in the Limbo history.
        """
        self._alpha_est = self._estimate_alpha_from_sessions()
        margin_scale = 1.0 - 0.5 * self._alpha_est  # defensive pricing under high contamination
        margins = (self.refs - self.costs) * margin_scale
        noise = self.rng.normal(0, 0.02, self.n) * self.costs
        prices = np.clip(self.costs + margins + noise, self.costs * 1.02, self.refs * 1.3)
        self.limbo.add_update("prices", prices)
        return prices

    def observe_demand(self, prices: np.ndarray, alpha_true: float = 0.2, n_sessions: int = 50) -> Dict[str, float]:
        """Simulate a market round at `prices` and record the resulting sessions and demand."""
        sessions, demand_map = put_prices_to_market(prices, costs=self.costs, alpha=alpha_true,
                                                    n_sessions=n_sessions, seed=int(self.rng.integers(0, 10000)))
        self._last_sessions = sessions
        self._sessions.extend(sessions)
        self.limbo.add_update("demand", demand_map)
        return demand_map

    def step(self, alpha_true: float = 0.2, n_sessions: int = 50) -> Tuple[np.ndarray, Dict[str, float], float, COIWindow]:
        """Run one price -> demand -> objective cycle; returns (prices, demand, reward, coi)."""
        demand_hist = self.limbo.get_demand_history()
        prices = self.compute_prices(demand_hist[-1] if demand_hist else None)
        demand = self.observe_demand(prices, alpha_true, n_sessions)
        reward = self._objective(prices, demand)
        return prices, demand, reward, self._last_coi or self._compute_coi_window(demand)

    def run(self, n_steps: int = 100, alpha_true: float = 0.2) -> Dict:
        """Run `n_steps` cycles and return the per-step trajectory as a dict of lists."""
        traj = {"prices": [], "demand": [], "rewards": [], "alpha_est": [], "alpha_true": alpha_true,
                "coi_policy": [], "coi_agent": [], "coi_leak": [], "coi_survival": []}
        for _ in range(n_steps):
            p, d, r, coi = self.step(alpha_true)
            traj["prices"].append(p)
            traj["demand"].append(d)
            traj["rewards"].append(r)
            traj["alpha_est"].append(self._alpha_est)
            traj["coi_policy"].append(coi.policy)
            traj["coi_agent"].append(coi.agent)
            traj["coi_leak"].append(coi.leak)
            traj["coi_survival"].append(coi.survival_ratio)
        return traj
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Closed-loop smoke run; `system` avoids shadowing the stdlib `sys` module name.
    system = System(n_products=5, seed=42)
    traj = system.run(n_steps=20, alpha_true=0.25)
    print(f"avg reward: {np.mean(traj['rewards']):.2f}, final alpha_hat: {traj['alpha_est'][-1]:.3f}, "
          f"COI_policy: {np.mean(traj['coi_policy']):.3f}, COI_agent: {np.mean(traj['coi_agent']):.3f}, leak: {np.mean(traj['coi_leak']):.3f}")

    # One standalone market round at hand-picked prices.
    costs = np.array([15.0, 28.0, 40.0, 18.0, 30.0])
    prices = np.array([20.0, 35.0, 50.0, 25.0, 40.0])
    sessions, demand = put_prices_to_market(prices, costs=costs, alpha=0.3, n_sessions=20, seed=123)
    print(f'sessions: {len(sessions)}, agents: {sum(1 for s in sessions if s.actor=="A")}')

    for n in (1, 5, 10, 50, 100):
        # theoretical: erosion = 1 - 2/(N+1) for uniform order statistic
        print(f'N={n:3d} agents -> COI erosion: {1.0 - 2.0/(n+1):.3f}')

    # A funnel that ends in a purchase, spread over two time units.
    events = [
        Event('view', 0, 20.0, 0.1),
        Event('detail', 0, 20.0, 0.5),
        Event('cart', 0, 20.0, 1.0),
        Event('purchase', 0, 20.0, 2.0),
    ]
    print(f'human-like session alpha_hat: {estimate_alpha(Session(sid="test", events=events, actor="H")):.3f}')

    # Rapid view/detail cycling with no purchase.
    events_a = [
        Event('view', 0, 20.0, 0.1),
        Event('detail', 0, 20.0, 0.2),
        Event('view', 0, 20.0, 0.3),
        Event('detail', 0, 20.0, 0.4),
    ]
    print(f'agent-like session alpha_hat: {estimate_alpha(Session(sid="test2", events=events_a, actor="A")):.3f}')
|
||||
249
sim/case/thesis_simplified/simplified_env.py
Normal file
249
sim/case/thesis_simplified/simplified_env.py
Normal file
@@ -0,0 +1,249 @@
|
||||
"""Gymnasium-compatible RL environment for thesis pricing system.
|
||||
|
||||
Wraps simplified.System with standard Gym interface for training pricing policies.
|
||||
Supports multiple reward modes and contamination scenarios.
|
||||
|
||||
Action: price multipliers [0.5, 1.5] applied to reference prices
|
||||
Observation: [prices/refs, demand shares, alpha_est, alpha_true, margins, t/max_steps]
|
||||
Reward: configurable objective (revenue, profit, robust, coi-aware)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Tuple
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
HAS_GYM = True
|
||||
except ImportError:
|
||||
HAS_GYM = False
|
||||
|
||||
from .simplified import System, Session, Event, Limbo, put_prices_to_market, compute_demand, estimate_alpha
|
||||
from .coi import COIWindow, compute_coi_window, coi_erosion
|
||||
|
||||
|
||||
@dataclass
|
||||
class EnvConfig:
|
||||
n_products: int = 5
|
||||
max_steps: int = 200
|
||||
sessions_per_step: int = 30
|
||||
alpha_true: float = 0.2
|
||||
alpha_drift: float = 0.0
|
||||
alpha_bounds: Tuple[float, float] = (0.0, 0.6)
|
||||
lambda_coi: float = 0.5
|
||||
lambda_vol: float = 0.1
|
||||
reward_mode: str = "robust" # revenue | profit | robust | coi_aware
|
||||
normalize_reward: bool = True
|
||||
seed: int | None = 42
|
||||
|
||||
|
||||
def aggregate_purchases(sessions: list[Session], n_products: int, costs: np.ndarray) -> Tuple[np.ndarray, float, float]:
    """Tally purchase events across sessions.

    Only events whose action is "purchase" and whose product index lies in
    ``[0, n_products)`` are counted.

    Returns:
        ``(counts, revenue, cost)``: per-product purchase counts, the sum of
        prices seen at purchase, and the sum of the purchased products' costs.
    """
    counts = np.zeros(n_products, dtype=float)
    total_revenue = 0.0
    total_cost = 0.0
    all_events = (event for session in sessions for event in session.events)
    for event in all_events:
        if event.action != "purchase":
            continue
        idx = event.product_idx
        if not (0 <= idx < n_products):
            continue
        counts[idx] += 1.0
        total_revenue += float(event.price_seen)
        total_cost += float(costs[idx])
    return counts, total_revenue, total_cost
|
||||
|
||||
|
||||
class PricingEnv(gym.Env if HAS_GYM else object):
    """RL environment for dynamic pricing under agent contamination.

    Platform sets prices p_t, market responds with mixture demand Q(p) = (1-alpha)*D_H + alpha*D_A.
    Agent estimates contamination alpha_hat from behavioral signals.
    Reward balances profit vs COI leakage.
    """
    metadata = {"render_modes": ["human", "ansi"]}

    def __init__(self, cfg: EnvConfig | None = None):
        """Build the action/observation spaces; the underlying System is created in `reset()`.

        Raises:
            ImportError: if gymnasium is not installed.
        """
        if not HAS_GYM:
            raise ImportError("gymnasium required")
        self.cfg = cfg or EnvConfig()
        self.n = self.cfg.n_products
        self._sys: System | None = None
        self._t = 0
        self._alpha = self.cfg.alpha_true
        self._last_prices: np.ndarray | None = None
        self._last_demand: Dict[str, float] | None = None
        self._episode_rewards: list[float] = []
        self._demand_agg = np.zeros(self.n)

        self.action_space = spaces.Box(low=0.5, high=1.5, shape=(self.n,), dtype=np.float32)
        obs_dim = self.n + self.n + 1 + 1 + self.n + 1  # prices + demand + alpha_hat + alpha + margins + t
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)

    def _build_obs(self) -> np.ndarray:
        """Assemble the observation: prices/refs, demand shares, [alpha_hat, alpha], margins, t/max_steps."""
        if self._sys is None:
            return np.zeros(self.observation_space.shape[0], dtype=np.float32)
        prices = self._last_prices if self._last_prices is not None else self._sys.refs
        return np.concatenate([
            prices / (self._sys.refs + 1e-6),
            self._demand_agg / (np.sum(self._demand_agg) + 1e-6),
            [self._sys.alpha, self._alpha],
            (prices - self._sys.costs) / (self._sys.costs + 1e-6),
            [self._t / self.cfg.max_steps],
        ]).astype(np.float32)

    def _compute_reward(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
        """Compute the configured reward for the latest market round.

        Side effect: refreshes `_demand_agg` (per-product demand used in the observation).
        Unknown `reward_mode` values fall back to plain profit.
        """
        cfg, system = self.cfg, self._sys
        if system is None:
            return 0.0

        # Aggregate demand per product. Session ids repeat every round, so a sid
        # is resolved against the latest batch first and only then against older
        # history (newest first); a first-match scan over the whole history
        # would pick up a stale session that shares the id.
        latest = {s.sid: s for s in system._last_sessions}
        agg = np.zeros(self.n)
        for sid, q in demand.items():
            sess = latest.get(sid)
            if sess is None:
                sess = next((s for s in reversed(system._sessions) if s.sid == sid), None)
            if sess and sess.events:
                agg[sess.events[0].product_idx] += q
        self._demand_agg = agg

        _, revenue, cost = aggregate_purchases(system._last_sessions, self.n, system.costs)
        profit = revenue - cost

        # Penalise large price moves relative to reference prices (no penalty on the first step).
        vol_penalty = 0.0
        if self._last_prices is not None:
            vol_penalty = cfg.lambda_vol * float(np.mean(np.abs(prices - self._last_prices) / (system.refs + 1e-6)))

        coi = compute_coi_window(system._last_sessions, system.costs, demand_mapping=demand)
        leak = float(coi.leak)

        reward_fns = {
            "revenue": lambda: revenue,
            "profit": lambda: profit,
            "robust": lambda: profit - cfg.lambda_coi * leak - vol_penalty,
            "coi_aware": lambda: profit - cfg.lambda_coi * (1 + 2 * system.alpha) * leak - vol_penalty,
        }
        r = reward_fns.get(cfg.reward_mode, lambda: profit)()
        return float(r / (float(np.sum(system.refs)) + 1e-6)) if cfg.normalize_reward else float(r)

    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
        """Start a new episode with a fresh System; falls back to `cfg.seed` when no seed is given."""
        seed = seed if seed is not None else self.cfg.seed
        self._sys = System(n_products=self.n, lambda_coi=self.cfg.lambda_coi, seed=seed)
        self._t, self._alpha = 0, self.cfg.alpha_true
        self._last_prices, self._last_demand = None, None
        self._episode_rewards, self._demand_agg = [], np.zeros(self.n)
        return self._build_obs(), {"alpha_true": self._alpha, "alpha_est": self._sys.alpha,
                                   "costs": self._sys.costs.copy(), "refs": self._sys.refs.copy()}

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, dict]:
        """Apply price multipliers, simulate one market round and return the Gym 5-tuple.

        Raises:
            RuntimeError: if called before `reset()`.
        """
        if self._sys is None:
            raise RuntimeError("call reset() first")

        action = np.clip(action, 0.5, 1.5)
        prices = np.clip(self._sys.refs * action.astype(np.float64), self._sys.costs * 1.01, self._sys.refs * 2.0)
        demand = self._sys.observe_demand(prices, alpha_true=self._alpha, n_sessions=self.cfg.sessions_per_step)
        self._sys.limbo.add_update("prices", prices)
        self._sys._alpha_est = self._sys._estimate_alpha_from_sessions()

        # The reward must be computed before `_last_prices` is overwritten (volatility penalty).
        reward = self._compute_reward(prices, demand)
        self._episode_rewards.append(reward)
        self._last_prices, self._last_demand = prices.copy(), demand
        self._t += 1

        # compute info metrics using shared helper
        purchases, revenue, cost = aggregate_purchases(self._sys._last_sessions, self.n, self._sys.costs)
        n_agents = int(self._alpha * self.cfg.sessions_per_step)  # expected count, not the sampled one
        coi = compute_coi_window(self._sys._last_sessions, self._sys.costs, demand_mapping=demand)

        info = {
            "alpha_true": self._alpha, "alpha_est": self._sys.alpha,
            "alpha_error": abs(self._alpha - self._sys.alpha),
            "revenue": float(revenue), "profit": float(revenue - cost), "cost": float(cost),
            "n_purchases": int(np.sum(purchases)),
            "avg_margin": float(np.mean((prices - self._sys.costs) / self._sys.costs)),
            "n_sessions": len(demand), "n_agents": n_agents, "price_std": float(np.std(prices)),
            "coi_erosion": coi_erosion(coi.policy, coi.agent),
            "coi_policy": float(coi.policy), "coi_agent": float(coi.agent),
            "coi_leakage": float(coi.leak), "coi_survival": float(coi.survival_ratio),
            "cumulative_reward": sum(self._episode_rewards), "step": self._t,
        }
        return self._build_obs(), reward, self._t >= self.cfg.max_steps, False, info

    def render(self, mode: str = "human") -> str | None:
        """Return a one-line status string (also printed in "human" mode); None before the first step."""
        if self._sys is None or self._last_prices is None:
            return None
        out = f"t={self._t}/{self.cfg.max_steps} | alpha_true={self._alpha:.3f} alpha_hat={self._sys.alpha:.3f} | " \
              f"prices: {self._last_prices.round(1)} | demand: {self._demand_agg.round(2)} | " \
              f"reward: {self._episode_rewards[-1] if self._episode_rewards else 0:.3f}"
        if mode == "human":
            print(out)
        return out

    def close(self) -> None:
        """Nothing to release."""
        pass
|
||||
|
||||
|
||||
class ContaminationSweepEnv(PricingEnv):
    """PricingEnv whose true contamination level follows a cyclic schedule."""

    def __init__(self, cfg: EnvConfig | None = None, alpha_schedule: list[float] | None = None):
        super().__init__(cfg)
        self._schedule_idx = 0
        # An empty or missing schedule falls back to the default sweep.
        if alpha_schedule:
            self._schedule = alpha_schedule
        else:
            self._schedule = [0.1, 0.2, 0.3, 0.4, 0.5]

    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
        """Reset the episode; pass ``options={"advance_schedule": True}`` to move to the next level.

        The selected level is written to ``cfg.alpha_true`` before the base reset runs.
        """
        wants_advance = options.get("advance_schedule", False) if options else False
        if wants_advance:
            next_idx = self._schedule_idx + 1
            self._schedule_idx = next_idx % len(self._schedule)
            self.cfg.alpha_true = self._schedule[self._schedule_idx]
        return super().reset(seed, options)
|
||||
|
||||
|
||||
class AdversarialEnv(PricingEnv):
    """PricingEnv in which contamination grows when prices are predictable.

    Predictability is the inverse of the price spread over the last ten steps;
    the more stable the prices, the faster the true alpha drifts upward.
    """

    def __init__(self, cfg: EnvConfig | None = None, exploitation_rate: float = 0.02):
        super().__init__(cfg)
        self._price_history: list[np.ndarray] = []
        self._exploit_rate = exploitation_rate

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, dict]:
        """Take a normal step, then update the true alpha from recent price stability."""
        result = super().step(action)
        obs, reward, term, trunc, info = result

        latest = self._last_prices
        if latest is not None:
            self._price_history.append(latest.copy())

        # NOTE(review): alpha is assumed to drift only once more than ten price
        # vectors have been recorded — confirm against the upstream file.
        predictability = 0.0
        if len(self._price_history) > 10:
            window_std = float(np.std(self._price_history[-10:]))
            predictability = 1.0 / (window_std + 0.1)
            drift = self._exploit_rate * predictability * self._sys.rng.random()
            self._alpha = np.clip(self._alpha + drift, *self.cfg.alpha_bounds)

        info["predictability"] = predictability
        return obs, reward, term, trunc, info

    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
        """Forget the recorded prices and reset the base environment."""
        self._price_history = []
        return super().reset(seed, options)
|
||||
|
||||
|
||||
def make_env(cfg: EnvConfig | None = None, env_type: str = "standard") -> PricingEnv:
    """Build an environment by type name.

    "sweep" and "adversarial" select the specialised environments; any other
    value (including the default "standard") yields a plain PricingEnv.
    """
    if env_type == "sweep":
        env_cls = ContaminationSweepEnv
    elif env_type == "adversarial":
        env_cls = AdversarialEnv
    else:
        env_cls = PricingEnv
    return env_cls(cfg)
|
||||
|
||||
|
||||
# baseline policies
def fixed_price_policy(refs, margin=0.0):
    """Same multiplier ``1 + margin`` for every product (one entry per element of ``refs``)."""
    return np.ones(len(refs), dtype=np.float32) * (1.0 + margin)


def random_policy(n, rng=None):
    """Uniform random multipliers in [0.7, 1.3); a fresh generator is created when ``rng`` is not given."""
    return (rng or np.random.default_rng()).uniform(0.7, 1.3, n).astype(np.float32)


def adaptive_policy(obs, n, base=0.1):
    """Shrink the base margin as the contamination estimate (``obs[2 * n]``) grows."""
    return np.ones(n, dtype=np.float32) * (1.0 + base * (1.0 - 0.4 * obs[2 * n]))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Roll the adaptive baseline through one episode and report the outcome.
    cfg = EnvConfig(n_products=100, max_steps=100, alpha_true=0.25, reward_mode="robust")
    env = make_env(cfg)
    obs, info = env.reset()
    print(f"initial: alpha={info['alpha_true']:.2f}")

    total_reward = 0.0
    step_idx = 0
    while step_idx < cfg.max_steps:
        obs, reward, done, _, info = env.step(adaptive_policy(obs, cfg.n_products))
        total_reward += reward
        # Print a status line every tenth step.
        if step_idx % 10 == 0:
            env.render()
        if done:
            break
        step_idx += 1

    print(f"\ntotal reward: {total_reward:.2f}, final alpha_hat: {info['alpha_est']:.3f}")
|
||||
168
sim/case/thesis_simplified/summarize.py
Normal file
168
sim/case/thesis_simplified/summarize.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""Summarize TensorBoard logs into comparison tables."""
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
import pandas as pd
|
||||
|
||||
try:
|
||||
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
|
||||
HAS_TB = True
|
||||
except ImportError:
|
||||
HAS_TB = False
|
||||
|
||||
|
||||
@dataclass
class RunInfo:
    """Identity of one experiment run, parsed from its directory name."""

    # Algorithm or baseline name, e.g. "ppo" or "fixed".
    algo: str
    # Contamination level encoded in the run name.
    alpha: float
    # Reward mode suffix of the run name.
    reward_mode: str
    # Directory holding the run's logs.
    path: Path
|
||||
|
||||
|
||||
def parse_run_name(name: str) -> RunInfo | None:
    """Extract algo, alpha, reward_mode from run directory name.

    Recognised patterns: ``ppo_a0.20_robust``, ``cmp_fixed_a0.20``, ``sac_a0.90_robust``.
    The reward mode defaults to ``'robust'`` when the name carries no suffix.

    Returns:
        A RunInfo with an empty placeholder path, or None when the name does
        not match or its alpha field is not a valid number (e.g. ``a1.2.3``).
    """
    m = re.match(r'(cmp_)?(\w+)_a([\d.]+)_?(\w+)?', name)
    if not m:
        return None
    _, algo, alpha_text, mode = m.groups()
    try:
        # The character class also admits strings such as "1.2.3" or ".";
        # treat those as "not a run directory" instead of raising.
        alpha = float(alpha_text)
    except ValueError:
        return None
    return RunInfo(algo=algo, alpha=alpha, reward_mode=mode or 'robust', path=Path())
|
||||
|
||||
|
||||
def load_tb_scalars(log_dir: Path, tags: list[str], reduce: str = 'last') -> dict[str, float]:
    """Read scalar series from a TensorBoard log directory and reduce each to one number.

    Args:
        log_dir: Directory containing the event files.
        tags: Scalar tags to look up; tags that are absent or empty are omitted.
        reduce: 'last', 'mean', 'max' or 'min'. Any other value yields no entries.

    Returns:
        Mapping tag -> reduced value; empty when TensorBoard is unavailable.
    """
    if not HAS_TB:
        return {}

    reducers = {
        'last': lambda series: series[-1],
        'mean': lambda series: sum(series) / len(series),
        'max': max,
        'min': min,
    }
    accumulator = EventAccumulator(str(log_dir))
    accumulator.Reload()
    available = accumulator.Tags().get('scalars', [])

    reduced = {}
    for tag in tags:
        if tag not in available:
            continue
        series = [point.value for point in accumulator.Scalars(tag)]
        if series and reduce in reducers:
            reduced[tag] = reducers[reduce](series)
    return reduced
|
||||
|
||||
|
||||
def load_json_results(log_dir: Path) -> dict[str, float]:
    """Return the parsed contents of ``log_dir/results.json``, or ``{}`` when the file is absent."""
    candidate = log_dir / 'results.json'
    if not candidate.exists():
        return {}
    with open(candidate) as handle:
        return json.load(handle)
|
||||
|
||||
|
||||
def discover_runs(base_dir: Path) -> list[RunInfo]:
    """Find all experiment runs in base directory.

    Only sub-directories whose names `parse_run_name` recognises are returned,
    each with ``path`` set to its directory. Entries are visited in sorted
    order so the result does not depend on filesystem listing order. A missing
    (or non-directory) ``base_dir`` yields an empty list rather than raising.
    """
    if not base_dir.is_dir():
        return []
    runs = []
    for d in sorted(base_dir.iterdir()):
        if not d.is_dir():
            continue
        info = parse_run_name(d.name)
        if info:
            info.path = d
            runs.append(info)
    return runs
|
||||
|
||||
|
||||
def build_tables(runs: list[RunInfo], metrics: list[str], reduce: str = 'last') -> dict[str, dict[str, pd.DataFrame]]:
    """Build pivot tables: reward_mode -> metric -> DataFrame[alpha x algo].

    For every run and metric the value is taken from ``results.json`` when a
    matching ``*_mean`` key is present, otherwise from the TensorBoard scalar
    reduced with ``reduce``. Metrics with no value in any run are omitted.

    The short metric names used here differ from the keys produced by
    ``evaluate_policy`` (``coi_erosion_mean``, ``coi_leakage_mean``,
    ``avg_margin_mean``, ``alpha_error_mean``), so those long keys are tried
    first and ``<metric>_mean`` second. This assumes results.json stores that
    evaluation dict — TODO confirm against the writer in train.py.
    """
    json_aliases = {
        'margin': 'avg_margin',
        'erosion': 'coi_erosion',
        'leakage': 'coi_leakage',
        'estimation_error': 'alpha_error',
    }

    def _tb_tag(metric: str) -> str:
        # TensorBoard namespace used by the training callbacks for this metric.
        if metric in ('revenue', 'profit', 'margin'):
            return f'economics/{metric}'
        if metric in ('erosion', 'leakage'):
            return f'coi/{metric}'
        return f'alpha/{metric}'

    # collect data: {reward_mode: {metric: {(alpha, algo): value}}}
    data = defaultdict(lambda: defaultdict(dict))
    tb_tags = [_tb_tag(m) for m in metrics]
    tag_map = dict(zip(tb_tags, metrics))

    for run in runs:
        # try json first (final eval metrics)
        jm = load_json_results(run.path)
        tb = load_tb_scalars(run.path, tb_tags, reduce)

        for tag, metric in tag_map.items():
            json_keys = [f'{json_aliases[metric]}_mean'] if metric in json_aliases else []
            json_keys.append(f'{metric}_mean')
            val = next((jm[k] for k in json_keys if k in jm), None)
            if val is None and tag in tb:
                val = tb[tag]
            if val is not None:
                data[run.reward_mode][metric][(run.alpha, run.algo)] = val

    # convert to DataFrames
    tables = {}
    for mode, metrics_data in data.items():
        tables[mode] = {}
        for metric, vals in metrics_data.items():
            if not vals:
                continue
            alphas = sorted(set(a for a, _ in vals.keys()))
            algos = sorted(set(al for _, al in vals.keys()))
            df = pd.DataFrame(index=alphas, columns=algos, dtype=float)
            for (a, al), v in vals.items():
                df.loc[a, al] = v
            df.index.name = 'alpha'
            tables[mode][metric] = df
    return tables
|
||||
|
||||
|
||||
def format_table(df: pd.DataFrame, fmt: str = '.3f') -> str:
    """Render ``df`` as a markdown table, formatting floats with ``fmt``."""
    rendered = df.to_markdown(floatfmt=fmt)
    return rendered
|
||||
|
||||
|
||||
def summarize(base_dir: str = 'sim/case/thesis_simplified/runs',
              metrics: list[str] | None = None,
              reduce: str = 'last',
              output: str | None = None) -> dict:
    """Print (and optionally save) markdown comparison tables for all runs under ``base_dir``.

    Args:
        base_dir: Directory whose sub-directories are experiment runs.
        metrics: Metric names to tabulate; defaults to revenue, profit, margin, erosion, leakage.
        reduce: Reduction applied to TensorBoard series.
        output: Optional path of a file to write the markdown report to.

    Returns:
        The tables produced by ``build_tables``, or ``{}`` when no runs are found.
    """
    root = Path(base_dir)
    if not metrics:
        metrics = ['revenue', 'profit', 'margin', 'erosion', 'leakage']

    found = discover_runs(root)
    if not found:
        print(f"No runs found in {root}")
        return {}
    print(f"Found {len(found)} runs")

    tables = build_tables(found, metrics, reduce)

    # One top-level section per reward mode, one sub-section per metric.
    chunks = []
    for mode in sorted(tables):
        chunks.append(f"\n# Reward Mode: {mode}\n")
        per_metric = tables[mode]
        for metric in sorted(per_metric):
            chunks.extend([f"\n## {metric}\n", format_table(per_metric[metric]), ""])

    report = '\n'.join(chunks)
    print(report)

    if output:
        Path(output).write_text(report)
        print(f"\nSaved to {output}")

    return tables
|
||||
|
||||
|
||||
if __name__ == '__main__':
    import argparse

    # Command-line front end for summarize().
    parser = argparse.ArgumentParser()
    parser.add_argument('--dir', default='sim/case/thesis_simplified/runs')
    parser.add_argument('--metrics', nargs='+', default=['revenue', 'profit', 'margin', 'erosion', 'leakage'])
    parser.add_argument('--reduce', default='last', choices=['last', 'mean', 'max', 'min'])
    parser.add_argument('--output', '-o', help='save markdown to file')
    cli = parser.parse_args()
    summarize(cli.dir, cli.metrics, cli.reduce, cli.output)
|
||||
336
sim/case/thesis_simplified/train.py
Normal file
336
sim/case/thesis_simplified/train.py
Normal file
@@ -0,0 +1,336 @@
|
||||
"""RL training for thesis pricing system with thesis-aligned metrics.
|
||||
|
||||
Trains pricing policies using stable-baselines3 with TensorBoard logging.
|
||||
Tracks COI erosion, alpha estimation error, and economic KPIs per thesis formulation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import argparse
|
||||
import json
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from dataclasses import dataclass, asdict, field
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Callable, Any
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
from stable_baselines3 import PPO, SAC, A2C
|
||||
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
|
||||
from stable_baselines3.common.vec_env import DummyVecEnv
|
||||
from stable_baselines3.common.monitor import Monitor
|
||||
HAS_SB3 = True
|
||||
except ImportError:
|
||||
HAS_SB3 = False
|
||||
|
||||
try:
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
HAS_TB = True
|
||||
except ImportError:
|
||||
HAS_TB = False
|
||||
|
||||
from .simplified_env import PricingEnv, EnvConfig, make_env, adaptive_policy, fixed_price_policy, random_policy
|
||||
|
||||
|
||||
@dataclass
class EpisodeMetrics:
    """Running sums of per-step metrics for one episode."""

    reward: float = 0.0
    revenue: float = 0.0
    profit: float = 0.0
    coi_erosion: float = 0.0
    coi_leakage: float = 0.0
    alpha_error: float = 0.0
    avg_margin: float = 0.0
    n_agents: int = 0
    steps: int = 0

    def accumulate(self, info: Dict[str, Any]) -> None:
        """Add one step's ``info`` values to the running sums; missing keys count as 0."""
        self.steps += 1
        # These fields are summed directly from the info key of the same name.
        for name in ('reward', 'revenue', 'profit', 'coi_erosion', 'coi_leakage', 'avg_margin', 'n_agents'):
            setattr(self, name, getattr(self, name) + info.get(name, 0))
        # The alpha error is derived from the true and estimated values.
        gap = info.get('alpha_true', 0) - info.get('alpha_est', 0)
        self.alpha_error += abs(gap)

    def normalized(self) -> Dict[str, float]:
        """Per-step averages of the summed metrics (``reward`` and ``steps`` are not included)."""
        divisor = max(self.steps, 1)
        averaged = {}
        for name in ['revenue', 'profit', 'coi_erosion', 'coi_leakage', 'alpha_error', 'avg_margin', 'n_agents']:
            averaged[name] = getattr(self, name) / divisor
        return averaged
|
||||
|
||||
|
||||
@dataclass
|
||||
class ExperimentConfig:
|
||||
algo: str = "ppo"
|
||||
total_timesteps: int = 100_000
|
||||
n_envs: int = 4
|
||||
eval_freq: int = 5000
|
||||
n_eval_episodes: int = 10
|
||||
log_dir: str = "sim/case/thesis_simplified/runs"
|
||||
seed: int = 42
|
||||
n_products: int = 10
|
||||
max_steps: int = 200
|
||||
alpha_true: float = 0.2
|
||||
reward_mode: str = "robust"
|
||||
experiment_name: str | None = None
|
||||
|
||||
def __post_init__(self):
|
||||
if self.experiment_name is None:
|
||||
self.experiment_name = f"{self.algo}_a{self.alpha_true:.2f}_{self.reward_mode}"
|
||||
|
||||
|
||||
class Policy:
    """Unified policy interface for baselines and trained models.

    Wraps a function ``(obs, n_products) -> action`` behind the same
    ``predict(obs, deterministic)`` call that trained models expose.
    """

    def __init__(self, policy_fn: Callable[[np.ndarray, int], np.ndarray], name: str):
        self._fn, self.name = policy_fn, name

    def predict(self, obs: np.ndarray, deterministic: bool = True) -> tuple[np.ndarray, None]:
        """Return ``(action, None)``; ``deterministic`` is accepted for API compatibility only.

        The product count is recovered from the observation length, which is
        ``3 * n + 3`` for the pricing environment.
        """
        return self._fn(obs, (len(obs) - 3) // 3), None

    @staticmethod
    def fixed(margin: float = 0.15) -> "Policy":
        """Constant multiplier ``1 + margin`` on every product."""
        return Policy(lambda obs, n: fixed_price_policy(np.ones(n), margin), f"fixed_{margin:.2f}")

    @staticmethod
    def adaptive(base_margin: float = 0.15) -> "Policy":
        """Margin that shrinks with the contamination estimate in the observation."""
        return Policy(lambda obs, n: adaptive_policy(obs, n, base_margin), f"adaptive_{base_margin:.2f}")

    @staticmethod
    def random(seed: int | None = None) -> "Policy":
        """Uniform random multipliers.

        One generator is created per policy and reused across calls, so a
        given ``seed`` reproduces the same action sequence; ``seed=None``
        keeps the previous non-reproducible behaviour.
        """
        rng = np.random.default_rng(seed)
        return Policy(lambda obs, n: random_policy(n, rng), "random")

    @staticmethod
    def myopic(greed: float = 0.3) -> "Policy":
        """Raise all prices in proportion to the mean normalised demand, clipped to [0.5, 1.5]."""
        def _fn(obs: np.ndarray, n: int) -> np.ndarray:
            # Demand shares occupy obs[n:2n]; fall back to 0.5 when the observation is too short.
            demand_norm = obs[n:2*n] if len(obs) > 2*n else np.ones(n) * 0.5
            return np.ones(n, dtype=np.float32) * np.clip(1.0 + greed * (1 + np.mean(demand_norm)), 0.5, 1.5)
        return Policy(_fn, f"myopic_{greed:.1f}")
|
||||
|
||||
|
||||
def log_metrics(writer: SummaryWriter | None, metrics: Dict[str, float], prefix: str, step: int) -> None:
    """Write every metric as the scalar ``<prefix>/<name>`` at ``step``; a no-op without a writer."""
    if writer is not None:
        for name in metrics:
            writer.add_scalar(f'{prefix}/{name}', metrics[name], step)
|
||||
|
||||
|
||||
class MetricsCallback(BaseCallback):
    """Training callback that mirrors per-step env ``info`` metrics to TensorBoard."""

    def __init__(self, writer: SummaryWriter | None, verbose: int = 0):
        super().__init__(verbose)
        self._writer = writer

    def _on_step(self) -> bool:
        """Log each env's info dict at the current timestep; always returns True to keep training."""
        sink = self._writer
        if sink is not None:
            for info in self.locals.get('infos', []):
                when = self.num_timesteps
                alpha_gap = abs(info.get('alpha_true', 0) - info.get('alpha_est', 0))
                scalars = (
                    ('economics/revenue', info.get('revenue', 0)),
                    ('economics/profit', info.get('profit', 0)),
                    ('economics/margin', info.get('avg_margin', 0)),
                    ('coi/erosion', info.get('coi_erosion', 0)),
                    ('coi/leakage', info.get('coi_leakage', 0)),
                    ('alpha/estimation_error', alpha_gap),
                    ('agents/count', info.get('n_agents', 0)),
                )
                for tag, value in scalars:
                    sink.add_scalar(tag, value, when)
        return True
|
||||
|
||||
|
||||
def make_vec_env(cfg: ExperimentConfig, n_envs: int = 1) -> DummyVecEnv:
    """Build a DummyVecEnv of ``n_envs`` monitored pricing environments configured from ``cfg``."""
    def _factory():
        # Every copy is built from the same settings, including the seed.
        env_cfg = EnvConfig(n_products=cfg.n_products, max_steps=cfg.max_steps,
                            alpha_true=cfg.alpha_true, reward_mode=cfg.reward_mode, seed=cfg.seed)
        return Monitor(make_env(env_cfg))

    factories = [_factory] * n_envs
    return DummyVecEnv(factories)
|
||||
|
||||
|
||||
def run_episodes(policy: Policy | Any, env: PricingEnv, n_episodes: int) -> List[EpisodeMetrics]:
    """Roll ``policy`` through ``n_episodes`` full episodes and return one EpisodeMetrics per episode.

    The policy is always queried with ``deterministic=True``. Each episode
    runs until the environment reports termination or truncation.
    """
    collected = []
    for _ in range(n_episodes):
        obs, _ = env.reset()
        episode = EpisodeMetrics()
        while True:
            action, _ = policy.predict(obs, deterministic=True)
            obs, reward, term, trunc, info = env.step(action)
            episode.accumulate(info)
            # The step reward is returned separately from `info`, so add it here.
            episode.reward += reward
            if term or trunc:
                break
        collected.append(episode)
    return collected
|
||||
|
||||
|
||||
def evaluate_policy(policy: Policy | Any, cfg: ExperimentConfig, n_episodes: int = 20) -> Dict[str, float]:
    """Evaluate ``policy`` on a fresh environment and return summary statistics.

    The evaluation environment uses ``cfg.seed + 999``. The result holds
    ``reward_mean`` / ``reward_std`` over episode rewards, plus the mean over
    episodes of each per-step average as ``<metric>_mean``.
    """
    eval_cfg = EnvConfig(n_products=cfg.n_products, max_steps=cfg.max_steps,
                         alpha_true=cfg.alpha_true, reward_mode=cfg.reward_mode, seed=cfg.seed + 999)
    episodes = run_episodes(policy, make_env(eval_cfg), n_episodes)

    totals = [ep.reward for ep in episodes]
    per_step = [ep.normalized() for ep in episodes]

    summary = {'reward_mean': np.mean(totals), 'reward_std': np.std(totals)}
    for key in ['revenue', 'profit', 'coi_erosion', 'coi_leakage', 'alpha_error', 'avg_margin']:
        summary[f'{key}_mean'] = np.mean([row[key] for row in per_step])
    return summary
|
||||
|
||||
|
||||
def run_baseline(policy: Policy, vec_env: DummyVecEnv, total_steps: int, writer: SummaryWriter | None):
    """Step a non-learning ``policy`` through ``vec_env`` and log per-step scalars.

    The step counter advances by ``vec_env.num_envs`` per iteration until it
    reaches ``total_steps``. When ``writer`` is truthy, the economics / COI /
    alpha / agent-count scalars from each env's ``info`` are written at the
    current step, and the accumulated episode reward is written whenever an
    env reports ``done``. Returns ``None``.
    """
    obs = vec_env.reset()
    n_envs = vec_env.num_envs
    ep_rewards = np.zeros(n_envs)

    for step in range(0, total_steps, n_envs):
        # the policy is queried one observation at a time; only the action is kept
        actions = np.array([policy.predict(obs[idx])[0] for idx in range(n_envs)])
        obs, rewards, dones, infos = vec_env.step(actions)
        ep_rewards += rewards

        for idx, info in enumerate(infos):
            if writer:
                scalars = (
                    ('economics/revenue', info.get('revenue', 0)),
                    ('economics/profit', info.get('profit', 0)),
                    ('economics/margin', info.get('avg_margin', 0)),
                    ('coi/erosion', info.get('coi_erosion', 0)),
                    ('coi/leakage', info.get('coi_leakage', 0)),
                    ('alpha/estimation_error', abs(info.get('alpha_true', 0) - info.get('alpha_est', 0))),
                    ('agents/count', info.get('n_agents', 0)),
                )
                for tag, value in scalars:
                    writer.add_scalar(tag, value, step)

            if not dones[idx]:
                continue
            if writer:
                writer.add_scalar('rollout/ep_reward', ep_rewards[idx], step)
            ep_rewards[idx] = 0  # start accumulating the next episode
|
||||
|
||||
|
||||
def train(cfg: ExperimentConfig) -> Dict[str, Any]:
    """Run one experiment (baseline or SB3 algorithm) and persist its artefacts.

    Writes ``config.json`` and ``results.json`` under
    ``<cfg.log_dir>/<cfg.experiment_name>``. For SB3 algorithms it also saves
    the final model and lets ``EvalCallback`` store the best model.

    Returns:
        ``{"path": <log directory as str>, "metrics": <final evaluation metrics>}``.

    Raises:
        ImportError: a learning algorithm was requested but stable-baselines3
            is unavailable.
        KeyError: ``cfg.algo`` is neither a known baseline nor a known SB3 algorithm.
    """
    algo = cfg.algo.lower()
    is_baseline = algo in ["fixed", "adaptive", "random", "myopic"]
    if not HAS_SB3 and not is_baseline:
        raise ImportError("stable-baselines3 required: pip install stable-baselines3[extra]")

    log_path = Path(cfg.log_dir) / cfg.experiment_name
    log_path.mkdir(parents=True, exist_ok=True)
    with open(log_path / "config.json", "w") as f:
        json.dump(asdict(cfg), f, indent=2)

    writer = SummaryWriter(log_path) if HAS_TB else None
    train_env = eval_env = None
    try:
        train_env, eval_env = make_vec_env(cfg, cfg.n_envs), make_vec_env(cfg, 1)

        if is_baseline:
            policy = {"fixed": Policy.fixed, "adaptive": Policy.adaptive,
                      "random": Policy.random, "myopic": Policy.myopic}[algo]()
            run_baseline(policy, train_env, cfg.total_timesteps, writer)
            final_metrics = evaluate_policy(policy, cfg)
        else:
            common = dict(verbose=1, seed=cfg.seed, tensorboard_log=str(log_path), device="auto")
            # constructors are wrapped in lambdas so only the requested model is built
            model = {
                "ppo": lambda: PPO("MlpPolicy", train_env, learning_rate=3e-4, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2, ent_coef=0.01, **common),
                "sac": lambda: SAC("MlpPolicy", train_env, learning_rate=1e-4, buffer_size=50_000, batch_size=512, tau=0.02, gamma=0.99, learning_starts=1000, ent_coef="auto_0.1", train_freq=4, **common),
                "a2c": lambda: A2C("MlpPolicy", train_env, learning_rate=7e-4, n_steps=5, gamma=0.99, **common),
            }[algo]()

            cb = MetricsCallback(writer)
            eval_cb = EvalCallback(eval_env, best_model_save_path=str(log_path / "best"), log_path=str(log_path),
                                   eval_freq=cfg.eval_freq, n_eval_episodes=cfg.n_eval_episodes, deterministic=True)
            model.learn(cfg.total_timesteps, callback=[cb, eval_cb], progress_bar=True)
            model.save(log_path / "final_model")
            final_metrics = evaluate_policy(model, cfg)

        if writer:
            log_metrics(writer, final_metrics, 'final', cfg.total_timesteps)
    finally:
        # release the writer and environments even when training/evaluation raises
        if writer:
            writer.close()
        for env in (train_env, eval_env):
            if env is not None:
                env.close()

    with open(log_path / "results.json", "w") as f:
        json.dump(final_metrics, f, indent=2)
    return {"path": str(log_path), "metrics": final_metrics}
|
||||
|
||||
|
||||
def _train_alpha(args: tuple) -> tuple[str, Dict]:
    """Sweep worker: train one configuration at a given contamination level.

    Kept at module top level so it can be pickled for the process pool.
    ``args`` is ``(cfg_dict, alpha)``; ``cfg_dict`` is updated in place with
    the alpha and a derived experiment name. Returns ``("alpha_<a>", metrics)``.
    """
    cfg_dict, alpha = args
    cfg_dict.update(
        alpha_true=alpha,
        experiment_name=f"{cfg_dict['algo']}_a{alpha:.2f}_{cfg_dict['reward_mode']}",
    )
    print(f"[alpha={alpha:.2f}] starting")
    outcome = train(ExperimentConfig(**cfg_dict))
    metrics = outcome["metrics"]
    print(f"[alpha={alpha:.2f}] done")
    return f"alpha_{alpha:.2f}", metrics
|
||||
|
||||
|
||||
def run_sweep(cfg: ExperimentConfig, alphas: List[float] | None = None, max_workers: int | None = None) -> Dict[str, Dict]:
    """Train ``cfg`` once per alpha value and write a JSON summary.

    ``max_workers == 1`` runs the alphas sequentially in this process; any
    other value dispatches them to a ``ProcessPoolExecutor``. In the parallel
    case results are collected in completion order. The summary is written to
    ``<cfg.log_dir>/sweep_<algo>_<reward_mode>.json`` and also returned.
    """
    if not alphas:
        alphas = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    base = asdict(cfg)

    results = {}
    if max_workers == 1:  # sequential fallback
        for alpha in alphas:
            key, metrics = _train_alpha((base.copy(), alpha))
            results[key] = metrics
    else:
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            # each worker receives its own copy because the worker mutates the dict
            pending = [pool.submit(_train_alpha, (base.copy(), alpha)) for alpha in alphas]
            for finished in as_completed(pending):
                key, metrics = finished.result()
                results[key] = metrics

    summary_path = Path(cfg.log_dir) / f"sweep_{cfg.algo}_{cfg.reward_mode}.json"
    with open(summary_path, "w") as out:
        json.dump(results, out, indent=2)
    print(f"\nSweep results saved to {summary_path}")
    return results
|
||||
|
||||
|
||||
def _train_policy(args: tuple) -> tuple[str, Dict]:
    """Comparison worker: train/evaluate a single algorithm.

    ``args`` is ``(cfg_dict, algo)``; ``cfg_dict`` is updated in place with
    the algorithm and a derived experiment name. Returns ``(algo, metrics)``.
    """
    cfg_dict, algo = args
    cfg_dict.update(
        algo=algo,
        experiment_name=f"cmp_{algo}_a{cfg_dict['alpha_true']:.2f}",
    )
    print(f"[{algo}] starting")
    outcome = train(ExperimentConfig(**cfg_dict))
    metrics = outcome["metrics"]
    print(f"[{algo}] done")
    return algo, metrics
|
||||
|
||||
|
||||
def compare_policies(cfg: ExperimentConfig, policies: List[str] | None = None, max_workers: int | None = None) -> Dict[str, Dict]:
    """Train every policy in ``policies`` under ``cfg`` and report the metrics.

    Defaults to the four baselines. ``max_workers == 1`` runs sequentially;
    otherwise a ``ProcessPoolExecutor`` is used and results are collected in
    completion order. Results are written to
    ``<cfg.log_dir>/compare_a<alpha>.json``, printed, and returned.
    """
    if not policies:
        policies = ["fixed", "adaptive", "myopic", "random"]
    base = asdict(cfg)

    results = {}
    if max_workers == 1:
        for name in policies:
            algo, metrics = _train_policy((base.copy(), name))
            results[algo] = metrics
    else:
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            # each worker receives its own copy because the worker mutates the dict
            pending = [pool.submit(_train_policy, (base.copy(), name)) for name in policies]
            for finished in as_completed(pending):
                algo, metrics = finished.result()
                results[algo] = metrics

    cmp_path = Path(cfg.log_dir) / f"compare_a{cfg.alpha_true:.2f}.json"
    with open(cmp_path, "w") as out:
        json.dump(results, out, indent=2)
    print(f"\nComparison saved to {cmp_path}")
    for algo, m in results.items():
        print(f" {algo:12s}: reward={m['reward_mean']:.2f} coi_erosion={m['coi_erosion_mean']:.4f} alpha_err={m['alpha_error_mean']:.4f}")
    return results
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse options, then sweep, compare or train once."""
    parser = argparse.ArgumentParser(description="Train RL pricing policies")
    # (flag, keyword arguments) pairs, registered in this order
    option_specs = [
        ("--algo", dict(default="ppo", choices=["ppo", "sac", "a2c", "fixed", "adaptive", "random", "myopic"])),
        ("--steps", dict(type=int, default=100_000)),
        ("--alpha", dict(type=float, default=0.2)),
        ("--reward-mode", dict(default="robust", choices=["revenue", "profit", "robust", "coi_aware"])),
        ("--n-products", dict(type=int, default=10)),
        ("--n-envs", dict(type=int, default=4)),
        ("--seed", dict(type=int, default=42)),
        ("--log-dir", dict(default="sim/case/thesis_simplified/runs")),
        ("--sweep", dict(action="store_true", help="run contamination sweep")),
        ("--compare", dict(action="store_true", help="compare all baselines")),
        ("--workers", dict(type=int, default=None, help="max parallel workers for sweep (None=auto, 1=sequential)")),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    cfg = ExperimentConfig(
        algo=args.algo,
        total_timesteps=args.steps,
        alpha_true=args.alpha,
        reward_mode=args.reward_mode,
        n_products=args.n_products,
        n_envs=args.n_envs,
        seed=args.seed,
        log_dir=args.log_dir,
    )

    # --sweep takes precedence over --compare when both are given
    if args.sweep:
        run_sweep(cfg, max_workers=args.workers)
        return
    if args.compare:
        compare_policies(cfg, max_workers=args.workers)
        return

    result = train(cfg)
    print(f"\nTraining complete: {result['path']}")
    print(f"Metrics: {json.dumps(result['metrics'], indent=2)}")


if __name__ == "__main__":
    main()
|
||||
97
sim/rl/behavior_loader/loader.py
Normal file
97
sim/rl/behavior_loader/loader.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import os
|
||||
import json
|
||||
from pydantic import BaseModel as Base
|
||||
|
||||
class PayloadModel(Base):
    """Schema of a single tracked event payload.

    Field names mirror the keys of the recorded JSON events.
    """
    sessionId: str
    experimentId: str | None  # nullable; no default is declared here
    eventName: str
    page: str | None          # nullable; consulted by the admin-page filter
    productId: str | None     # nullable
    metadata: dict
    storeMode: str
    userAgent: str
    ts: str                   # event timestamp, kept as a string
|
||||
|
||||
class ValueModel(Base):
    """Schema of the ``value`` part of a recorded message; wraps the event payload."""
    payload: PayloadModel
    encoding: str
    isPayloadNull: bool
    schemaId: int
    size: int
|
||||
|
||||
class InteractionModel(Base):
    """Schema of one recorded interaction record: message metadata plus its value."""
    partitionID: int
    offset: int
    timestamp: int  # integer timestamp of the record; units are not visible here
    compression: str
    isTransactional: bool
    headers: list
    key: dict
    value: ValueModel
|
||||
|
||||
def _is_admin(page: str | None) -> bool:
|
||||
return page is not None and page.startswith("/admin/")
|
||||
|
||||
class Loader:
    """Load recorded sessions from a directory of per-session sub-folders.

    Each entry of ``src_dir`` is expected to contain an ``int.json`` file holding
    a list of interaction records. Records whose payload page is an admin page
    are dropped.
    """

    def __init__(self, src_dir: str):
        """List ``src_dir`` and load every session.

        Raises:
            ValueError: ``src_dir`` has no entries.
        """
        self.src_dir = src_dir
        self.entries = os.listdir(src_dir)
        if len(self.entries) == 0:
            raise ValueError("empty directory")
        self.data = self._load_sessions()

    def _load_sessions(self) -> dict:
        """Return ``{entry: [InteractionModel, ...]}`` with admin-page events removed."""
        loaded = {}
        for entry in self.entries:
            with open(f"{self.src_dir}/{entry}/int.json") as fh:
                records = json.load(fh)
            # validate every record first, then filter
            parsed = [InteractionModel(**record) for record in records]
            kept = []
            for interaction in parsed:
                if _is_admin(interaction.value.payload.page):
                    continue
                kept.append(interaction)
            loaded[entry] = kept
        return loaded

    def get_data(self) -> dict:
        """Return the loaded sessions keyed by directory entry."""
        return self.data

    def get_entries(self) -> tuple[list[str], int]:
        """Return the directory entries together with their count."""
        names = self.entries
        return names, len(names)
|
||||
|
||||
class AgentLoader(Loader):
    """Loader variant whose ``int.json`` files hold bare payload records."""

    def _load_sessions(self) -> dict:
        """Return ``{entry: [PayloadModel, ...]}`` with admin-page events removed."""
        loaded = {}
        for entry in self.entries:
            with open(f"{self.src_dir}/{entry}/int.json") as fh:
                records = json.load(fh)
            # validate every record first, then filter
            parsed = [PayloadModel(**record) for record in records]
            kept = []
            for payload in parsed:
                if _is_admin(payload.page):
                    continue
                kept.append(payload)
            loaded[entry] = kept
        return loaded
|
||||
|
||||
class JointLoader:
    """Combine human and agent sessions into a single payload-level dataset."""

    def __init__(self, human_dir: str, agent_dir: str):
        """Load both directories and merge them under prefixed session ids."""
        self.human_loader = Loader(human_dir)
        self.agent_loader = AgentLoader(agent_dir)
        self.data = self._merge()
        self.entries = list(self.data.keys())

    def _merge(self) -> dict:
        """Return one dict with ``human_<sid>`` entries followed by ``agent_<sid>`` entries.

        Human events are unwrapped to their payload so both groups expose the
        same object shape; agent event lists are passed through unchanged.
        """
        merged = {}
        for sid, events in self.human_loader.get_data().items():
            merged[f"human_{sid}"] = [event.value.payload for event in events]
        for sid, events in self.agent_loader.get_data().items():
            merged[f"agent_{sid}"] = events
        return merged

    def get_data(self) -> dict:
        """Return the merged sessions."""
        return self.data

    def get_entries(self) -> tuple[list[str], int]:
        """Return the merged session keys together with their count."""
        names = self.entries
        return names, len(names)
|
||||
|
||||
if __name__ == "__main__":
    # NOTE(review): machine-specific absolute paths; adjust before running elsewhere.
    agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
    human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"

    # smoke-test each loader in turn and report how many sessions it found
    builders = (
        ("agent", lambda: AgentLoader(agent_dir)),
        ("human", lambda: Loader(human_dir)),
        ("joint", lambda: JointLoader(human_dir, agent_dir)),
    )
    for name, build in builders:
        ldr = build()
        print(f"Loaded {len(ldr.get_entries()[0])} {name} sessions")
|
||||
256
sim/rl/behavior_loader/models.py
Normal file
256
sim/rl/behavior_loader/models.py
Normal file
@@ -0,0 +1,256 @@
|
||||
try:
|
||||
from loader import Loader, AgentLoader, JointLoader
|
||||
except ImportError:
|
||||
from sim.rl.behavior_loader.loader import Loader, AgentLoader, JointLoader
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Tuple, Set
|
||||
import numpy as np
|
||||
import graphviz
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# import lib utilities for optional use - models keep their own _state_repr for backwards compat
|
||||
# with the specific event structure (evt.value.payload)
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / 'lib'))
|
||||
try:
|
||||
from lib.state import make_state_repr as lib_make_state_repr
|
||||
from lib.features import transition_histogram as lib_transition_histogram
|
||||
except ImportError:
|
||||
lib_make_state_repr = None
|
||||
lib_transition_histogram = None
|
||||
|
||||
|
||||
class BehaviorModel:
    """Empirical Markov model of browsing behaviour built from recorded sessions.

    A state is the string ``"<page>|<productId>|<eventName>"``. Transition
    probabilities are relative frequencies of consecutive states, and a state's
    value is the mean relative position (index / length) at which it occurs.
    """

    def __init__(self, src_dir: str, loader_cls=Loader):
        """Load sessions from ``src_dir`` with ``loader_cls``; the MDP is built lazily."""
        self.loader = loader_cls(src_dir)
        self.data = self.loader.get_data()
        self.entries, self.num_entries = self.loader.get_entries()
        self.mdp = None

    def _state_repr(self, evt) -> str:
        """Encode an interaction record as a state string."""
        payload = evt.value.payload
        page = payload.page or 'unk'
        product = payload.productId or 'none'
        return f"{page}|{product}|{payload.eventName}"

    def _sort_key(self, evt):
        """Ordering key for events within a session."""
        return evt.timestamp

    def _extract_sessions(self) -> List[List[str]]:
        """Return one ordered state sequence per session with at least two events."""
        return [
            [self._state_repr(evt) for evt in sorted(events, key=self._sort_key)]
            for events in self.data.values()
            if len(events) >= 2
        ]

    def _calc_transitions(self, trajs: List[List[str]]) -> Tuple[Dict, Set]:
        """Count consecutive state pairs; return ``(counts, states_seen_in_a_pair)``."""
        counts = defaultdict(lambda: defaultdict(int))
        seen = set()
        for traj in trajs:
            for idx in range(len(traj) - 1):
                here, there = traj[idx], traj[idx + 1]
                counts[here][there] += 1
                seen.add(here)
                seen.add(there)
        return counts, seen

    def _calc_rewards(self, trajs: List[List[str]]) -> Dict:
        """Map each state to the list of relative positions at which it occurred."""
        positions = defaultdict(list)
        for traj in trajs:
            length = len(traj)
            for idx, state in enumerate(traj):
                positions[state].append(idx / length)
        return positions

    def _normalize_trans(self, cnts: Dict) -> Dict:
        """Turn per-state successor counts into probabilities."""
        probs = {}
        for state, successors in cnts.items():
            total = sum(successors.values())
            probs[state] = {nxt: cnt / total for nxt, cnt in successors.items()}
        return probs

    def build_MDP(self) -> Dict:
        """Build, store and return the MDP dictionary from the loaded sessions."""
        trajs = self._extract_sessions()
        trans_cnt, states = self._calc_transitions(trajs)
        state_rwd = self._calc_rewards(trajs)
        state_values = {state: np.mean(vals) for state, vals in state_rwd.items()}

        self.mdp = {
            'states': sorted(states),
            'num_states': len(states),
            'transitions': self._normalize_trans(trans_cnt),
            'state_values': state_values,
            'state_rewards': state_rwd,
            'trans_counts': trans_cnt,
        }
        return self.mdp

    def transition_prob(self, s: str, s_next: str) -> float:
        """Probability of ``s -> s_next``; 0.0 for unseen pairs."""
        if not self.mdp:
            raise ValueError("build MDP first")
        successors = self.mdp['transitions'].get(s, {})
        return successors.get(s_next, 0.0)

    def state_value(self, s: str) -> float:
        """Mean relative position of state ``s``; 0.0 for unseen states."""
        if not self.mdp:
            raise ValueError("build MDP first")
        return self.mdp['state_values'].get(s, 0.0)

    def sample_traj(self, start: str, max_len: int = 50) -> List[str]:
        """Sample a state path from ``start``, taking at most ``max_len`` transitions.

        Sampling stops early at a state with no recorded successors.
        """
        if not self.mdp:
            raise ValueError("build MDP first")
        transitions = self.mdp['transitions']
        path = [start]
        current = start
        for _ in range(max_len):
            successors = transitions.get(current, {})
            if not successors:
                break
            current = np.random.choice(list(successors.keys()), p=list(successors.values()))
            path.append(current)
        return path

    def extract_trajectory_features(self, events: List, max_trans_dim: int = 50) -> np.ndarray:
        """Convert a trajectory into a float32 feature vector using the MDP structure.

        Layout: ``max_trans_dim`` normalised transition counts (truncated or
        zero-padded), state coverage ratio, per-state transition entropy term,
        trajectory length, unique state count, then mean/std/min/max of state
        values. The MDP is built on demand if missing.
        """
        if not self.mdp:
            self.build_MDP()

        states = [self._state_repr(evt) for evt in sorted(events, key=self._sort_key)]
        pairs = list(zip(states, states[1:]))

        # transition histogram over the MDP's known transitions
        pair_counts = {}
        for pair in pairs:
            pair_counts[pair] = pair_counts.get(pair, 0) + 1
        known = [
            (src, dst)
            for src in self.mdp['states']
            for dst in self.mdp['transitions'].get(src, {}).keys()
        ]
        histogram = [pair_counts.get(pair, 0) for pair in known[:max_trans_dim]]
        histogram = histogram + [0] * (max_trans_dim - len(histogram))  # pad
        total_trans = sum(pair_counts.values()) or 1
        features = [count / total_trans for count in histogram]

        # state coverage ratio
        visited = set(states)
        features.append(len(visited) / max(self.mdp['num_states'], 1))

        # temporal entropy of transitions
        if len(states) > 1:
            step_probs = [self.transition_prob(src, dst) for src, dst in pairs]
            entropy = -sum(p * np.log(p + 1e-10) for p in step_probs if p > 0)
            features.append(entropy / max(len(states), 1))
        else:
            features.append(0.0)

        # trajectory length and unique state count
        features.append(len(states))
        features.append(len(visited))

        # state value statistics along the trajectory
        values = [self.state_value(state) for state in states]
        if values:
            features.extend([np.mean(values), np.std(values), np.min(values), np.max(values)])
        else:
            features.extend([0.0, 0.0, 0.0, 0.0])

        return np.array(features, dtype=np.float32)
|
||||
|
||||
|
||||
class AgentBehaviorModel(BehaviorModel):
    """Behavior model over agent sessions, whose events are bare payload objects."""

    def __init__(self, src_dir: str):
        """Load ``src_dir`` through ``AgentLoader``."""
        super().__init__(src_dir, AgentLoader)

    def _state_repr(self, evt) -> str:
        """Encode a payload as ``"<page>|<productId>|<eventName>"``."""
        page = evt.page or 'unk'
        product = evt.productId or 'none'
        return f"{page}|{product}|{evt.eventName}"

    def _sort_key(self, evt):
        """Order events by the payload's ``ts`` field."""
        return evt.ts
|
||||
|
||||
class JointBehaviorModel(BehaviorModel):
    """Behavior model over the merged human + agent sessions from ``JointLoader``."""

    def __init__(self, human_dir: str, agent_dir: str):
        """Load both directories; the base initialiser is bypassed on purpose
        because ``JointLoader`` takes two directories."""
        joint = JointLoader(human_dir, agent_dir)
        self.loader = joint
        self.data = joint.get_data()
        self.entries, self.num_entries = joint.get_entries()
        self.mdp = None

    def _state_repr(self, evt) -> str:
        """Encode a payload as ``"<page>|<productId>|<eventName>"``."""
        page = evt.page or 'unk'
        product = evt.productId or 'none'
        return f"{page}|{product}|{evt.eventName}"

    def _sort_key(self, evt):
        """Order events by the payload's ``ts`` field."""
        return evt.ts
|
||||
|
||||
def aggregate_event_transitions(mdp: Dict) -> Dict[str, Dict[str, float]]:
    """Collapse state-level transitions to event-type-level transitions.

    The event type is the third ``|``-separated field of a state string.
    Probabilities of all states sharing an event type are summed, then each
    source row is renormalised to sum to one (rows with non-positive mass are
    returned as accumulated).
    """
    def _event_of(state: str) -> str:
        return state.split('|')[2]

    summed = defaultdict(lambda: defaultdict(float))
    for state, successors in mdp['transitions'].items():
        src = _event_of(state)
        for nxt_state, prob in successors.items():
            summed[src][_event_of(nxt_state)] += prob

    normalised = {}
    for src, row in summed.items():
        mass = sum(row.values())
        if mass > 0:
            normalised[src] = {dst: p / mass for dst, p in row.items()}
        else:
            normalised[src] = row
    return normalised
|
||||
|
||||
def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph",
                  fmt: str = "svg", view: bool = False, export_dot: bool = False):
    """Render the event-level transition graph of ``model`` with graphviz.

    Every event type becomes a node; an edge is drawn only when its
    probability is strictly greater than ``threshold``. The rendering is
    written to ``<output>.<fmt>`` and, when ``export_dot`` is set, the DOT
    source to ``<output>.dot``. Returns the ``graphviz.Digraph``.

    Raises:
        ValueError: the model's MDP has not been built.
    """
    if not model.mdp:
        raise ValueError("build MDP first")

    evt_trans = aggregate_event_transitions(model.mdp)

    graph = graphviz.Digraph(format=fmt)
    graph.attr(rankdir='LR', size='30')
    graph.attr('node', shape='circle', width='1', height='1')

    # nodes: every event type appearing as a source or a destination
    events = set(evt_trans.keys()) | {e for trans in evt_trans.values() for e in trans.keys()}
    for event in events:
        graph.node(event)

    for src, row in evt_trans.items():
        for dst, prob in row.items():
            if not prob > threshold:
                continue
            graph.edge(src, dst, label=f'{prob:.2f}')

    graph.render(output, view=view, cleanup=True)
    print(f"Saved MDP graph to {output}.{fmt}")

    if export_dot:
        with open(f"{output}.dot", 'w') as dot_file:
            dot_file.write(graph.source)
        print(f"Exported DOT source to {output}.dot")

    return graph
|
||||
|
||||
def kl_divergence(p: Dict[str, float], q: Dict[str, float]) -> float:
    """Smoothed KL divergence D(p || q) over the keys of ``p``.

    Each term is ``(p_k + eps) * log((p_k + eps) / (q_k + eps))`` with
    ``eps = 1e-10``; keys missing from ``q`` count as probability 0.
    """
    eps = 1e-10
    total = 0
    for key in p:
        p_k = p[key] + eps
        q_k = q.get(key, 0.0) + eps
        total = total + p_k * np.log(p_k / q_k)
    return total
|
||||
|
||||
if __name__ == "__main__":
    # NOTE(review): machine-specific absolute path; adjust before running elsewhere.
    base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments"
    human_dir = f"{base_dir}/collected_data/"
    agent_dir = f"{base_dir}/agents/collected_data/"

    # --- human model ---
    human_model = BehaviorModel(human_dir)
    human_mdp = human_model.build_MDP()
    human_n_trans = sum(len(t) for t in human_mdp['transitions'].values())
    print(f"Built MDP: {human_mdp['num_states']} states, {human_n_trans} transitions")
    if not human_mdp['states']:
        exit("No states found")
    visualize_mdp(human_model, threshold=0.05, output="human_mdp_viz", fmt="pdf", export_dot=True)

    # --- agent model ---
    agent_model = AgentBehaviorModel(agent_dir)
    agent_mdp = agent_model.build_MDP()
    agent_n_trans = sum(len(t) for t in agent_mdp['transitions'].values())
    print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, {agent_n_trans} transitions")
    if not agent_mdp['states']:
        exit("No states found")
    visualize_mdp(agent_model, threshold=0.05, output="agent_mdp_viz", fmt="pdf", export_dot=True)

    # --- divergence between human and agent event-level transitions ---
    human_evt = aggregate_event_transitions(human_mdp)
    agent_evt = aggregate_event_transitions(agent_mdp)
    common = set(human_evt.keys()) & set(agent_evt.keys())
    if not common:
        exit("No common event types for KL divergence analysis")

    kl_divs = [(e, kl_divergence(human_evt[e], agent_evt[e])) for e in common]
    kl_divs = sorted(kl_divs, key=lambda x: x[1], reverse=True)  # most divergent first

    print(f"Average KL divergence: {np.mean([kl for _, kl in kl_divs]):.4f}")
    print("\nMost divergent event types:")
    for evt, kl in kl_divs:
        print(f" {evt}: {kl:.4f}")

    # --- joint model ---
    print("\n=== Joint Model (Human + Agent Combined) ===")
    joint_model = JointBehaviorModel(human_dir, agent_dir)
    joint_mdp = joint_model.build_MDP()
    joint_n_trans = sum(len(t) for t in joint_mdp['transitions'].values())
    print(f"Built joint MDP: {joint_mdp['num_states']} states, {joint_n_trans} transitions")
    if joint_mdp['states']:
        visualize_mdp(joint_model, threshold=0.05, output="joint_mdp_viz", fmt="pdf", export_dot=True)
|
||||
240
sim/rl/engine.py
Normal file
240
sim/rl/engine.py
Normal file
@@ -0,0 +1,240 @@
|
||||
from os import kill
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any
|
||||
from sim.rl.environment import BusinessLogicConstraints
|
||||
|
||||
"""
|
||||
By default, an engine should have its own demand-estimation mechanism based on the observations it receives, which are the computed features.
|
||||
From these features we then follow the research structure of q -> p, using a mechanism that must be both testable and updatable.
|
||||
"""
|
||||
|
||||
class BasePricingEngine(ABC):
    """Base interface for all pricing engines.

    Holds the business constraints (``self.c``), a seeded NumPy random
    generator (``self.rng``) and a per-episode step counter.
    """

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        self.c = constraints
        self.rng = np.random.default_rng(seed)
        self.step_count = 0

    @abstractmethod
    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Compute new prices given the current state and an environment observation.

        Args:
            current_prices: current price vector [N]
            observation: dict containing 'price', 'demand', and possibly interaction data

        Returns:
            new_prices: updated price vector [N]
        """

    def update(self, observation: Dict[str, Any], reward: float, done: bool, info: Dict[str, Any]) -> None:
        """Default update: remember the latest observation, reward and info.

        ``done`` is accepted but not stored. Engines can override as needed.
        """
        self.last_observation, self.last_reward, self.last_info = observation, reward, info

    def reset(self):
        """Reset engine state for a new episode (only the step counter here)."""
        self.step_count = 0
|
||||
|
||||
|
||||
class WildPricingEngine(BasePricingEngine):
    """Production-like pricing using online elasticity estimation via EWMA regression.

    Per product, an exponentially weighted log-log regression of demand on
    price yields an elasticity estimate; prices are then smoothed toward the
    profit-optimal isoelastic price and passed through business guardrails.
    """

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        # per-product unit costs (unknown to customers; known to platform)
        self.unit_cost = self.rng.uniform(8.0, 40.0, size=self.c.product_catalogue_size).astype(np.float32)
        self._reset_estimator()
        # knobs typical in production
        self.lr = 0.08             # smoothing rate toward the optimal price
        self.ewma = 0.05           # EWMA weight of the newest sample
        self.eps_explore = 0.03    # probability of perturbing prices in a step
        self.explore_scale = 0.03  # std-dev of the multiplicative perturbation

    def _reset_estimator(self) -> None:
        """(Re)initialise the elasticity estimate and the EWMA regression moments."""
        n = self.c.product_catalogue_size
        # online elasticity estimate (start moderately elastic)
        self.e_hat = np.full((n,), -1.3, dtype=np.float32)
        # EWMA state for log-log regression
        self.mu_logp = np.zeros(n, dtype=np.float32)
        self.mu_logq = np.zeros(n, dtype=np.float32)
        self.cov_pq = np.zeros(n, dtype=np.float32)
        self.var_p = np.ones(n, dtype=np.float32)

    def _safe_elasticity(self, e: np.ndarray) -> np.ndarray:
        """Clamp elasticities to [-5, -1.05] so that ``e / (e + 1)`` stays finite and positive."""
        return np.clip(e, -5.0, -1.05)

    def reset(self):
        """Reset the step counter and the estimator state; unit costs are kept."""
        super().reset()
        # fixed: previously read the misspelled attribute `product_catelogue_size`
        self._reset_estimator()

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Update the elasticity estimate from the observed demand and return new prices."""
        self.step_count += 1
        # extract demand signal (from env observation) as proxy for sales
        # fixed: the default previously read the misspelled `product_catelogue_size`
        demand = observation.get('demand', np.zeros(self.c.product_catalogue_size, dtype=np.float32))
        return self._update_from_demand(current_prices, demand)

    def _update_from_demand(self, prices: np.ndarray, sold: np.ndarray) -> np.ndarray:
        """One estimator + pricing step given the current prices and units sold."""
        # log transforms (add 1 to handle zeros)
        logp = np.log(np.clip(prices, 1e-3, None)).astype(np.float32)
        logq = np.log(sold + 1.0).astype(np.float32)
        # EWMA moments for per-product regression: logq ≈ a + e*logp
        a = self.ewma
        dp = logp - self.mu_logp  # deviations are taken against the previous means
        dq = logq - self.mu_logq
        self.mu_logp = (1 - a) * self.mu_logp + a * logp
        self.mu_logq = (1 - a) * self.mu_logq + a * logq
        self.cov_pq = (1 - a) * self.cov_pq + a * (dp * dq)
        self.var_p = (1 - a) * self.var_p + a * (dp * dp + 1e-6)
        e_new = self.cov_pq / (self.var_p + 1e-6)
        self.e_hat = self._safe_elasticity(0.9 * self.e_hat + 0.1 * e_new)
        # profit-optimal price for isoelastic demand (if e < -1)
        e = self.e_hat
        p_star = self.unit_cost * (e / (e + 1.0))
        # smooth toward p_star
        new_prices = (1 - self.lr) * prices + self.lr * p_star
        # exploration (small random perturbations)
        if self.rng.random() < self.eps_explore:
            noise = self.rng.normal(0.0, self.explore_scale, size=new_prices.shape).astype(np.float32)
            new_prices = new_prices * (1.0 + noise)
        # apply business guardrails (max change + bounds)
        max_adj = self.c.max_price_adjustment
        ratio = np.clip(new_prices / (prices + 1e-6), 1 - max_adj, 1 + max_adj)
        new_prices = prices * ratio
        new_prices = np.clip(new_prices, self.c.system_min_price, self.c.system_max_price).astype(np.float32)
        return new_prices
|
||||
|
||||
|
||||
class StaticPricingEngine(BasePricingEngine):
    """Baseline engine: the first prices seen in an episode are returned on every step."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.fixed_prices = None

    def reset(self):
        """Forget the frozen prices so the next episode captures new ones."""
        super().reset()
        self.fixed_prices = None

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Return a copy of the prices captured on the first call after a reset."""
        self.step_count += 1
        frozen = self.fixed_prices
        if frozen is None:
            frozen = current_prices.copy()
            self.fixed_prices = frozen
        # hand out a copy so callers cannot mutate the stored vector
        return frozen.copy()
|
||||
|
||||
|
||||
class SimpleDemandEngine(BasePricingEngine):
    """Demand-driven pricing: raise price when demand rises, lower it when demand falls."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.prev_demand = None
        self.lr = 0.05

    def reset(self):
        """Reset the step counter and forget the previous demand."""
        super().reset()
        self.prev_demand = None

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Adjust prices proportionally to the relative change in demand.

        The first call after a reset only records demand and returns the
        prices unchanged (as a copy).
        """
        self.step_count += 1
        demand = _extract_demand(observation, self.c.product_catalogue_size)

        previous = self.prev_demand
        self.prev_demand = demand.copy()
        if previous is None:
            return current_prices.copy()

        # simple rule: if demand increases, raise price; if it decreases, lower price
        delta_d = demand - previous
        price_adj = self.lr * np.sign(delta_d) * np.abs(delta_d) / (np.abs(previous) + 1.0)
        proposed = current_prices * (1.0 + price_adj)

        # apply constraints: bounded relative change, then absolute price bounds
        max_adj = self.c.max_price_adjustment
        ratio = np.clip(proposed / (current_prices + 1e-6), 1 - max_adj, 1 + max_adj)
        bounded = np.clip(current_prices * ratio, self.c.system_min_price, self.c.system_max_price)
        return bounded.astype(np.float32)
|
||||
|
||||
|
||||
class RandomWalkEngine(BasePricingEngine):
    """Random-walk pricing with mean reversion toward the episode's first prices."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.target_price = None
        self.volatility = 0.02

    def reset(self):
        """Reset the step counter and forget the reversion target."""
        super().reset()
        self.target_price = None

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Perturb prices multiplicatively and pull them slightly toward the target."""
        self.step_count += 1
        if self.target_price is None:
            # the first prices seen in the episode become the reversion target
            self.target_price = current_prices.copy()

        # random walk with mean reversion toward target
        noise = self.rng.normal(0.0, self.volatility, size=current_prices.shape).astype(np.float32)
        reversion = 0.01 * (self.target_price - current_prices)
        proposed = current_prices * (1.0 + noise) + reversion

        # apply constraints: bounded relative change, then absolute price bounds
        max_adj = self.c.max_price_adjustment
        ratio = np.clip(proposed / (current_prices + 1e-6), 1 - max_adj, 1 + max_adj)
        bounded = np.clip(current_prices * ratio, self.c.system_min_price, self.c.system_max_price)
        return bounded.astype(np.float32)
|
||||
|
||||
|
||||
class ThompsonSamplingEngine(BasePricingEngine):
    """Bayesian bandit per product, treating price as a discrete action.

    Each product has ``n_price_levels`` candidate prices spanning 0.7x to 1.3x
    of the first prices seen, each with a Beta(alpha, beta) belief.
    """

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.n_price_levels = 5
        self._clear_beliefs()

    def _clear_beliefs(self) -> None:
        """Set all Beta parameters to 1 and drop the price grid and last actions."""
        shape = (self.c.product_catalogue_size, self.n_price_levels)
        self.alpha = np.ones(shape, dtype=np.float32)
        self.beta = np.ones(shape, dtype=np.float32)
        self.price_grid = None
        self.last_actions = None

    def reset(self):
        """Reset the step counter and all bandit state."""
        super().reset()
        self._clear_beliefs()

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Update beliefs from the observed demand, then sample new price levels."""
        self.step_count += 1
        n_products = self.c.product_catalogue_size

        if self.price_grid is None:
            # define price grid per product: rows are products, columns are levels
            lo = current_prices * 0.7
            hi = current_prices * 1.3
            self.price_grid = np.linspace(lo, hi, self.n_price_levels).T

        demand = _extract_demand(observation, n_products)

        # update beliefs based on last action
        if self.last_actions is not None:
            for idx in range(n_products):
                level = self.last_actions[idx]
                reward = demand[idx]
                if reward > 0.5:
                    self.alpha[idx, level] += reward
                else:
                    self.beta[idx, level] += 1.0

        # thompson sampling: sample from posterior, pick best
        new_prices = np.zeros(n_products, dtype=np.float32)
        actions = np.zeros(n_products, dtype=int)
        for idx in range(n_products):
            theta = self.rng.beta(self.alpha[idx], self.beta[idx]).astype(np.float32)
            chosen = int(np.argmax(theta))
            actions[idx] = chosen
            new_prices[idx] = self.price_grid[idx, chosen]
        self.last_actions = actions

        bounded = np.clip(new_prices, self.c.system_min_price, self.c.system_max_price)
        return bounded.astype(np.float32)
|
||||
|
||||
|
||||
def _extract_demand(observation: Dict[str, Any], n: int) -> np.ndarray:
|
||||
if "elasticity" in observation and isinstance(observation["elasticity"], dict):
|
||||
d = observation["elasticity"].get("demand")
|
||||
if d is not None:
|
||||
return np.asarray(d, dtype=np.float32)
|
||||
d = observation.get("demand")
|
||||
if d is not None:
|
||||
return np.asarray(d, dtype=np.float32)
|
||||
return np.zeros(n, dtype=np.float32)
|
||||
@@ -1,451 +1,244 @@
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
import numpy as np
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import pandas as pd
|
||||
from typing import Callable, Optional, Dict, Any, List
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
# "learner" agent learning to optimize pricing
|
||||
# "agent" part of environment creating demand signals that learner processes
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
except ImportError as e:
|
||||
raise ImportError("sim.rl.environment requires gymnasium") from e
|
||||
|
||||
from sim.case.thesis_simplified.coi import COIWindow, coi_erosion, compute_coi_window
|
||||
from sim.case.thesis_simplified.separability import estimate_alpha as estimate_session_alpha
|
||||
from sim.case.thesis_simplified.simplified import Limbo, Session, put_prices_to_market
|
||||
from sim.rl.thesis_core import aggregate_demand_by_product, aggregate_purchases, constrain_prices
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BusinessLogicConstraints:
|
||||
product_catalogue_size: int = 100
|
||||
max_steps: int = 2000
|
||||
sessions_per_step: int = 250
|
||||
|
||||
@dataclass
|
||||
class BusinessLogicConstraints():
|
||||
max_price_adjustment: float = 0.30
|
||||
system_max_price: float = 500.0
|
||||
system_min_price: float = 1.0
|
||||
product_catelogue_size: int = 100
|
||||
episode_length: int = 200
|
||||
sessions_per_step: int = 250
|
||||
agent_share: float = 0.25
|
||||
agent_recon_multiplier: float = 6.0
|
||||
agent_purchase_probability: float = 0.20
|
||||
max_price_adjustment: float = 0.30
|
||||
min_margin_pct: float = 0.05
|
||||
|
||||
agent_share: float = 0.2
|
||||
alpha_drift: float = 0.0
|
||||
alpha_bounds: tuple[float, float] = (0.0, 0.8)
|
||||
|
||||
coi_strength: float = 0.25
|
||||
coi_threshold: float = 4.0
|
||||
coi_sigmoid_temp: float = 1.25
|
||||
base_human_demand: float = 0.08
|
||||
base_agent_demand: float = 0.05
|
||||
human_price_elasticity: float = -1.2
|
||||
agent_price_elasticity: float = -0.6
|
||||
w_agent_loss: float = 1.0
|
||||
w_volatility: float = 5.0
|
||||
w_estimation_error: float = 0.25
|
||||
|
||||
seed: int = 7
|
||||
|
||||
|
||||
def _sigmoid(x: np.ndarray) -> np.ndarray:
|
||||
return 1.0 / (1.0 + np.exp(-x))
|
||||
|
||||
|
||||
def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series:
    """Baseline heuristic flagging sessions with high velocity and low conversion.

    A session is flagged when it has at least 12 interactions, an interaction
    velocity of at least 0.20 and a conversion rate of at most 0.01. A missing
    column is treated as all zeros.

    Returns:
        Boolean Series aligned with ``session_df.index``.
    """
    def column(name, fill):
        return session_df.get(name, pd.Series(fill, index=session_df.index))

    fast = column("interaction_velocity", 0.0) >= 0.20
    barren = column("conversion_rate", 0.0) <= 0.01
    busy = column("total_interactions", 0) >= 12
    return busy & fast & barren
|
||||
|
||||
|
||||
class CommercePlatform:
    """Synthetic storefront that simulates human and agent sessions for a price vector.

    ``run_pricing_simulation`` generates an event log (views, cart adds, purchases),
    derives per-session features, builds a naive and an optionally "defended"
    demand proxy, and reports observed versus oracle revenue.
    """

    def __init__(self, product_catelogue_size: int, max_price: float, min_price: float,
                 constraints: "BusinessLogicConstraints",
                 agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None,
                 use_defense: bool = False):
        """Store the configuration and draw per-product popularity weights.

        Args:
            product_catelogue_size: Number of products in the catalogue.
            max_price: Upper price bound used for clipping and normalisation.
            min_price: Lower price bound used for clipping.
            constraints: Configuration object; this class reads ``seed``, the demand,
                elasticity, COI and agent parameters, and ``sessions_per_step`` from it.
            agent_detector: Optional callable mapping the session feature table
                (indexed by session id) to a boolean "is agent" Series.
            use_defense: When true and a detector is given, flagged sessions are
                excluded from the defended demand estimate.
        """
        self.product_catelogue_size = product_catelogue_size
        self.max_price = max_price
        self.min_price = min_price
        self.constraints = constraints
        self.use_defense = use_defense
        self.agent_detector = agent_detector
        self.simulation_history: List[Dict[str, Any]] = []
        self._rng = np.random.default_rng(constraints.seed)
        # Log-normal popularity weights, rescaled to have mean 1.
        self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size)
        self._popularity = self._popularity / (self._popularity.mean() + 1e-12)
        self._last_interaction_df: pd.DataFrame = pd.DataFrame()

    @staticmethod
    def _event(session_id: str, actor: str, agent_id: Optional[str], product_id: int, action: str,
               t: float, price_shown: float, price_paid: float = 0.0,
               oracle_price_paid: float = 0.0, signal_score: float = 0.0) -> Dict[str, Any]:
        """Build one event-log row; ``is_purchase`` is 1 only for purchase actions."""
        return {
            "session_id": session_id, "actor": actor, "agent_id": agent_id, "product_id": int(product_id),
            "action": action, "t": t, "price_shown": price_shown, "is_purchase": int(action == "purchase"),
            "price_paid": price_paid, "oracle_price_paid": oracle_price_paid, "signal_score": signal_score,
        }

    def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]:
        """Return ground-truth purchase propensities per product for humans and agents.

        Prices are clipped to the platform bounds, normalised by ``max_price`` and
        raised to the respective elasticity; the result is scaled by base demand
        and popularity and clipped to [0, 0.95].
        """
        p = np.clip(prices, self.min_price, self.max_price)
        pn = p / self.max_price
        human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity)
        agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity)
        return {
            "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95),
            "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95),
        }

    def _session_markup_multiplier(self, signal_score: float) -> float:
        """Session-based COI markup: 1 + coi_strength * sigmoid of the scaled signal score."""
        x = (signal_score - self.constraints.coi_threshold) / max(self.constraints.coi_sigmoid_temp, 1e-6)
        return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0])

    def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame:
        """Simulate one step of traffic and return the event log as a DataFrame."""
        demand = self.setup_true_demand(base_prices)
        total_sessions = self.constraints.sessions_per_step
        n_agent_sessions = int(round(total_sessions * self.constraints.agent_share))
        n_human_sessions = total_sessions - n_agent_sessions
        # Each agent identity stands for a recon/purchase pair. The floor of one
        # identity only applies when agent traffic was requested at all; otherwise
        # an agent share of zero would still inject an agent session.
        n_agent_ids = max(1, n_agent_sessions // 2) if n_agent_sessions > 0 else 0

        events: List[Dict[str, Any]] = []
        self._simulate_human_sessions(base_prices, demand["human_purchase_prob"], n_human_sessions, events)
        self._simulate_agent_sessions(base_prices, demand["agent_purchase_prob"], n_agent_ids, events)
        return pd.DataFrame(events)

    def _simulate_human_sessions(self, base_prices: np.ndarray, purchase_prob: np.ndarray,
                                 n_sessions: int, events: List[Dict[str, Any]]) -> None:
        """Append human sessions: a short browse with optional cart adds and at most one purchase."""
        for s in range(n_sessions):
            session_id = f"h_{len(events)}_{s}"
            k = int(self._rng.integers(1, 4))
            prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False)
            t = 0.0
            # Three gaps per product: view, optional cart add, optional purchase.
            inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k)
            signal_score = 0.0
            purchased_any = False

            for i, pid in enumerate(prod_ids):
                t += float(inter_times[i])
                price_shown = float(base_prices[pid])
                events.append(self._event(session_id, "human", None, pid, "view", t, price_shown))
                signal_score += 1.0

                if self._rng.random() < 0.35:
                    t += float(inter_times[i + k])
                    events.append(self._event(session_id, "human", None, pid, "cart", t, price_shown))
                    signal_score += 2.0

                # The random draw is skipped once the session has purchased.
                if (not purchased_any) and (self._rng.random() < float(purchase_prob[pid])):
                    t += float(inter_times[i + 2 * k])
                    mult = self._session_markup_multiplier(signal_score)
                    price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price))
                    events.append(self._event(
                        session_id, "human", None, pid, "purchase", t, price_shown,
                        price_paid=price_paid, oracle_price_paid=price_paid, signal_score=signal_score,
                    ))
                    purchased_any = True

    def _simulate_agent_sessions(self, base_prices: np.ndarray, purchase_prob: np.ndarray,
                                 n_agents: int, events: List[Dict[str, Any]]) -> None:
        """Append agent traffic: a view-heavy recon session plus an optional clean purchase session."""
        for a in range(n_agents):
            agent_id = f"a_{a}"
            recon_session_id = f"{agent_id}_recon"
            t = 0.0
            n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5
            inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1))
            prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views)
            recon_signal = 0.0

            for i, pid in enumerate(prod_ids):
                t += float(inter_times[i])
                events.append(self._event(recon_session_id, "agent", agent_id, pid, "view", t,
                                          float(base_prices[pid])))
                recon_signal += 1.0

            # Clean purchase session with minimal interactions.
            if self._rng.random() < self.constraints.agent_purchase_probability:
                purchase_session_id = f"{agent_id}_clean"
                pid = int(self._rng.integers(0, self.product_catelogue_size))
                price_shown = float(base_prices[pid])
                t2 = 0.0
                clean_signal = 0.0
                t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
                events.append(self._event(purchase_session_id, "agent", agent_id, pid, "view", t2, price_shown))
                clean_signal += 1.0

                if self._rng.random() < float(purchase_prob[pid]):
                    t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
                    # Observed price only sees the clean session's signal ...
                    obs_mult = self._session_markup_multiplier(clean_signal)
                    obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price))
                    # ... while the oracle links the recon session to the purchase.
                    oracle_mult = self._session_markup_multiplier(recon_signal)
                    oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price))
                    events.append(self._event(
                        purchase_session_id, "agent", agent_id, pid, "purchase", t2, price_shown,
                        price_paid=obs_paid, oracle_price_paid=oracle_paid, signal_score=clean_signal,
                    ))

    def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]:
        """Return the mean sale price and the views-per-purchase ("look to book") ratio."""
        if interaction_df.empty:
            return {"mean_sale_price": 0.0, "look_to_book": 0.0}
        purchases = interaction_df[interaction_df["action"] == "purchase"]
        mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0
        views = float((interaction_df["action"] == "view").sum())
        buys = float((interaction_df["action"] == "purchase").sum())
        return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))}

    def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame:
        """Aggregate the event log into one feature row per session.

        Sessions keep their order of first appearance. Counts use vectorised
        group sums, so no version-specific ``apply`` keyword is needed.
        """
        if df.empty:
            return pd.DataFrame()
        session_key = df["session_id"]
        g = df.groupby("session_id", sort=False)

        def count_action(action: str) -> pd.Series:
            return (df["action"] == action).groupby(session_key, sort=False).sum()

        session_duration = g["t"].max() - g["t"].min()
        total_interactions = g.size()
        avg_time_between = g["t"].apply(
            lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0
        )
        interaction_velocity = total_interactions / (session_duration + 1e-6)
        views = count_action("view")
        cart_adds = count_action("cart")
        purchases = count_action("purchase")
        conversion_rate = purchases / (views + 1e-6)
        is_agent = (df["actor"] == "agent").groupby(session_key, sort=False).any()

        return pd.DataFrame({
            "session_duration_sec": session_duration.astype(float),
            "avg_time_between_events": avg_time_between.astype(float),
            "total_interactions": total_interactions.astype(int),
            "interaction_velocity": interaction_velocity.astype(float),
            "item_views": views.astype(int),
            "cart_adds": cart_adds.astype(int),
            "purchases": purchases.astype(int),
            "conversion_rate": conversion_rate.astype(float),
            "is_agent": is_agent.astype(bool),
        }).reset_index()

    def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray:
        """Proxy demand per product from weighted interaction events.

        Args:
            interaction_df: Event log.
            exclude_sessions: Optional boolean Series indexed by session id; sessions
                marked true are dropped before weighting.

        Returns:
            float32 vector with one weighted event total per product.
        """
        if interaction_df.empty:
            return np.zeros(self.product_catelogue_size, dtype=np.float32)
        df = interaction_df
        if exclude_sessions is not None:
            bad_sessions = set(exclude_sessions.loc[exclude_sessions].index)
            df = df[~df["session_id"].isin(bad_sessions)]
        weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5}
        w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float)
        prod = df["product_id"].to_numpy(dtype=int)
        q_hat = np.zeros(self.product_catelogue_size, dtype=float)
        # add.at accumulates correctly when a product id occurs more than once.
        np.add.at(q_hat, prod, w)
        return q_hat.astype(np.float32)

    def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]:
        """Simulate one step at ``prices`` and return (and record) a summary dict.

        The summary holds the event log, the session table, naive and defended
        demand estimates, purchase counts per actor type, relative estimation
        errors against human purchases, and observed/oracle revenue.
        """
        interaction_df = self._simulate_sessions(prices)
        self._last_interaction_df = interaction_df
        session_df = self._session_feature_table(interaction_df)

        predicted_agent_sessions = None
        if self.use_defense and self.agent_detector is not None and not session_df.empty:
            predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id"))

        q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None)
        if predicted_agent_sessions is not None:
            q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions)
        else:
            q_hat_defended = q_hat_naive.copy()

        # Purchase counts per product, split by who bought.
        true_human = np.zeros(self.product_catelogue_size, dtype=float)
        true_agent = np.zeros(self.product_catelogue_size, dtype=float)
        if not interaction_df.empty:
            purchases = interaction_df[interaction_df["action"] == "purchase"]
            if not purchases.empty:
                bought = purchases["product_id"].to_numpy(dtype=int)
                by_human = (purchases["actor"] == "human").to_numpy()
                np.add.at(true_human, bought[by_human], 1.0)
                np.add.at(true_agent, bought[~by_human], 1.0)

        revenue_observed = float(interaction_df["price_paid"].sum()) if not interaction_df.empty else 0.0
        revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0
        agent_loss = max(0.0, revenue_oracle - revenue_observed)

        eps = 1e-6
        internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps)
        internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps)
        interaction_features = self.compute_interaction_features(interaction_df)

        summary = {
            "prices": prices.copy(),
            "interaction_df": interaction_df,
            "session_df": session_df,
            "q_hat_naive": q_hat_naive,
            "q_hat_defended": q_hat_defended,
            "true_human_demand": true_human.astype(np.float32),
            "true_agent_purchases": true_agent.astype(np.float32),
            "internal_error_naive": internal_error_naive.astype(np.float32),
            "internal_error_defended": internal_error_def.astype(np.float32),
            "interaction_features": interaction_features,
            "revenue_observed": revenue_observed,
            "revenue_oracle": revenue_oracle,
            "agent_loss": agent_loss,
            "predicted_agent_sessions": predicted_agent_sessions,
        }
        self.simulation_history.append(summary)
        return summary

    def get_interaction_data(self) -> np.ndarray:
        """Return the most recent event log.

        NOTE(review): despite the annotation, a non-empty log is returned as a list
        of per-event dicts; only the empty case yields an (empty, object) ndarray.
        Behaviour is kept as-is for existing callers.
        """
        if self._last_interaction_df.empty:
            return np.array([], dtype=object)
        return self._last_interaction_df.to_dict(orient="records")
|
||||
def make_env(constraints: Optional[BusinessLogicConstraints] = None) -> "PHANTOMEnv":
    """Build a PHANTOMEnv, using default BusinessLogicConstraints when none are supplied."""
    chosen = constraints or BusinessLogicConstraints()
    return PHANTOMEnv(constraints=chosen)
|
||||
|
||||
|
||||
class PHANTOMEnv(gym.Env):
|
||||
metadata = {"render_modes": []}
|
||||
metadata = {"render_modes": ["human", "ansi"]}
|
||||
|
||||
def __init__(self, use_defense: bool = False):
|
||||
def __init__(self, constraints: Optional[BusinessLogicConstraints] = None):
|
||||
super().__init__()
|
||||
self.constraints = BusinessLogicConstraints()
|
||||
self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment,
|
||||
high=self.constraints.max_price_adjustment,
|
||||
shape=(self.constraints.product_catelogue_size,), dtype=np.float32)
|
||||
self.observation_space = spaces.Dict({
|
||||
"elasticity": spaces.Dict({
|
||||
"price": spaces.Box(
|
||||
low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32),
|
||||
high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32),
|
||||
dtype=np.float32),
|
||||
"demand": spaces.Box(
|
||||
low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
|
||||
high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32),
|
||||
dtype=np.float32),
|
||||
})
|
||||
})
|
||||
self.commerce_platform = CommercePlatform(
|
||||
product_catelogue_size=self.constraints.product_catelogue_size,
|
||||
max_price=self.constraints.system_max_price,
|
||||
min_price=self.constraints.system_min_price,
|
||||
constraints=self.constraints,
|
||||
agent_detector=simple_agent_detector,
|
||||
use_defense=use_defense)
|
||||
self._rng = np.random.default_rng(self.constraints.seed)
|
||||
self.t = 0
|
||||
self._prev_prices: Optional[np.ndarray] = None
|
||||
self.state: Dict[str, Any] = {}
|
||||
self.c = constraints or BusinessLogicConstraints()
|
||||
self.n = int(self.c.product_catalogue_size)
|
||||
|
||||
self._rng = np.random.default_rng(self.c.seed)
|
||||
self._t = 0
|
||||
self._alpha_true = float(self.c.agent_share)
|
||||
self._alpha_hat = float(self.c.agent_share)
|
||||
self._costs = np.zeros(self.n, dtype=np.float32)
|
||||
self._refs = np.zeros(self.n, dtype=np.float32)
|
||||
self._prices: Optional[np.ndarray] = None
|
||||
self._last_sessions: list[Session] = []
|
||||
self._last_coi: COIWindow | None = None
|
||||
self._limbo = Limbo()
|
||||
|
||||
self.action_space = spaces.Box(
|
||||
low=np.full((self.n,), self.c.system_min_price, dtype=np.float32),
|
||||
high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
|
||||
dtype=np.float32,
|
||||
)
|
||||
self.observation_space = spaces.Dict(
|
||||
{
|
||||
"elasticity": spaces.Dict(
|
||||
{
|
||||
"price": spaces.Box(
|
||||
low=np.full((self.n,), self.c.system_min_price, dtype=np.float32),
|
||||
high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
|
||||
dtype=np.float32,
|
||||
),
|
||||
"demand": spaces.Box(
|
||||
low=np.zeros((self.n,), dtype=np.float32),
|
||||
high=np.full((self.n,), 1e9, dtype=np.float32),
|
||||
dtype=np.float32,
|
||||
),
|
||||
}
|
||||
),
|
||||
"market": spaces.Dict(
|
||||
{
|
||||
"alpha_hat": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
|
||||
"revenue_rate": spaces.Box(low=0.0, high=1e12, shape=(1,), dtype=np.float32),
|
||||
"conversion_rate": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
|
||||
"price_volatility": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
|
||||
}
|
||||
),
|
||||
"cost": spaces.Box(
|
||||
low=np.zeros((self.n,), dtype=np.float32),
|
||||
high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
|
||||
dtype=np.float32,
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
def _reset_catalogue(self) -> None:
|
||||
self._costs = self._rng.uniform(15.0, 60.0, size=self.n).astype(np.float32)
|
||||
margins = self._rng.uniform(0.2, 0.6, size=self.n).astype(np.float32)
|
||||
self._refs = (self._costs * (1.0 + margins)).astype(np.float32)
|
||||
self._prices = self._refs.copy()
|
||||
|
||||
def _observe_market(
    self, prices: np.ndarray
) -> tuple[list[Session], Dict[str, float], np.ndarray, np.ndarray, float, float, int]:
    """Expose ``prices`` to the simulated market and aggregate the outcome.

    A fresh integer seed is drawn from the environment RNG for every call and
    handed to ``put_prices_to_market``.

    Returns:
        ``(sessions, demand_map, demand_by_product, purchases, revenue, cost, n_agents)``
        where the last four values come from ``aggregate_purchases`` and
        ``demand_by_product`` from ``aggregate_demand_by_product``.
    """
    sessions, demand_map = put_prices_to_market(
        prices,
        costs=self._costs,
        alpha=self._alpha_true,
        n_sessions=int(self.c.sessions_per_step),
        seed=int(self._rng.integers(0, 2**31 - 1)),
    )
    demand_by_product = aggregate_demand_by_product(sessions, demand_map, self.n)
    purchases, revenue, cost, n_agents = aggregate_purchases(sessions, self._costs, self.n)
    # The conversion rate is not part of the return value, so it is not computed here.
    return sessions, demand_map, demand_by_product, purchases, revenue, cost, n_agents
|
||||
|
||||
def _update_alpha_hat(self, sessions: list[Session]) -> float:
    """Blend the running estimate with the mean per-session score and return it.

    Sessions without events are ignored. When no session qualifies, the current
    estimate is returned unchanged. Otherwise the estimate becomes
    ``0.8 * old + 0.2 * mean(scores)``, clipped to [0, 1].
    """
    per_session = [estimate_session_alpha(sess) for sess in sessions if sess.events]
    if per_session:
        batch_mean = float(np.mean(per_session))
        self._alpha_hat = 0.8 * self._alpha_hat + 0.2 * batch_mean
        self._alpha_hat = float(np.clip(self._alpha_hat, 0.0, 1.0))
    return self._alpha_hat
|
||||
|
||||
def _reward(self, prices: np.ndarray, revenue: float, cost: float, volatility: float) -> float:
|
||||
profit = float(revenue - cost)
|
||||
coi_leak = float(self._last_coi.leak) if self._last_coi else 0.0
|
||||
alpha_err = abs(self._alpha_hat - self._alpha_true)
|
||||
return profit - self.c.coi_strength * coi_leak - self.c.w_volatility * volatility - self.c.w_estimation_error * alpha_err
|
||||
|
||||
def _build_obs(
|
||||
self,
|
||||
prices: np.ndarray,
|
||||
demand_by_product: np.ndarray,
|
||||
revenue: float,
|
||||
conversion: float,
|
||||
volatility: float,
|
||||
) -> Dict[str, Any]:
|
||||
return {
|
||||
"elasticity": {"price": prices.astype(np.float32), "demand": demand_by_product.astype(np.float32)},
|
||||
"market": {
|
||||
"alpha_hat": np.array([self._alpha_hat], dtype=np.float32),
|
||||
"revenue_rate": np.array([revenue], dtype=np.float32),
|
||||
"conversion_rate": np.array([conversion], dtype=np.float32),
|
||||
"price_volatility": np.array([volatility], dtype=np.float32),
|
||||
},
|
||||
"cost": self._costs.astype(np.float32),
|
||||
}
|
||||
|
||||
def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
|
||||
super().reset(seed=seed)
|
||||
if seed is not None:
|
||||
self._rng = np.random.default_rng(seed)
|
||||
self.commerce_platform._rng = np.random.default_rng(seed)
|
||||
self.t = 0
|
||||
init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32)
|
||||
self._prev_prices = init_prices.copy()
|
||||
self.state = {
|
||||
"elasticity": {
|
||||
"price": init_prices,
|
||||
"demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
|
||||
}
|
||||
}
|
||||
return self.state, {}
|
||||
self._t = 0
|
||||
self._alpha_true = float(np.clip(self.c.agent_share, *self.c.alpha_bounds))
|
||||
self._alpha_hat = float(self.c.agent_share)
|
||||
self._reset_catalogue()
|
||||
self._limbo = Limbo()
|
||||
self._last_sessions = []
|
||||
self._last_coi = None
|
||||
|
||||
def step(self, action: np.ndarray):
|
||||
self.t += 1
|
||||
base_prices = self.state["elasticity"]["price"].astype(np.float32)
|
||||
new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)),
|
||||
self.constraints.system_min_price,
|
||||
self.constraints.system_max_price).astype(np.float32)
|
||||
result = self.commerce_platform.run_pricing_simulation(new_prices)
|
||||
prices = self._prices if self._prices is not None else np.zeros(self.n, dtype=np.float32)
|
||||
obs = self._build_obs(prices, np.zeros(self.n, dtype=np.float32), 0.0, 0.0, 0.0)
|
||||
return obs, {"alpha_true": self._alpha_true}
|
||||
|
||||
if self.commerce_platform.use_defense:
|
||||
demand_est = result["q_hat_defended"]
|
||||
internal_err = result["internal_error_defended"]
|
||||
else:
|
||||
demand_est = result["q_hat_naive"]
|
||||
internal_err = result["internal_error_naive"]
|
||||
def step(self, action: np.ndarray) -> Tuple[Dict[str, Any], float, bool, bool, Dict[str, Any]]:
|
||||
if self._prices is None:
|
||||
raise RuntimeError("reset() must be called before step()")
|
||||
|
||||
self.state["elasticity"]["price"] = new_prices
|
||||
self.state["elasticity"]["demand"] = demand_est
|
||||
prev = self._prices
|
||||
prices = constrain_prices(
|
||||
prev,
|
||||
np.asarray(action, dtype=np.float32),
|
||||
costs=self._costs,
|
||||
min_price=float(self.c.system_min_price),
|
||||
max_price=float(self.c.system_max_price),
|
||||
max_adjustment=float(self.c.max_price_adjustment),
|
||||
min_margin_pct=float(self.c.min_margin_pct),
|
||||
)
|
||||
self._prices = prices
|
||||
self._limbo.add_update("prices", prices)
|
||||
|
||||
volatility = 0.0 if self._prev_prices is None else \
|
||||
float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6))))
|
||||
self._prev_prices = new_prices.copy()
|
||||
sessions, demand_map, demand_by_product, purchases, revenue, cost, n_agents = self._observe_market(prices)
|
||||
self._last_sessions = sessions
|
||||
self._limbo.add_update("demand", demand_map)
|
||||
|
||||
revenue_observed = float(result["revenue_observed"])
|
||||
agent_loss = float(result["agent_loss"])
|
||||
err_mean = float(np.mean(internal_err))
|
||||
self._update_alpha_hat(self._last_sessions)
|
||||
self._last_coi = compute_coi_window(self._last_sessions, self._costs, demand_mapping=demand_map)
|
||||
|
||||
reward = (revenue_observed
|
||||
- self.constraints.w_agent_loss * agent_loss
|
||||
- self.constraints.w_volatility * volatility
|
||||
- self.constraints.w_estimation_error * err_mean)
|
||||
self._alpha_true = float(np.clip(self._alpha_true + self.c.alpha_drift, *self.c.alpha_bounds))
|
||||
volatility = float(np.std((prices - prev) / (prev + 1e-6)))
|
||||
reward = float(self._reward(prices, revenue, cost, volatility))
|
||||
conversion = float(np.sum(purchases) / max(len(self._last_sessions), 1))
|
||||
|
||||
terminated = self.t >= self.constraints.episode_length
|
||||
self._t += 1
|
||||
terminated = self._t >= int(self.c.max_steps)
|
||||
|
||||
obs = self._build_obs(prices, demand_by_product, revenue, conversion, min(volatility, 1.0))
|
||||
info = {
|
||||
"t": self.t,
|
||||
"revenue_observed": revenue_observed,
|
||||
"revenue_oracle": float(result["revenue_oracle"]),
|
||||
"agent_loss": agent_loss,
|
||||
"ux_volatility": volatility,
|
||||
"mean_internal_error": err_mean,
|
||||
"look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)),
|
||||
"mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)),
|
||||
"true_human_purchases_total": float(np.sum(result["true_human_demand"])),
|
||||
"true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])),
|
||||
"step": self._t,
|
||||
"reward": reward,
|
||||
"revenue": float(revenue),
|
||||
"profit": float(revenue - cost),
|
||||
"n_sessions": int(self.c.sessions_per_step),
|
||||
"n_agents": int(n_agents),
|
||||
"alpha_true": float(self._alpha_true),
|
||||
"alpha_hat": float(self._alpha_hat),
|
||||
"alpha_error": float(abs(self._alpha_hat - self._alpha_true)),
|
||||
"price_std": float(np.std(prices)),
|
||||
"price_volatility": float(volatility),
|
||||
}
|
||||
return self.state, float(reward), terminated, False, info
|
||||
if self._last_coi is not None:
|
||||
info.update(
|
||||
{
|
||||
"coi_policy": float(self._last_coi.policy),
|
||||
"coi_agent": float(self._last_coi.agent),
|
||||
"coi_leakage": float(self._last_coi.leak),
|
||||
"coi_survival": float(self._last_coi.survival_ratio),
|
||||
"coi_erosion": float(coi_erosion(self._last_coi.policy, self._last_coi.agent)),
|
||||
}
|
||||
)
|
||||
return obs, reward, terminated, False, info
|
||||
|
||||
def render(self, mode: str = "human") -> str | None:
    """Return a one-line status summary, printing it as well when ``mode`` is "human".

    Returns None when no prices are set yet.
    """
    if self._prices is None:
        return None

    spread = float(np.std(self._prices))
    out = (
        f"t={self._t}/{self.c.max_steps} "
        f"alpha_true={self._alpha_true:.3f} alpha_hat={self._alpha_hat:.3f} "
        f"price_std={spread:.2f}"
    )
    if mode == "human":
        print(out)
    return out
|
||||
|
||||
if __name__ == "__main__":
    # Demo: run one random-action episode with the defense off and one with it on,
    # log progress every 20 steps, then plot the collected metrics side by side.
    import matplotlib.pyplot as plt
    from collections import defaultdict

    runs = {}
    for use_defense in (False, True):
        env = PHANTOMEnv(use_defense=use_defense)
        obs, _ = env.reset(seed=42)
        metrics = defaultdict(list)
        total_reward = 0.0
        done = False

        while not done:
            action = env.action_space.sample()
            obs, reward, done, _, info = env.step(action)
            total_reward += reward

            shown_prices = obs["elasticity"]["price"]
            p_mean = float(np.mean(shown_prices))
            q_mean = float(np.mean(obs["elasticity"]["demand"]))
            p_std = float(np.std(shown_prices))

            # One row of per-step metrics, appended column by column.
            step_row = {
                't': info['t'],
                'price_mean': p_mean,
                'price_std': p_std,
                'demand_mean': q_mean,
                'revenue_observed': info['revenue_observed'],
                'revenue_oracle': info['revenue_oracle'],
                'agent_loss': info['agent_loss'],
                'ux_volatility': info['ux_volatility'],
                'look_to_book': info['look_to_book'],
                'reward': reward,
                'human_purchases': info['true_human_purchases_total'],
                'agent_purchases': info['true_agent_purchases_total'],
            }
            for column, value in step_row.items():
                metrics[column].append(value)

            if info['t'] % 20 == 0 or done:
                print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} "
                      f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
                      f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
                      f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")

        runs[use_defense] = metrics
        print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n")

    fig, axes = plt.subplots(3, 3, figsize=(15, 12))
    fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold')

    # (metric key, panel title, y-axis label), laid out row by row on the 3x3 grid.
    plot_configs = [
        ('price_mean', 'Mean Price', 'Price'),
        ('demand_mean', 'Mean Demand Estimate', 'Demand'),
        ('revenue_observed', 'Revenue (Observed)', 'Revenue'),
        ('agent_loss', 'Agent Loss (Oracle - Observed)', 'Loss'),
        ('ux_volatility', 'UX Volatility (Price Change)', 'Volatility'),
        ('look_to_book', 'Look-to-Book Ratio', 'Ratio'),
        ('reward', 'Step Reward', 'Reward'),
        ('human_purchases', 'Human Purchases', 'Count'),
        ('agent_purchases', 'Agent Purchases', 'Count'),
    ]
    series_styles = [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]

    for ax, (key, title, ylabel) in zip(axes.flat, plot_configs):
        for use_defense, label, color in series_styles:
            m = runs[use_defense]
            ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5)
        ax.set_xlabel('Step')
        ax.set_ylabel(ylabel)
        ax.set_title(title, fontsize=10, fontweight='bold')
        ax.legend(loc='best', fontsize=8)
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('phantom_env_comparison.png', dpi=150, bbox_inches='tight')
    print("Plot saved to phantom_env_comparison.png")
    plt.show()
|
||||
def close(self) -> None:
    """No-op: the body performs no cleanup and simply returns ``None``."""
    return None
|
||||
|
||||
11
sim/rl/jax_core/__init__.py
Normal file
11
sim/rl/jax_core/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""JAX-accelerated simulation core for PHANTOM environment."""
|
||||
from .transitions import TransitionData, compile_transitions, fallback_transitions, JAX_AVAILABLE
|
||||
from .simulation import SessionBatch, SimResult, sample_sessions, compute_metrics
|
||||
from .features import session_features, compute_session_transitions
|
||||
from .separability import compute_divergences, estimate_alpha_batch
|
||||
|
||||
__all__ = [
|
||||
"JAX_AVAILABLE", "TransitionData", "compile_transitions", "fallback_transitions",
|
||||
"SessionBatch", "SimResult", "sample_sessions", "compute_metrics",
|
||||
"session_features", "compute_session_transitions", "compute_divergences", "estimate_alpha_batch",
|
||||
]
|
||||
69
sim/rl/jax_core/features.py
Normal file
69
sim/rl/jax_core/features.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""Vectorized session feature extraction."""
|
||||
import numpy as np
|
||||
from .transitions import N_STATES, PURCHASE_IDX, CART_IDX
|
||||
from .simulation import SessionBatch
|
||||
|
||||
try:
|
||||
import jax.numpy as jnp
|
||||
from jax import jit
|
||||
JAX_AVAILABLE = True
|
||||
except ImportError:
|
||||
jnp, JAX_AVAILABLE = np, False
|
||||
def jit(f): return f
|
||||
|
||||
@jit
def extract_features(states, dwells, lengths):
    """Build a (n_sess, 9) feature matrix, one row per session.

    Only the first ``lengths[i]`` steps of session ``i`` are considered.
    Column order: duration, average dwell, step count, velocity (steps per
    unit of dwell), visits to state 1, visits to state 3, visits to state 4,
    visits to state 2, and state-4 visits divided by state-1 visits.
    """
    steps = states.shape[1]
    in_session = jnp.arange(steps)[None, :] < lengths[:, None]

    def visits(state_idx):
        # Number of in-session steps spent in the given state.
        hits = (states == state_idx) & in_session
        return jnp.sum(hits, axis=1).astype(jnp.float32)

    duration = jnp.sum(dwells * in_session, axis=1)
    total = lengths.astype(jnp.float32)
    views = visits(1)
    learn = visits(2)
    carts = visits(3)
    purchases = visits(4)

    # The 1e-6 terms keep the ratios finite for empty sessions.
    columns = [
        duration,
        duration / (total + 1e-6),
        total,
        total / (duration + 1e-6),
        views,
        carts,
        purchases,
        learn,
        purchases / (views + 1e-6),
    ]
    return jnp.stack(columns, axis=1)
|
||||
|
||||
def session_features(batch: SessionBatch) -> np.ndarray:
    """Return the (n_sess, 9) feature matrix for ``batch`` as a numpy array.

    Uses the jitted ``extract_features`` kernel when JAX is importable and an
    equivalent pure-numpy computation otherwise.
    """
    if JAX_AVAILABLE:
        device_args = tuple(jnp.array(a) for a in (batch.states, batch.dwells, batch.lengths))
        return np.asarray(extract_features(*device_args))

    # numpy fallback
    steps = batch.states.shape[1]
    in_session = np.arange(steps)[None, :] < batch.lengths[:, None]

    def visits(state_idx):
        return np.sum((batch.states == state_idx) & in_session, axis=1).astype(np.float32)

    duration = np.sum(batch.dwells * in_session, axis=1)
    total = batch.lengths.astype(np.float32)
    views = visits(1)
    learn = visits(2)
    carts = visits(3)
    purchases = visits(4)
    columns = [
        duration,
        duration / (total + 1e-6),
        total,
        total / (duration + 1e-6),
        views,
        carts,
        purchases,
        learn,
        purchases / (views + 1e-6),
    ]
    return np.stack(columns, axis=1)
|
||||
|
||||
@jit
def session_transitions(states, lengths, n_states=N_STATES):
    """Row-normalised empirical transition matrix for every session.

    Args:
        states: (n_sess, max_len) integer state indices; negative values are padding.
        lengths: (n_sess,) number of recorded steps per session.
        n_states: size of the state space. Must stay a plain Python int (the
            default) because it determines array shapes under ``jit``.

    Returns:
        (n_sess, n_states, n_states) array. Entry ``[i, a, b]`` is the share
        of session ``i``'s transitions out of ``a`` that went to ``b``; rows
        with no observed transition are all zeros.
    """
    steps = states.shape[1]
    src, dst = states[:, :-1], states[:, 1:]

    # A transition t -> t+1 counts only if both ends lie inside the session
    # and neither end is a padding marker.
    in_session = jnp.arange(steps - 1)[None, :] < (lengths[:, None] - 1)
    valid = in_session & (src >= 0) & (dst >= 0)

    # Clip so padding (-1) is a legal index; those steps are zeroed by `valid`
    # *before* the reduction, so they never contribute to the counts.
    basis = jnp.eye(n_states)
    src_hot = basis[jnp.clip(src, 0, n_states - 1)] * valid[..., None]
    dst_hot = basis[jnp.clip(dst, 0, n_states - 1)]

    # Sum of per-step outer products, batched over sessions (no Python loop,
    # so the traced program does not grow with the number of sessions).
    counts = jnp.einsum("nti,ntj->nij", src_hot, dst_hot)
    row_sums = counts.sum(axis=-1, keepdims=True)
    return counts / (row_sums + 1e-10)
|
||||
|
||||
def compute_session_transitions(batch: SessionBatch) -> np.ndarray:
    """Return per-session row-normalised transition matrices as a numpy array.

    Delegates to ``session_transitions`` when JAX is importable; otherwise
    counts transitions with plain Python loops.
    """
    if JAX_AVAILABLE:
        per_session = session_transitions(jnp.array(batch.states), jnp.array(batch.lengths))
        return np.asarray(per_session)

    # numpy fallback
    n_sessions = batch.states.shape[0]
    counts = np.zeros((n_sessions, N_STATES, N_STATES), dtype=np.float32)
    for row, (seq, length) in enumerate(zip(batch.states, batch.lengths)):
        # zip() stops at the shorter operand, i.e. after length - 1 pairs.
        for src, dst in zip(seq, seq[1:length]):
            if src >= 0 and dst >= 0:
                counts[row, src, dst] += 1
    totals = counts.sum(axis=-1, keepdims=True)
    return counts / (totals + 1e-10)
|
||||
43
sim/rl/jax_core/separability.py
Normal file
43
sim/rl/jax_core/separability.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""Vectorized KL divergence for separability scoring."""
|
||||
import numpy as np
|
||||
from typing import Tuple
|
||||
|
||||
try:
|
||||
import jax.numpy as jnp
|
||||
from jax import jit
|
||||
JAX_AVAILABLE = True
|
||||
except ImportError:
|
||||
jnp, JAX_AVAILABLE = np, False
|
||||
def jit(f): return f
|
||||
|
||||
@jit
def batch_kl(P, Q_human, Q_agent, eps=1e-10):
    """KL(P || Q) of each batched matrix against two reference matrices.

    ``P`` is (n, s, s) and each reference is (s, s). ``P`` is smoothed with
    ``eps`` and re-normalised along the last axis; the references are only
    smoothed. Returns ``(delta_h, delta_a)``, each of shape (n,).
    """
    smoothed = P + eps
    rows = smoothed / smoothed.sum(axis=-1, keepdims=True)

    def divergence_from(reference):
        ref = reference[None] + eps
        return jnp.sum(rows * jnp.log(rows / ref), axis=(1, 2))

    return divergence_from(Q_human), divergence_from(Q_agent)
|
||||
|
||||
def compute_divergences(session_trans: np.ndarray, ref_human: np.ndarray, ref_agent: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """KL divergence of every session matrix from the human and agent references.

    Returns ``(delta_h, delta_a)`` as numpy arrays with one entry per session.
    """
    if JAX_AVAILABLE:
        on_device = [jnp.array(a) for a in (session_trans, ref_human, ref_agent)]
        delta_h, delta_a = batch_kl(*on_device)
        return np.asarray(delta_h), np.asarray(delta_a)

    # numpy fallback
    eps = 1e-10
    smoothed = session_trans + eps
    rows = smoothed / smoothed.sum(axis=-1, keepdims=True)

    def divergence_from(reference):
        ref = reference[None] + eps
        return np.sum(rows * np.log(rows / ref), axis=(1, 2))

    return divergence_from(ref_human), divergence_from(ref_agent)
|
||||
|
||||
def estimate_alpha_batch(prob_agent: np.ndarray, delta_h: np.ndarray, delta_a: np.ndarray, temp: float = 1.0) -> np.ndarray:
    """Vectorized alpha estimation from classifier probs and divergences.

    Args:
        prob_agent: per-session classifier probability.
        delta_h: per-session divergence from the human reference.
        delta_a: per-session divergence from the agent reference.
        temp: sigmoid temperature; values <= 0 skip the sigmoid.

    Returns:
        Array of values clipped to [0, 1]: the equal-weight blend of
        ``prob_agent`` and ``delta_a / (delta_h + delta_a)``, optionally
        passed through a sigmoid centred on 0.5.
    """
    mass = delta_h + delta_a
    has_mass = mass > 1e-8
    # Divide by a harmless 1.0 wherever the mass is ~0 so that no 0/0 is ever
    # evaluated (np.where computes both branches eagerly); those entries are
    # replaced by the neutral 0.5 anyway.
    safe_mass = np.where(has_mass, mass, 1.0)
    ratio = np.where(has_mass, delta_a / safe_mass, 0.5)
    blended = 0.5 * prob_agent + 0.5 * ratio
    if temp <= 0:
        return np.clip(blended, 0.0, 1.0)
    return np.clip(1.0 / (1.0 + np.exp(-temp * (blended - 0.5))), 0.0, 1.0)
|
||||
116
sim/rl/jax_core/simulation.py
Normal file
116
sim/rl/jax_core/simulation.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""Vectorized Markov chain session sampling with JAX."""
|
||||
from typing import NamedTuple, Tuple
|
||||
import numpy as np
|
||||
from functools import partial
|
||||
|
||||
try:
|
||||
import jax, jax.numpy as jnp
|
||||
from jax import lax
|
||||
JAX_AVAILABLE = True
|
||||
except ImportError:
|
||||
JAX_AVAILABLE = False
|
||||
|
||||
from .transitions import TransitionData, N_STATES, TERM_IDX, PURCHASE_IDX, CART_IDX
|
||||
|
||||
class SessionBatch(NamedTuple):
    """Padded batch of sampled sessions; row ``i`` of every field is session ``i``."""

    # (n_sess, max_len) state indices, -1=padding
    states: np.ndarray
    # (n_sess, max_len) dwell times
    dwells: np.ndarray
    # (n_sess,) product index per session
    products: np.ndarray
    # (n_sess,) 0=human, 1=agent
    actors: np.ndarray
    # (n_sess,) actual session length
    lengths: np.ndarray
|
||||
|
||||
class SimResult(NamedTuple):
    """Aggregate outcome of one simulated batch of sessions."""

    # Per-product purchase counts, split by actor type.
    demand_human: np.ndarray
    demand_agent: np.ndarray
    # Revenue at the charged prices and at the base prices.
    revenue: float
    revenue_oracle: float
    # Base price minus charged price, summed over agent purchases.
    agent_loss: float
    coi: float
    # Item-page views per purchase.
    look_to_book: float
    mean_sale_price: float
    n_human_purchases: int
    n_agent_purchases: int
    # The session batch the metrics were computed from.
    sessions: SessionBatch
|
||||
|
||||
if JAX_AVAILABLE:
    @partial(jax.jit, static_argnums=(5, 6, 7))
    def _sample_sessions_jax(key, T_human, T_agent, dwell_human, dwell_agent, n_human, n_agent, max_steps):
        """Sample ``n_human + n_agent`` Markov-chain sessions in one scan.

        Every session starts in state 0. At each step the *current* state and
        the dwell time drawn for that same state are recorded, then the next
        state is sampled; a session stops being recorded once it transitions
        to ``TERM_IDX``. This is the same convention as the numpy fallback in
        ``sample_sessions``: the start state is stored, the terminal state is
        not, and ``states[i, t]`` / ``dwells[i, t]`` refer to the same visit.

        Args:
            key: JAX PRNG key.
            T_human, T_agent: (n_states, n_states) transition probabilities.
            dwell_human, dwell_agent: (n_states, 2) gamma shape/scale per state.
            n_human, n_agent, max_steps: static Python ints.

        Returns:
            ``(states, dwells, actors, lengths)``: ``states`` and ``dwells``
            are (n, max_steps) with -1 / 0.0 padding after a session ends,
            ``actors`` is 0 for humans and 1 for agents, and ``lengths`` is
            the number of recorded steps per session.
        """
        n = n_human + n_agent
        # Only the third sub-key is consumed; the four-way split is kept so a
        # given key produces the same transition draws as before.
        _, _, scan_key, _ = jax.random.split(key, 4)
        actors = jnp.concatenate([jnp.zeros(n_human, dtype=jnp.int32), jnp.ones(n_agent, dtype=jnp.int32)])
        is_human = (actors == 0)[:, None, None]
        T = jnp.where(is_human, T_human[None], T_agent[None])  # (n, n_states, n_states)
        dwell_p = jnp.where(is_human, dwell_human[None], dwell_agent[None])  # (n, n_states, 2)
        rows = jnp.arange(n)

        def step(carry, _):
            s, active, k = carry
            k, k_next, k_dwell = jax.random.split(k, 3)
            # Sample the successor of every session's current state.
            nxt = jax.random.categorical(k_next, jnp.log(T[rows, s] + 1e-10))
            # Dwell time belongs to the state being left, floored at 0.3.
            dwell = jnp.maximum(0.3, jax.random.gamma(k_dwell, dwell_p[rows, s, 0]) * dwell_p[rows, s, 1])
            # Finished sessions emit padding so states and dwells stay aligned.
            visited = jnp.where(active, s, -1)
            spent = jnp.where(active, dwell, 0.0)
            still = active & (nxt != TERM_IDX)
            return (nxt, still, k), (visited, spent)

        init = (jnp.zeros(n, dtype=jnp.int32), jnp.ones(n, dtype=jnp.bool_), scan_key)
        _, (states, dwells) = lax.scan(step, init, None, length=max_steps)
        states, dwells = states.T, dwells.T  # (n, max_steps)
        lengths = jnp.sum(states >= 0, axis=1)
        return states, dwells, actors, lengths
|
||||
|
||||
def _fallback_seed(key):
    """Turn ``key`` into seed material for ``np.random.default_rng``."""
    if isinstance(key, (int, np.integer)):
        return int(key)
    if hasattr(key, '__getitem__'):
        # Use every word of the key: PRNGKey-shaped arrays such as [0, seed]
        # would otherwise all collapse to the same leading word.
        return [int(word) for word in np.asarray(key).ravel()]
    return 42


def sample_sessions(key, trans: TransitionData, n_human: int, n_agent: int, n_products: int, max_steps: int = 40) -> SessionBatch:
    """Sample a batch of human and agent sessions from their Markov chains.

    Args:
        key: a JAX PRNG key, or a plain integer seed. Without JAX, an array
            or sequence of integers is also accepted as seed material.
        trans: transition matrices and dwell parameters for both actor types.
        n_human, n_agent: number of sessions to draw for each actor type.
        n_products: sessions are assigned a product index in ``[0, n_products)``.
        max_steps: maximum number of recorded steps per session.

    Returns:
        A ``SessionBatch`` of numpy arrays, humans first, then agents.
    """
    if JAX_AVAILABLE:
        if isinstance(key, (int, np.integer)):
            key = jax.random.PRNGKey(int(key))
        k_sessions, k_products = jax.random.split(key)
        states, dwells, actors, lengths = _sample_sessions_jax(
            k_sessions, trans.human_T, trans.agent_T, trans.human_dwell, trans.agent_dwell,
            n_human, n_agent, max_steps)
        products = jax.random.randint(k_products, (n_human + n_agent,), 0, n_products)
        return SessionBatch(np.asarray(states), np.asarray(dwells), np.asarray(products), np.asarray(actors), np.asarray(lengths))

    # numpy fallback
    rng = np.random.default_rng(_fallback_seed(key))
    n = n_human + n_agent
    actors = np.concatenate([np.zeros(n_human, dtype=np.int32), np.ones(n_agent, dtype=np.int32)])
    products = rng.integers(0, n_products, size=n)
    states = np.full((n, max_steps), -1, dtype=np.int32)
    dwells = np.zeros((n, max_steps), dtype=np.float32)
    lengths = np.zeros(n, dtype=np.int32)
    for i in range(n):
        T = trans.human_T if actors[i] == 0 else trans.agent_T
        dp = trans.human_dwell if actors[i] == 0 else trans.agent_dwell
        s, t = 0, 0
        # Record the current state and its dwell, then move on; the terminal
        # state itself is never recorded.
        while t < max_steps and s != TERM_IDX:
            states[i, t] = s
            dwells[i, t] = max(0.3, rng.gamma(dp[s, 0], dp[s, 1]))
            s = rng.choice(N_STATES, p=T[s])
            t += 1
        lengths[i] = t
    return SessionBatch(states, dwells, products, actors, lengths)
|
||||
|
||||
def compute_metrics(batch: SessionBatch, prices: np.ndarray, unit_cost: np.ndarray, base_price: np.ndarray) -> SimResult:
    """Summarise a session batch into demand, revenue and diagnostic metrics.

    A session counts as a purchase if it visits ``PURCHASE_IDX`` at least
    once; its product is then sold once at ``prices[product]``.
    """
    n_products = len(prices)
    bought = np.any(batch.states == PURCHASE_IDX, axis=1)
    bought_by_human = bought & (batch.actors == 0)
    bought_by_agent = bought & (batch.actors == 1)

    def demand(mask):
        # Purchases per product among the selected sessions.
        return np.bincount(batch.products[mask], minlength=n_products).astype(np.float32)

    # revenue and oracle
    sold = batch.products[bought]
    revenue = float(np.sum(prices[sold]))
    revenue_oracle = float(np.sum(base_price[sold]))

    # agent loss: base_price - price_paid for agent purchases (agents gaming the system)
    agent_items = batch.products[bought_by_agent]
    agent_loss = float(np.sum(base_price[agent_items] - prices[agent_items]))

    # COI: margin - expected_premium*0.5 for human purchases
    human_items = batch.products[bought_by_human]
    coi = 0.0
    if len(human_items) > 0:
        margin = float(np.mean(prices[human_items] - unit_cost[human_items]))
        premium = float(np.mean(base_price[human_items] - prices[human_items]))
        coi = max(0.0, margin - premium * 0.5)

    # look to book: views / purchases (view_item_page = index 1)
    n_purch = int(bought.sum())
    views = float(np.sum(batch.states == 1))
    look_to_book = views / (n_purch + 1e-6)
    mean_sale = float(np.mean(prices[sold])) if n_purch > 0 else 0.0

    return SimResult(
        demand_human=demand(bought_by_human),
        demand_agent=demand(bought_by_agent),
        revenue=revenue,
        revenue_oracle=revenue_oracle,
        agent_loss=agent_loss,
        coi=coi,
        look_to_book=look_to_book,
        mean_sale_price=mean_sale,
        n_human_purchases=int(bought_by_human.sum()),
        n_agent_purchases=int(bought_by_agent.sum()),
        sessions=batch,
    )
|
||||
47
sim/rl/jax_core/transitions.py
Normal file
47
sim/rl/jax_core/transitions.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""Dense transition matrices for JAX Markov chain sampling."""
|
||||
from dataclasses import dataclass
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
import jax.numpy as jnp
|
||||
JAX_AVAILABLE = True
|
||||
except ImportError:
|
||||
jnp, JAX_AVAILABLE = np, False
|
||||
|
||||
STATES = ["session_start", "view_item_page", "learn_more_about_item", "add_item_to_cart", "purchase_complete", "session_end"]
|
||||
S2I = {s: i for i, s in enumerate(STATES)}
|
||||
N_STATES, TERM_IDX, PURCHASE_IDX, CART_IDX = len(STATES), 5, 4, 3
|
||||
|
||||
@dataclass
class TransitionData:
    """Dense transition matrices and dwell parameters for both actor types."""

    # (6,6) transition probs
    human_T: np.ndarray
    # (6,6)
    agent_T: np.ndarray
    # (6,2) shape,scale
    human_dwell: np.ndarray
    # (6,2)
    agent_dwell: np.ndarray

    def to_jax(self):
        """Return a copy holding ``jnp`` arrays, or ``self`` when JAX is unavailable."""
        if JAX_AVAILABLE:
            fields = (self.human_T, self.agent_T, self.human_dwell, self.agent_dwell)
            return TransitionData(*map(jnp.array, fields))
        return self
|
||||
|
||||
def dict_to_dense(d):
    """Convert a ``{src: {dst: prob}}`` mapping into a dense stochastic matrix.

    State names not present in ``S2I`` are ignored. Rows are normalised to
    sum to 1. A state with no outgoing probability mass is routed to
    ``TERM_IDX`` with probability 1, so every row is a valid distribution
    and sampling can never get stuck or fall back to an arbitrary draw.
    The terminal row itself is always absorbing.

    Returns:
        (N_STATES, N_STATES) float32 array.
    """
    m = np.zeros((N_STATES, N_STATES), dtype=np.float32)
    for src, dsts in d.items():
        i = S2I.get(src)
        if i is None:
            continue
        for dst, p in dsts.items():
            j = S2I.get(dst)
            if j is not None:
                m[i, j] = p
    row_mass = m.sum(axis=1, keepdims=True)
    m /= np.maximum(row_mass, 1e-8)
    # Dead ends (all-zero rows) end the session instead of staying undefined.
    dead_ends = row_mass[:, 0] <= 0
    m[dead_ends, TERM_IDX] = 1.0
    m[TERM_IDX] = 0
    m[TERM_IDX, TERM_IDX] = 1.0
    return m
|
||||
|
||||
def compile_transitions(human_profile, agent_profile):
    """Build ``TransitionData`` from two behaviour profiles.

    Each profile must expose ``transitions`` (nested dict of probabilities)
    and ``dwell_params`` (state name -> (shape, scale)); states missing from
    ``dwell_params`` default to ``(2.0, 1.0)``.
    """
    def dwell_arr(params):
        rows = [params.get(state, (2.0, 1.0)) for state in STATES]
        return np.array(rows, dtype=np.float32).reshape(N_STATES, 2)

    human_T = dict_to_dense(human_profile.transitions)
    agent_T = dict_to_dense(agent_profile.transitions)
    human_dwell = dwell_arr(human_profile.dwell_params)
    agent_dwell = dwell_arr(agent_profile.dwell_params)
    return TransitionData(human_T, agent_T, human_dwell, agent_dwell)
|
||||
|
||||
def fallback_transitions():
    """Return hard-coded ``TransitionData`` with identical dwell parameters for both actors."""
    human = {
        "session_start": {"view_item_page": .85, "session_end": .15},
        "view_item_page": {"learn_more_about_item": .4, "add_item_to_cart": .3, "view_item_page": .2, "session_end": .1},
        "learn_more_about_item": {"add_item_to_cart": .5, "view_item_page": .3, "session_end": .2},
        "add_item_to_cart": {"purchase_complete": .6, "view_item_page": .25, "session_end": .15},
        "purchase_complete": {"session_end": 1.0},
    }
    agent = {
        "session_start": {"view_item_page": .9, "session_end": .1},
        "view_item_page": {"learn_more_about_item": .5, "add_item_to_cart": .25, "view_item_page": .15, "session_end": .1},
        "learn_more_about_item": {"add_item_to_cart": .4, "view_item_page": .4, "session_end": .2},
        "add_item_to_cart": {"purchase_complete": .5, "view_item_page": .3, "session_end": .2},
        "purchase_complete": {"session_end": 1.0},
    }
    # Every state uses gamma(shape=2.0, scale=1.0) dwell times.
    dwell = np.full((N_STATES, 2), [2.0, 1.0], dtype=np.float32)
    human_T = dict_to_dense(human)
    agent_T = dict_to_dense(agent)
    return TransitionData(human_T, agent_T, dwell.copy(), dwell.copy())
|
||||
175
sim/rl/train.py
Normal file
175
sim/rl/train.py
Normal file
@@ -0,0 +1,175 @@
|
||||
import numpy as np
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Type, Optional
|
||||
import pickle
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
from sim.rl.environment import PHANTOMEnv, BusinessLogicConstraints
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
from sim.rl.engine import (BasePricingEngine, WildPricingEngine, StaticPricingEngine,
|
||||
SimpleDemandEngine, RandomWalkEngine, ThompsonSamplingEngine)
|
||||
except ImportError as e:
|
||||
BasePricingEngine = None # engines not required for basic usage
|
||||
print(e)
|
||||
|
||||
|
||||
"""
|
||||
Target training loop:
Start from the base prices p0 returned by env.reset(), run env.step(), and collect the reward and metrics.
Pass these to the pricing engine, which learns from the previous reward and computes the next price action.
The new action is then passed to the next env.step().
The loop therefore alternates: step -> reward -> engine (produces price delta) -> step with price delta -> reward.
For the reinforcement learning inside the engine to work, it needs the full trajectory of prices.
CURRENT SOLUTION BELOW does not implement correct learning or updates.
|
||||
"""
|
||||
|
||||
class EngineTrainer:
    """wrapper to run pricing engines through episodes and collect metrics

    The engine must provide ``reset()``, ``compute_prices(prev_prices, obs)``
    and ``update(obs, reward, done, info)``. The environment's ``step`` must
    return a 5-tuple ``(obs, reward, terminated, truncated, info)``.
    """

    def __init__(self, engine, env: PHANTOMEnv, tb_writer: Optional[SummaryWriter] = None):
        self.engine = engine
        self.env = env
        self.episode_metrics = []  # one dict per training episode
        self.tb_writer = tb_writer
        self.global_step = 0  # step counter across all training episodes

    def train(self, n_episodes: int, seed: int = 42):
        """Run ``n_episodes`` training episodes, seeding episode ``ep`` with ``seed + ep``.

        Appends the last step's ``info`` (plus ``episode_reward`` and
        ``episode``) to ``self.episode_metrics`` and returns ``self``.
        """
        for ep in range(n_episodes):
            obs, _ = self.env.reset(seed=seed + ep)
            self.engine.reset()
            done = False
            prev_prices = obs["elasticity"]["price"]
            episode_reward = 0.0
            last_info: Dict[str, float] = {}
            while not done:
                action_prices = self.engine.compute_prices(prev_prices, obs)
                obs, reward, terminated, truncated, info = self.env.step(action_prices)
                # An episode ends on termination OR truncation; ignoring the
                # truncation flag would loop forever on time-limited episodes.
                done = bool(terminated or truncated)
                self.engine.update(obs, reward, done, info)
                episode_reward += reward
                prev_prices = obs["elasticity"]["price"]
                last_info = info
                if self.tb_writer:
                    self.tb_writer.add_scalar("reward/step", reward, self.global_step)
                    if "coi" in info:
                        self.tb_writer.add_scalar("diagnostics/coi", info["coi"], self.global_step)
                    if "alpha_hat" in info:
                        self.tb_writer.add_scalar("diagnostics/alpha_hat", info["alpha_hat"], self.global_step)
                self.global_step += 1
            # Copy before annotating so the environment's info dict is not mutated.
            last_info = dict(last_info)
            last_info.update({"episode_reward": episode_reward, "episode": ep})
            self.episode_metrics.append(last_info)
            if self.tb_writer:
                self.tb_writer.add_scalar("reward/episode", episode_reward, ep)
        return self

    def run_episode(self, seed: int = 42) -> Dict:
        """run single evaluation episode and return metrics

        The engine is not updated. The result holds the most recent value of
        every ``info`` key seen during the episode plus ``total_reward``.
        """
        obs, _ = self.env.reset(seed=seed)
        self.engine.reset()
        total_reward = 0.0
        prev_prices = obs["elasticity"]["price"]
        ep_metrics = {'total_reward': 0.0}
        done = False
        while not done:
            action_prices = self.engine.compute_prices(prev_prices, obs)
            obs, reward, terminated, truncated, info = self.env.step(action_prices)
            done = bool(terminated or truncated)
            total_reward += reward
            ep_metrics.update(info)
            prev_prices = obs["elasticity"]["price"]
        ep_metrics['total_reward'] = total_reward
        return ep_metrics

    def evaluate(self, n_episodes: int = 10, seed: int = 100) -> Dict:
        """evaluate trained engine

        Returns ``{metric: (mean, std)}`` over ``n_episodes`` evaluation
        episodes; a metric missing from an episode counts as 0.0.
        """
        results = {k: [] for k in ['total_reward', 'revenue_observed', 'revenue_oracle',
                                   'agent_loss', 'ux_volatility', 'look_to_book']}
        for ep in range(n_episodes):
            metrics = self.run_episode(seed=seed + ep)
            for k in results:
                results[k].append(metrics.get(k, 0.0))
        return {k: (np.mean(v), np.std(v)) for k, v in results.items()}
|
||||
|
||||
|
||||
def make_env():
    """Create a ``PHANTOMEnv`` configured with default ``BusinessLogicConstraints``."""
    default_constraints = BusinessLogicConstraints()
    return PHANTOMEnv(constraints=default_constraints)
|
||||
|
||||
|
||||
def train_engine(engine_cls, env: PHANTOMEnv, n_episodes: int, seed: int = 42,
                 tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer:
    """Instantiate ``engine_cls`` with the env's constraints and train it.

    The same ``seed`` is used for the engine and as the base episode seed.
    Returns the trainer that ran the episodes.
    """
    engine = engine_cls(constraints=env.constraints, seed=seed)
    trainer = EngineTrainer(engine, env, tb_writer=tb_writer)
    # train() returns the trainer itself.
    return trainer.train(n_episodes, seed=seed)
|
||||
|
||||
|
||||
def save_trainer(trainer: EngineTrainer, path: Path):
    """save engine state and metrics

    Pickles ``{'engine': ..., 'metrics': ...}`` to ``path``, creating parent
    directories as needed.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    snapshot = {'engine': trainer.engine, 'metrics': trainer.episode_metrics}
    with open(path, 'wb') as f:
        pickle.dump(snapshot, f)
    logger.info(f"Saved trainer to {path}")
|
||||
|
||||
|
||||
def load_trainer(path: Path, env: PHANTOMEnv, tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer:
    """load saved engine

    Rebuilds an ``EngineTrainer`` around the pickled engine and restores its
    recorded episode metrics.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # load trainer files written by save_trainer from a trusted source.
    with open(path, 'rb') as f:
        snapshot = pickle.load(f)
    restored = EngineTrainer(snapshot['engine'], env, tb_writer=tb_writer)
    restored.episode_metrics = snapshot['metrics']
    return restored
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Train every engine for a fixed number of episodes, save it, then
    # evaluate all of them on a disjoint range of seeds.
    if BasePricingEngine is None:
        logger.error("Engines not available, cannot run training")
        # `exit` is only injected by the `site` module; SystemExit always works.
        raise SystemExit(1)

    base_dir = Path("./sim/rl/runs")
    base_dir.mkdir(parents=True, exist_ok=True)

    engines = {
        "Wild": WildPricingEngine,
        "Static": StaticPricingEngine,
        "RandomWalk": RandomWalkEngine,
        "ThompsonSampling": ThompsonSamplingEngine,
    }
    n_train_episodes = 50
    n_eval_episodes = 10
    seed = 42

    logger.info(f"Training config: {n_train_episodes} episodes per engine")

    trained_trainers = {}

    for engine_name, engine_cls in engines.items():
        run_name = engine_name
        log_dir = base_dir / run_name
        log_dir.mkdir(parents=True, exist_ok=True)

        logger.info(f"Training {engine_name}")
        logger.info(f"Log directory: {log_dir}")

        env = make_env()
        tb_writer = SummaryWriter(log_dir=str(log_dir))
        try:
            trainer = train_engine(engine_cls, env, n_train_episodes, seed, tb_writer=tb_writer)
        finally:
            # Flush and release the event file even if training fails.
            tb_writer.close()

        save_path = log_dir / "trainer.pkl"
        save_trainer(trainer, save_path)

        trained_trainers[run_name] = (trainer, env)

    logger.info("Starting evaluation")

    for run_name, (trainer, env) in trained_trainers.items():
        logger.info(f"Evaluating {run_name}")
        # Offset the seed so evaluation episodes differ from training ones.
        results = trainer.evaluate(n_episodes=n_eval_episodes, seed=seed + 1000)
        for metric, (mean, std) in results.items():
            logger.info(f"  {metric:20s}: {mean:10.2f} ± {std:6.2f}")

    logger.info(f"Results saved to: {base_dir}")
|
||||
108
sim/strong_learner/data.py
Normal file
108
sim/strong_learner/data.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import os
|
||||
import requests
|
||||
try:
|
||||
import py7zr # type: ignore
|
||||
except ImportError: # pragma: no cover - optional dependency
|
||||
py7zr = None
|
||||
import pandas as pd
|
||||
from typing import Generator
|
||||
try:
|
||||
from sim.rl.behavior_loader.loader import PayloadModel, ValueModel, InteractionModel, Loader
|
||||
except ImportError:
|
||||
from loader import PayloadModel, ValueModel, InteractionModel, Loader
|
||||
|
||||
class YooChooseLoader(Loader):
    """Loader that exposes the YooChoose click/buy logs as ``InteractionModel`` sessions.

    Clicks become ``view_item_page`` events and buys become
    ``purchase_complete`` events. The dataset is downloaded and unpacked into
    ``root_dir`` when either data file is missing.
    """

    URL = "https://s3-eu-west-1.amazonaws.com/yc-rdata/yoochoose-data.7z"
    CLICK_COLS = ['session_id', 'ts', 'item_id', 'category']
    BUY_COLS = ['session_id', 'ts', 'item_id', 'price', 'quantity']

    def __init__(self, root_dir: str = "data/yoochoose", chunk_size: int = 500_000, max_sessions: int = 1000):
        self.root = root_dir
        self.chunk_size = chunk_size
        self.max_sessions = max_sessions
        self.click_path = f"{root_dir}/yoochoose-clicks.dat"
        self.buy_path = f"{root_dir}/yoochoose-buys.dat"
        # Both files are read by stream(), so fetch the dataset if either is missing.
        if not (os.path.exists(self.click_path) and os.path.exists(self.buy_path)):
            self._setup()
        self.data = self._load_sessions(max_sessions)
        self.entries = list(self.data.keys())

    def _setup(self):
        """Download the dataset archive and extract it into ``self.root``.

        Raises:
            RuntimeError: if ``py7zr`` is not installed.
            requests.HTTPError: if the download does not return a success status.
        """
        if py7zr is None:
            raise RuntimeError("py7zr is required to unpack YooChoose dataset. Install py7zr first.")
        os.makedirs(self.root, exist_ok=True)
        zip_path = f"{self.root}/temp.7z"
        try:
            with requests.get(self.URL, stream=True, timeout=60) as r:
                # Fail loudly instead of saving an error page as the archive.
                r.raise_for_status()
                with open(zip_path, 'wb') as f:
                    for chunk in r.iter_content(8192):
                        f.write(chunk)
            with py7zr.SevenZipFile(zip_path, 'r') as z:
                z.extractall(self.root)
        finally:
            # Never leave a partial or already-extracted archive behind.
            if os.path.exists(zip_path):
                os.remove(zip_path)

    def _make_interaction(self, sid: str, ts: str, item_id: str, event: str, page: str, meta: dict) -> InteractionModel:
        """Wrap one dataset row in the ``InteractionModel`` envelope with placeholder transport fields."""
        payload = PayloadModel(
            sessionId=sid, experimentId=None, eventName=event,
            page=page, productId=item_id, metadata=meta,
            storeMode="yoochoose", userAgent="dataset", ts=ts
        )
        return InteractionModel(
            partitionID=0, offset=0, timestamp=0, compression="",
            isTransactional=False, headers=[], key={},
            value=ValueModel(payload=payload, encoding="json", isPayloadNull=False, schemaId=1, size=0)
        )

    def _parse_category(self, cat) -> str:
        """Map a raw category value to a readable label.

        Missing values and 0 give ``"unknown"``, ``"S"`` gives
        ``"special_offer"``, 1-12 give ``"category_<n>"``, other integers give
        ``"brand_<n>"`` and anything else is returned as a string. Numeric
        inputs are handled the same as their string form.
        """
        if pd.isna(cat):
            return "unknown"
        if isinstance(cat, str):
            cat = cat.strip()
            if cat == "S":
                return "special_offer"
        try:
            n = int(cat)
        except (TypeError, ValueError):
            return str(cat)
        if n == 0:
            return "unknown"
        return f"category_{n}" if 1 <= n <= 12 else f"brand_{n}"

    def stream_clicks(self) -> Generator[InteractionModel, None, None]:
        """Yield one ``view_item_page`` interaction per row of the clicks file."""
        # The category column mixes numbers with the marker "S"; reading it as
        # text keeps its type stable from chunk to chunk.
        with pd.read_csv(self.click_path, names=self.CLICK_COLS, chunksize=self.chunk_size,
                         header=None, dtype={'category': str}) as reader:
            for chunk in reader:
                for r in chunk.itertuples(index=False):
                    yield self._make_interaction(
                        str(r.session_id), r.ts, str(r.item_id),
                        "view_item_page", self._parse_category(r.category), {}
                    )

    def stream_buys(self) -> Generator[InteractionModel, None, None]:
        """Yield one ``purchase_complete`` interaction per row of the buys file."""
        with pd.read_csv(self.buy_path, names=self.BUY_COLS, chunksize=self.chunk_size, header=None) as reader:
            for chunk in reader:
                for r in chunk.itertuples(index=False):
                    yield self._make_interaction(
                        str(r.session_id), r.ts, str(r.item_id),
                        "purchase_complete", "/checkout", {"price": r.price, "quantity": r.quantity}
                    )

    def stream(self) -> Generator[InteractionModel, None, None]:
        """Yield all click interactions followed by all buy interactions."""
        yield from self.stream_clicks()
        yield from self.stream_buys()

    def _load_sessions(self, max_sessions: int | None = None) -> dict:
        """Group interactions by session id, keeping at most ``max_sessions`` sessions.

        The whole stream is still consumed after the cap is reached so that
        later events (including buys) of already-kept sessions are collected.
        Each session's events are sorted by their ``ts`` value.
        """
        sessions = {}
        for interaction in self.stream():
            sid = interaction.value.payload.sessionId
            if sid not in sessions:
                if max_sessions and len(sessions) >= max_sessions:
                    continue
                sessions[sid] = []
            sessions[sid].append(interaction)
        for events in sessions.values():
            events.sort(key=lambda x: x.value.payload.ts)
        return sessions

    def get_data(self) -> dict:
        """Return the ``{session_id: [interactions]}`` mapping."""
        return self.data

    def get_entries(self) -> tuple[list[str], int]:
        """Return the list of loaded session ids and how many there are."""
        return self.entries, len(self.entries)
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: load a few sessions and report how many events of each kind they hold.
    loader = YooChooseLoader(max_sessions=100)
    event_names = [
        e.value.payload.eventName
        for evts in loader.get_data().values()
        for e in evts
    ]
    views = event_names.count("view_item_page")
    purchases = event_names.count("purchase_complete")
    print(f"Loaded {len(loader.entries)} sessions: {views} view_item_page, {purchases} purchase_complete")
|
||||
7
tests/e2e/.env.example
Normal file
7
tests/e2e/.env.example
Normal file
@@ -0,0 +1,7 @@
|
||||
WEB_URL=http://localhost:3000
|
||||
BACKEND_URL=http://localhost:5000
|
||||
PRICING_PROVIDER_URL=http://localhost:5001
|
||||
AIRFLOW_URL=http://localhost:8085
|
||||
AIRFLOW_USER=admin
|
||||
AIRFLOW_PASS=admin
|
||||
HEADLESS=true
|
||||
61
tests/e2e/helpers/airflow.ts
Normal file
61
tests/e2e/helpers/airflow.ts
Normal file
@@ -0,0 +1,61 @@
|
||||
const AIRFLOW_URL = process.env.AIRFLOW_URL || 'http://localhost:8085';
|
||||
const AUTH = 'Basic ' + Buffer.from(`${process.env.AIRFLOW_USER || 'admin'}:${process.env.AIRFLOW_PASS || 'admin'}`).toString('base64');
|
||||
|
||||
/**
 * Send an authenticated request to the Airflow instance.
 *
 * The Basic-auth header is added first, so headers supplied in `opts`
 * (in any `HeadersInit` form) take precedence over it.
 *
 * @param path - Path appended to `AIRFLOW_URL`, starting with `/`.
 * @param opts - Standard `fetch` options.
 */
const req = (path: string, opts: RequestInit = {}): Promise<Response> => {
  const headers = new Headers({ Authorization: AUTH });
  new Headers(opts.headers).forEach((value, name) => headers.set(name, value));
  return fetch(`${AIRFLOW_URL}${path}`, { ...opts, headers });
};
|
||||
|
||||
/**
 * Trigger a new run of the given DAG.
 *
 * @param dagId - DAG identifier; URL-encoded before use in the path.
 * @param conf - Run configuration sent as the `conf` field of the request body.
 * @returns The `dag_run_id` reported by Airflow.
 * @throws Error if the request fails or the response carries no `dag_run_id`.
 */
export const triggerDag = async (dagId: string, conf = {}): Promise<string> => {
  const r = await req(`/api/v1/dags/${encodeURIComponent(dagId)}/dagRuns`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ conf }),
  });
  if (!r.ok) throw new Error(`Trigger DAG failed: ${r.status}`);
  const payload: { dag_run_id?: unknown } = await r.json();
  // Fail here rather than later polling a run called "undefined".
  if (typeof payload.dag_run_id !== 'string') {
    throw new Error(`Trigger DAG returned no dag_run_id for ${dagId}`);
  }
  return payload.dag_run_id;
};
|
||||
|
||||
/**
 * Fetch the current state of a DAG run.
 *
 * Both identifiers are URL-encoded, since run ids may contain characters
 * such as `:` and `+`.
 *
 * @returns The `state` field of the DAG-run resource.
 * @throws Error if the request does not succeed.
 */
export const getDagStatus = async (dagId: string, runId: string) => {
  const r = await req(`/api/v1/dags/${encodeURIComponent(dagId)}/dagRuns/${encodeURIComponent(runId)}`);
  if (!r.ok) throw new Error(`Get status failed: ${r.status}`);
  return (await r.json()).state;
};
|
||||
|
||||
/**
 * Best-effort cancellation: mark a DAG run as failed.
 *
 * Both identifiers are URL-encoded. A non-OK response is only logged as a
 * warning, so the caller's own error is not masked.
 */
export const cancelDag = async (dagId: string, runId: string) => {
  const r = await req(`/api/v1/dags/${encodeURIComponent(dagId)}/dagRuns/${encodeURIComponent(runId)}`, {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ state: 'failed' }),
  });
  if (!r.ok) console.warn(`Failed to cancel DAG ${runId}: ${r.status}`);
};
|
||||
|
||||
/**
 * Poll a DAG run until it succeeds.
 *
 * Resolves when the run reaches `success`, throws when it reaches `failed`,
 * and after `maxMs` without either outcome cancels the run and throws.
 *
 * @param maxMs - Overall time budget in milliseconds.
 * @param pollMs - Pause between two status checks in milliseconds.
 */
export const waitForDag = async (dagId: string, runId: string, maxMs = 30000, pollMs = 1000) => {
  const deadline = Date.now() + maxMs;
  const pause = () => new Promise(resolve => setTimeout(resolve, pollMs));

  while (Date.now() < deadline) {
    switch (await getDagStatus(dagId, runId)) {
      case 'success':
        return;
      case 'failed':
        throw new Error(`DAG ${runId} failed`);
      default:
        await pause();
    }
  }

  await cancelDag(dagId, runId);
  throw new Error(`DAG ${runId} timeout`);
};
|
||||
|
||||
/**
 * Trigger a DAG and wait for that run to finish successfully.
 *
 * @param maxMs - Time budget handed to `waitForDag`.
 */
export const runDag = async (dagId: string, conf = {}, maxMs = 60000) => {
  const startedRun = await triggerDag(dagId, conf);
  await waitForDag(dagId, startedRun, maxMs);
};
|
||||
|
||||
/** Run `session_pricing_pipeline` for the given store mode with a 90-second budget. */
export const runSessionPricing = (mode = 'hotel') => {
  const conf = { store_mode: mode, session_limit: 10 };
  return runDag('session_pricing_pipeline', conf, 90000);
};
|
||||
|
||||
/**
 * Run `surge_pricing_pipeline` with a 90-second budget.
 *
 * @param highThresh - Sent as `high_threshold`.
 * @param lowThresh - Sent as `low_threshold`.
 */
export const runSurgePricing = (mode = 'hotel', highThresh = 10, lowThresh = 2) => {
  const conf = {
    store_mode: mode,
    high_threshold: highThresh,
    low_threshold: lowThresh,
    surge_multiplier: 1.2,
    discount_multiplier: 0.9,
  };
  return runDag('surge_pricing_pipeline', conf, 90000);
};
|
||||
@@ -9,8 +9,8 @@ interface InteractionEvent {
|
||||
const dumpKafkaTopic = async (backendUrl: string, topic: string) => {
|
||||
const resp = await fetch(`${backendUrl}/api/kafka/dump?topic=${topic}`);
|
||||
if (!resp.ok) throw new Error(`Kafka dump failed: ${resp.status}`);
|
||||
const { messages = [] } = await resp.json();
|
||||
return messages as any[];
|
||||
const { data = [] } = await resp.json();
|
||||
return data as any[];
|
||||
};
|
||||
|
||||
export const waitForInteractionEvent = async (
|
||||
|
||||
@@ -5,14 +5,14 @@ export default defineConfig({
|
||||
fullyParallel: true,
|
||||
forbidOnly: !!process.env.CI,
|
||||
retries: 0,
|
||||
workers: 5,
|
||||
workers: 1,
|
||||
reporter: 'list',
|
||||
use: {
|
||||
baseURL: process.env.WEB_URL || 'http://localhost:3000',
|
||||
trace: 'retain-on-failure',
|
||||
screenshot: 'only-on-failure',
|
||||
},
|
||||
timeout: 60000,
|
||||
timeout: 180000,
|
||||
expect: {
|
||||
timeout: 10000,
|
||||
},
|
||||
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
addToCart,
|
||||
} from '../helpers/interactions';
|
||||
import { getSessionEvents } from '../helpers/kafka';
|
||||
import { runSessionPricing } from '../helpers/airflow';
|
||||
|
||||
test.describe('SessionAwarePricer E2E', () => {
|
||||
const STORE_TYPE = 'hotel';
|
||||
@@ -23,6 +24,9 @@ test.describe('SessionAwarePricer E2E', () => {
|
||||
await page.waitForTimeout(1500);
|
||||
|
||||
const productId2 = await humanLikeViewProduct(page, STORE_TYPE);
|
||||
|
||||
await runSessionPricing(STORE_TYPE);
|
||||
|
||||
const secondPrice = await getPriceFromDOM(page);
|
||||
expect(await verifySessionConsistency(page, sessionId)).toBeTruthy();
|
||||
|
||||
@@ -40,11 +44,13 @@ test.describe('SessionAwarePricer E2E', () => {
|
||||
await rapidViewProductViaFlow(page, 8, 100, STORE_TYPE);
|
||||
expect(await verifySessionConsistency(page, sessionId)).toBeTruthy();
|
||||
|
||||
await page.waitForTimeout(2500);
|
||||
await page.waitForTimeout(1000);
|
||||
|
||||
const events = await getSessionEvents(backendUrl, sessionId);
|
||||
expect(events.length).toBeGreaterThanOrEqual(8);
|
||||
|
||||
await runSessionPricing(STORE_TYPE);
|
||||
|
||||
await page.goto(`/products/${productId}`);
|
||||
await page.waitForLoadState('networkidle');
|
||||
const agentPrice = await getPriceFromDOM(page);
|
||||
@@ -59,14 +65,12 @@ test.describe('SessionAwarePricer E2E', () => {
|
||||
const productId = await viewProductViaFlow(page, STORE_TYPE);
|
||||
const baselinePrice = await getPriceFromDOM(page);
|
||||
|
||||
const startTime = Date.now();
|
||||
await rapidViewProductViaFlow(page, 10, 80, STORE_TYPE);
|
||||
const duration = (Date.now() - startTime) / 1000;
|
||||
|
||||
const eventsPerSec = 10 / duration;
|
||||
expect(eventsPerSec).toBeGreaterThan(2.0);
|
||||
const events = await getSessionEvents(backendUrl, sessionId);
|
||||
expect(events.length).toBeGreaterThanOrEqual(10);
|
||||
|
||||
await page.waitForTimeout(2000);
|
||||
await runSessionPricing(STORE_TYPE);
|
||||
|
||||
await page.goto(`/products/${productId}`);
|
||||
await page.waitForLoadState('networkidle');
|
||||
@@ -105,8 +109,11 @@ test.describe('SessionAwarePricer E2E', () => {
|
||||
|
||||
await rapidViewProductViaFlow(page, 2, 150, STORE_TYPE);
|
||||
|
||||
await page.waitForTimeout(1500);
|
||||
await page.waitForTimeout(1000);
|
||||
await humanLikeViewProduct(page, STORE_TYPE);
|
||||
|
||||
await runSessionPricing(STORE_TYPE);
|
||||
|
||||
const finalPrice = await getPriceFromDOM(page);
|
||||
|
||||
expect(Math.abs(finalPrice - baselinePrice) / baselinePrice).toBeLessThan(0.3);
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
verifySessionConsistency,
|
||||
} from '../helpers/interactions';
|
||||
import { waitForInteractionEvent, countProductViews } from '../helpers/kafka';
|
||||
import { runSurgePricing } from '../helpers/airflow';
|
||||
|
||||
test.describe('SimpleSurgePricer E2E', () => {
|
||||
const STORE_TYPE = 'hotel';
|
||||
@@ -29,7 +30,7 @@ test.describe('SimpleSurgePricer E2E', () => {
|
||||
|
||||
await rapidViewProductViaFlow(page, 5, 200, STORE_TYPE);
|
||||
|
||||
await page.waitForTimeout(2000);
|
||||
await page.waitForTimeout(1000);
|
||||
|
||||
const evt = await waitForInteractionEvent(backendUrl, sessionId, 'view_item_page');
|
||||
expect(evt).not.toBeNull();
|
||||
@@ -37,6 +38,8 @@ test.describe('SimpleSurgePricer E2E', () => {
|
||||
const viewCount = await countProductViews(backendUrl, productId);
|
||||
expect(viewCount).toBeGreaterThanOrEqual(5);
|
||||
|
||||
await runSurgePricing(STORE_TYPE, 3, 1);
|
||||
|
||||
await page.goto(`/products/${productId}`);
|
||||
await page.waitForLoadState('networkidle');
|
||||
const surgedPrice = await getPriceFromDOM(page);
|
||||
@@ -72,7 +75,9 @@ test.describe('SimpleSurgePricer E2E', () => {
|
||||
|
||||
await rapidViewProductViaFlow(page, 5, 150, STORE_TYPE);
|
||||
|
||||
await page.waitForTimeout(1500);
|
||||
await page.waitForTimeout(1000);
|
||||
|
||||
await runSurgePricing(STORE_TYPE, 3, 1);
|
||||
|
||||
await page.goto(`/products/${productId}`);
|
||||
await page.waitForLoadState('networkidle');
|
||||
@@ -81,6 +86,8 @@ test.describe('SimpleSurgePricer E2E', () => {
|
||||
|
||||
await page.waitForTimeout(12000);
|
||||
|
||||
await runSurgePricing(STORE_TYPE, 3, 1);
|
||||
|
||||
await page.goto(`/products/${productId}`);
|
||||
await page.waitForLoadState('networkidle');
|
||||
const decayedPrice = await getPriceFromDOM(page);
|
||||
|
||||
@@ -30,6 +30,8 @@ export async function GET(req: NextRequest) {
|
||||
const providerUrl = process.env.PRICING_PROVIDER_URL || 'http://localhost:5001';
|
||||
try {
|
||||
const queryParams = new URLSearchParams();
|
||||
// This is our entry point into dynamic pricing: we pass the session and experiment context and ask for a price to assign to the trajectory expressed by that context.
|
||||
// The whole pipeline gets triggered from here.
|
||||
if (sessionId) queryParams.append('sessionId', sessionId);
|
||||
if (experimentId) queryParams.append('experimentId', experimentId);
|
||||
|
||||
@@ -55,25 +57,26 @@ export async function GET(req: NextRequest) {
|
||||
price = Math.round(randomBase * 100) / 100;
|
||||
}
|
||||
|
||||
// log price to kafka for elasticity computation
|
||||
// log price to kafka asynchronously (non-blocking)
|
||||
if (sessionId) {
|
||||
const backendUrl = process.env.BACKEND_URL || 'http://localhost:5000';
|
||||
try {
|
||||
await fetch(`${backendUrl}/api/kafka/price-log`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
productId,
|
||||
price,
|
||||
sessionId,
|
||||
experimentId: experimentId || undefined,
|
||||
storeMode,
|
||||
ts: timestamp,
|
||||
}),
|
||||
});
|
||||
} catch (err) {
|
||||
console.error('[price-log-error]', err);
|
||||
}
|
||||
// fire and forget - don't await to avoid blocking response
|
||||
fetch(`${backendUrl}/api/kafka/price-log`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
productId,
|
||||
price,
|
||||
sessionId,
|
||||
experimentId: experimentId || undefined,
|
||||
storeMode,
|
||||
ts: timestamp,
|
||||
}),
|
||||
}).catch(err => {
|
||||
if (process.env.NODE_ENV === 'development') {
|
||||
console.error('[price-log-error]', err);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (process.env.NODE_ENV === 'development') {
|
||||
|
||||
@@ -32,7 +32,8 @@ export default function CartPage() {
|
||||
{itemCount > 0 && (
|
||||
<button
|
||||
onClick={clearCart}
|
||||
className="text-sm text-red-600 hover:underline"
|
||||
className="text-sm hover:underline"
|
||||
style={{ color: 'var(--accent-warning)' }}
|
||||
>
|
||||
Clear cart
|
||||
</button>
|
||||
@@ -42,7 +43,7 @@ export default function CartPage() {
|
||||
{itemCount === 0 ? (
|
||||
<div className="text-center py-12">
|
||||
<p className="text-gray-500 mb-4">Your cart is empty</p>
|
||||
<a href="/" className="text-blue-600 hover:underline">Browse our selection</a>
|
||||
<a href="/" className="hover:underline" style={{ color: 'var(--text-accent)' }}>Browse our selection</a>
|
||||
</div>
|
||||
) : (
|
||||
<>
|
||||
@@ -54,15 +55,11 @@ export default function CartPage() {
|
||||
>
|
||||
<div className="flex-1">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span className="px-2 py-0.5 text-xs font-medium rounded bg-blue-100 text-blue-800">
|
||||
{item.type}
|
||||
</span>
|
||||
<h3 className="font-semibold">{item.name}</h3>
|
||||
</div>
|
||||
|
||||
{item.type === 'hotel' && (
|
||||
<div className="text-sm text-gray-600">
|
||||
<p>{String(item.metadata.roomType)}</p>
|
||||
<p>{String(item.metadata.checkIn)} - {String(item.metadata.checkOut)}</p>
|
||||
<p>{String(item.metadata.nights)} night{Number(item.metadata.nights) > 1 ? 's' : ''}</p>
|
||||
</div>
|
||||
@@ -81,7 +78,8 @@ export default function CartPage() {
|
||||
<p className="text-xl font-bold mb-2">${item.price}</p>
|
||||
<button
|
||||
onClick={() => handleRemove(item.id, item.type)}
|
||||
className="text-sm text-red-600 hover:underline"
|
||||
className="text-sm hover:underline"
|
||||
style={{ color: 'var(--accent-warning)' }}
|
||||
>
|
||||
Remove
|
||||
</button>
|
||||
@@ -100,7 +98,7 @@ export default function CartPage() {
|
||||
dispatchInteraction('checkout_start', undefined, { total, itemCount });
|
||||
window.location.href = '/checkout';
|
||||
}}
|
||||
className="w-full py-3 bg-blue-600 hover:bg-blue-700 text-white rounded-lg font-medium transition-colors"
|
||||
className="btn-primary w-full"
|
||||
>
|
||||
Proceed to Checkout
|
||||
</button>
|
||||
|
||||
@@ -8,6 +8,9 @@
|
||||
--bg-secondary: #f5f5f5;
|
||||
--text-primary: #333333;
|
||||
--text-secondary: #666666;
|
||||
--accent-primary: #007aff;
|
||||
--accent-primary-hover: #0051d5;
|
||||
--accent-primary-light: #e6f2ff;
|
||||
--spacing-sm: 8px;
|
||||
--spacing-md: 16px;
|
||||
--spacing-lg: 32px;
|
||||
|
||||
@@ -15,8 +15,8 @@ const geistMono = Geist_Mono({
|
||||
});
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "Create Next App",
|
||||
description: "Generated by create next app",
|
||||
title: "Travel Booking Platform",
|
||||
description: "Book flights and hotels with dynamic pricing",
|
||||
};
|
||||
|
||||
export default function RootLayout({
|
||||
|
||||
@@ -1,65 +1,5 @@
|
||||
import Image from "next/image";
|
||||
import { redirect } from 'next/navigation';
|
||||
|
||||
export default function Home() {
|
||||
return (
|
||||
<div className="flex min-h-screen items-center justify-center bg-zinc-50 font-sans dark:bg-black">
|
||||
<main className="flex min-h-screen w-full max-w-3xl flex-col items-center justify-between py-32 px-16 bg-white dark:bg-black sm:items-start">
|
||||
<Image
|
||||
className="dark:invert"
|
||||
src="/next.svg"
|
||||
alt="Next.js logo"
|
||||
width={100}
|
||||
height={20}
|
||||
priority
|
||||
/>
|
||||
<div className="flex flex-col items-center gap-6 text-center sm:items-start sm:text-left">
|
||||
<h1 className="max-w-xs text-3xl font-semibold leading-10 tracking-tight text-black dark:text-zinc-50">
|
||||
To get started, edit the page.tsx file.
|
||||
</h1>
|
||||
<p className="max-w-md text-lg leading-8 text-zinc-600 dark:text-zinc-400">
|
||||
Looking for a starting point or more instructions? Head over to{" "}
|
||||
<a
|
||||
href="https://vercel.com/templates?framework=next.js&utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
|
||||
className="font-medium text-zinc-950 dark:text-zinc-50"
|
||||
>
|
||||
Templates
|
||||
</a>{" "}
|
||||
or the{" "}
|
||||
<a
|
||||
href="https://nextjs.org/learn?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
|
||||
className="font-medium text-zinc-950 dark:text-zinc-50"
|
||||
>
|
||||
Learning
|
||||
</a>{" "}
|
||||
center.
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex flex-col gap-4 text-base font-medium sm:flex-row">
|
||||
<a
|
||||
className="flex h-12 w-full items-center justify-center gap-2 rounded-full bg-foreground px-5 text-background transition-colors hover:bg-[#383838] dark:hover:bg-[#ccc] md:w-[158px]"
|
||||
href="https://vercel.com/new?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
>
|
||||
<Image
|
||||
className="dark:invert"
|
||||
src="/vercel.svg"
|
||||
alt="Vercel logomark"
|
||||
width={16}
|
||||
height={16}
|
||||
/>
|
||||
Deploy Now
|
||||
</a>
|
||||
<a
|
||||
className="flex h-12 w-full items-center justify-center rounded-full border border-solid border-black/[.08] px-5 transition-colors hover:border-transparent hover:bg-black/[.04] dark:border-white/[.145] dark:hover:bg-[#1a1a1a] md:w-[158px]"
|
||||
href="https://nextjs.org/docs?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
>
|
||||
Documentation
|
||||
</a>
|
||||
</div>
|
||||
</main>
|
||||
</div>
|
||||
);
|
||||
redirect('/hotel');
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
import type { EventName } from '@/lib/events';
|
||||
import type { Hotel } from '@/lib/hotel-utils';
|
||||
import { getHotelImageUrl } from '@/lib/hotel-utils';
|
||||
import { useHoverTracking } from '@/hooks/useHoverTracking';
|
||||
import PriceDisplay from '@/components/ui/PriceDisplay';
|
||||
|
||||
@@ -47,8 +48,6 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) {
|
||||
window.location.href = `/hotel/products/${hotel.id}`;
|
||||
};
|
||||
|
||||
const imageUrl = `https://images.unsplash.com/photo-1551882547-ff40c63fe5fa?w=400&h=300&fit=crop`;
|
||||
|
||||
return (
|
||||
<div
|
||||
className="hotel-card cursor-pointer"
|
||||
@@ -56,7 +55,7 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) {
|
||||
>
|
||||
<div className="hotel-image relative overflow-hidden">
|
||||
<img
|
||||
src={imageUrl}
|
||||
src={getHotelImageUrl(hotel.id, { w: 400, h: 300 })}
|
||||
alt={hotel.name}
|
||||
className="w-full h-full object-cover"
|
||||
onError={(e) => {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
import { useState, useEffect } from 'react';
|
||||
import type { Hotel } from '@/lib/hotel-utils';
|
||||
import { getHotelImageUrl } from '@/lib/hotel-utils';
|
||||
import PriceDisplay from '@/components/ui/PriceDisplay';
|
||||
|
||||
interface HotelDetailsProps {
|
||||
@@ -43,13 +44,11 @@ const PriceTotalDisplay = ({ productId, nights }: { productId: string; nights: n
|
||||
};
|
||||
|
||||
export default function HotelDetails({ product, onAddToCart, addedToCart }: HotelDetailsProps) {
|
||||
const imageUrl = `https://images.unsplash.com/photo-1566073771259-6a8506099945?w=800&h=600&fit=crop`;
|
||||
|
||||
return (
|
||||
<div className="w-full flex flex-col lg:flex-row gap-12 py-8">
|
||||
<div className="w-full lg:w-1/2 rounded-lg aspect-[4/3] overflow-hidden shrink-0">
|
||||
<img
|
||||
src={imageUrl}
|
||||
src={getHotelImageUrl(product.id, { w: 800, h: 600 })}
|
||||
alt={product.name}
|
||||
className="w-full h-full object-cover"
|
||||
onError={(e) => {
|
||||
|
||||
@@ -20,7 +20,7 @@ const NavLink = ({ href, children }: { href: string; children: React.ReactNode }
|
||||
href={href}
|
||||
className={`px-4 py-2 rounded-md transition-colors ${
|
||||
isActive
|
||||
? 'bg-[var(--accent-primary)] font-semibold'
|
||||
? 'bg-[var(--accent-primary)] text-white font-semibold'
|
||||
: 'hover:bg-[var(--accent-primary-light)] text-[var(--text-primary)]'
|
||||
}`}
|
||||
>
|
||||
|
||||
@@ -31,7 +31,7 @@ export interface Flight {
|
||||
availability: number;
|
||||
}
|
||||
|
||||
const EPOCH = new Date(0);
|
||||
import { dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils';
|
||||
|
||||
export const transformProduct = (p: AirlineProduct): Flight => {
|
||||
const { id, flight_type, date_index, metadata, availability } = p;
|
||||
@@ -52,24 +52,4 @@ export const transformProduct = (p: AirlineProduct): Flight => {
|
||||
};
|
||||
};
|
||||
|
||||
// convert date string to days from today
|
||||
export const dateToDaysFromToday = (dateStr: string): number => {
|
||||
const target = new Date(dateStr);
|
||||
target.setHours(0, 0, 0, 0);
|
||||
const today = new Date();
|
||||
today.setHours(0, 0, 0, 0);
|
||||
return Math.floor((target.getTime() - today.getTime()) / 86400000);
|
||||
};
|
||||
|
||||
// convert date string to date_index (days since epoch)
|
||||
export const dateToIndex = (dateStr: string): number => {
|
||||
const d = new Date(dateStr);
|
||||
return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000);
|
||||
};
|
||||
|
||||
// get current date_index
|
||||
export const todayIndex = (): number => {
|
||||
const now = new Date();
|
||||
now.setHours(0, 0, 0, 0);
|
||||
return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000);
|
||||
};
|
||||
export { dateToDaysFromToday, dateToIndex, todayIndex };
|
||||
|
||||
23
web/src/lib/date-utils.ts
Normal file
23
web/src/lib/date-utils.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
const EPOCH = new Date(0);
|
||||
const MS_PER_DAY = 86400000;
|
||||
|
||||
/**
 * Returns the number of whole days from today to `dateStr`.
 *
 * Both the parsed date and "now" are reset to local midnight before the
 * millisecond difference is divided by `MS_PER_DAY` and floored, so dates in
 * the past produce negative values.
 *
 * NOTE(review): date-only ISO strings (e.g. "2025-01-15") are parsed by
 * `new Date` as UTC; in time zones behind UTC the local-midnight reset may
 * then land on the previous calendar day. Confirm the expected input format.
 *
 * @param dateStr - Date string accepted by the `Date` constructor.
 * @returns Floored day difference (target minus today).
 */
export const dateToDaysFromToday = (dateStr: string): number => {
|
||||
const target = new Date(dateStr);
|
||||
// Drop the time-of-day component (local time) so only calendar days are compared.
target.setHours(0, 0, 0, 0);
|
||||
const today = new Date();
|
||||
today.setHours(0, 0, 0, 0);
|
||||
return Math.floor((target.getTime() - today.getTime()) / MS_PER_DAY);
|
||||
};
|
||||
|
||||
/**
 * Converts a date string to a day index: the floored number of `MS_PER_DAY`
 * intervals between `EPOCH` and the parsed date.
 *
 * Unlike the other helpers in this file, the parsed date is not reset to
 * midnight before the division.
 *
 * @param dateStr - Date string accepted by the `Date` constructor.
 * @returns Day index relative to `EPOCH`.
 */
export const dateToIndex = (dateStr: string): number => {
|
||||
const d = new Date(dateStr);
|
||||
return Math.floor((d.getTime() - EPOCH.getTime()) / MS_PER_DAY);
|
||||
};
|
||||
|
||||
/**
 * Returns the day index of the current date: the floored number of
 * `MS_PER_DAY` intervals between `EPOCH` and today's local midnight.
 *
 * NOTE(review): `EPOCH` is `new Date(0)` (a UTC instant) while the reset below
 * uses local time; in time zones ahead of UTC, local midnight falls on the
 * previous UTC day, so this may be one less than `dateToIndex` for today's
 * date. Confirm which convention callers expect.
 *
 * @returns Day index relative to `EPOCH`.
 */
export const todayIndex = (): number => {
|
||||
const now = new Date();
|
||||
now.setHours(0, 0, 0, 0);
|
||||
return Math.floor((now.getTime() - EPOCH.getTime()) / MS_PER_DAY);
|
||||
};
|
||||
|
||||
export { EPOCH, MS_PER_DAY };
|
||||
@@ -25,7 +25,7 @@ export interface Hotel {
|
||||
nights: number;
|
||||
}
|
||||
|
||||
const EPOCH = new Date(0);
|
||||
import { EPOCH, MS_PER_DAY, dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils';
|
||||
|
||||
export const transformProduct = (p: HotelProduct): Hotel => {
|
||||
const { id, room_type, date_index, metadata } = p;
|
||||
@@ -37,14 +37,14 @@ export const transformProduct = (p: HotelProduct): Hotel => {
|
||||
// legacy: treat as offset from today
|
||||
const today = new Date();
|
||||
today.setHours(0, 0, 0, 0);
|
||||
checkIn = new Date(today.getTime() + date_index * 86400000);
|
||||
checkIn = new Date(today.getTime() + date_index * MS_PER_DAY);
|
||||
} else {
|
||||
// proper: days since epoch
|
||||
checkIn = new Date(EPOCH.getTime() + date_index * 86400000);
|
||||
checkIn = new Date(EPOCH.getTime() + date_index * MS_PER_DAY);
|
||||
}
|
||||
|
||||
const nights = 1;
|
||||
const checkOut = new Date(checkIn.getTime() + nights * 86400000);
|
||||
const checkOut = new Date(checkIn.getTime() + nights * MS_PER_DAY);
|
||||
|
||||
const formatOpts: Intl.DateTimeFormatOptions = {
|
||||
month: 'short',
|
||||
@@ -65,24 +65,34 @@ export const transformProduct = (p: HotelProduct): Hotel => {
|
||||
};
|
||||
};
|
||||
|
||||
// convert date string to days from today
|
||||
export const dateToDaysFromToday = (dateStr: string): number => {
|
||||
const target = new Date(dateStr);
|
||||
target.setHours(0, 0, 0, 0);
|
||||
const today = new Date();
|
||||
today.setHours(0, 0, 0, 0);
|
||||
return Math.floor((target.getTime() - today.getTime()) / 86400000);
|
||||
const hotelImagePool = [
|
||||
'photo-1566073771259-6a8506099945',
|
||||
'photo-1551882547-ff40c63fe5fa',
|
||||
'photo-1590490360182-c33d57733427',
|
||||
'photo-1582719478250-c89cae4dc85b',
|
||||
'photo-1596701062351-8c2c14d1fdd0',
|
||||
'photo-1631049307264-da0ec9d70304',
|
||||
'photo-1578683010236-d716f9a3f461',
|
||||
'photo-1540518614846-7eded433c457',
|
||||
'photo-1505693416388-ac5ce068fe85',
|
||||
'photo-1522771739844-6a9f6d5f14af',
|
||||
'photo-1562438668-bcf0ca6578f0',
|
||||
'photo-1595576508898-0ad5c879a061',
|
||||
];
|
||||
|
||||
/**
 * Computes a deterministic, non-negative integer hash of a string.
 *
 * Each UTF-16 code unit is folded in as `h * 31 + code`, with the
 * multiplication written as `(h << 5) - h`. An empty string hashes to 0.
 *
 * @param s - String to hash.
 * @returns Absolute value of the accumulated hash.
 */
const hashString = (s: string): number => {
|
||||
let h = 0;
|
||||
for (let i = 0; i < s.length; i++) {
|
||||
h = ((h << 5) - h) + s.charCodeAt(i);
|
||||
// Bitwise AND with itself truncates the accumulator to a 32-bit integer.
h = h & h;
|
||||
}
|
||||
return Math.abs(h);
|
||||
};
|
||||
|
||||
// convert date string to date_index (days since epoch)
|
||||
export const dateToIndex = (dateStr: string): number => {
|
||||
const d = new Date(dateStr);
|
||||
return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000);
|
||||
/**
 * Builds an Unsplash image URL for a hotel.
 *
 * The photo is chosen from `hotelImagePool` by hashing `hotelId` and taking
 * the result modulo the pool length, so the same id always maps to the same
 * photo while different ids are spread across the pool.
 *
 * @param hotelId - Identifier hashed to select the photo.
 * @param size - Requested width and height, sent as the `w` and `h` query
 *   parameters; defaults to 400x300.
 * @returns URL on `images.unsplash.com` with `fit=crop`.
 */
export const getHotelImageUrl = (hotelId: string, size: { w: number; h: number } = { w: 400, h: 300 }): string => {
|
||||
const idx = hashString(hotelId) % hotelImagePool.length;
|
||||
const photoId = hotelImagePool[idx];
|
||||
return `https://images.unsplash.com/${photoId}?w=${size.w}&h=${size.h}&fit=crop`;
|
||||
};
|
||||
|
||||
// get current date_index
|
||||
export const todayIndex = (): number => {
|
||||
const now = new Date();
|
||||
now.setHours(0, 0, 0, 0);
|
||||
return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000);
|
||||
};
|
||||
export { dateToDaysFromToday, dateToIndex, todayIndex };
|
||||
|
||||
Reference in New Issue
Block a user