mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
Merge pull request #55 from velocitatem/optimizing-runs
Enhance TPU orchestration and parallelization with benchmarks
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -3,6 +3,7 @@
|
|||||||
.env.*
|
.env.*
|
||||||
!.env.*.example
|
!.env.*.example
|
||||||
**/.venv
|
**/.venv
|
||||||
|
**/.venv-ray
|
||||||
|
|
||||||
# python build/cache artifacts
|
# python build/cache artifacts
|
||||||
**/__pycache__
|
**/__pycache__
|
||||||
|
|||||||
35
.rayignore
Normal file
35
.rayignore
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Virtual environments
|
||||||
|
.venv
|
||||||
|
.venv*
|
||||||
|
venv
|
||||||
|
venv*
|
||||||
|
**/.venv
|
||||||
|
**/venv
|
||||||
|
**/node_modules
|
||||||
|
node_modules/
|
||||||
|
|
||||||
|
# Python caches
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
.ruff_cache/
|
||||||
|
.pytest_cache/
|
||||||
|
|
||||||
|
# Git
|
||||||
|
.git/
|
||||||
|
|
||||||
|
# Large data and logs
|
||||||
|
data/
|
||||||
|
experiments/
|
||||||
|
wandb/
|
||||||
|
dumplogs*
|
||||||
|
*.zip
|
||||||
|
*.pdf
|
||||||
|
*.log
|
||||||
|
*.dot
|
||||||
|
|
||||||
|
# Other large dirs
|
||||||
|
PHANTOM_web/
|
||||||
|
web/
|
||||||
|
docs/
|
||||||
|
paper/
|
||||||
|
.nx/
|
||||||
58
Makefile
58
Makefile
@@ -11,6 +11,7 @@ PYTEST := $(VENV)/bin/pytest
|
|||||||
NX := npx nx
|
NX := npx nx
|
||||||
|
|
||||||
SWEEP_ENV_FILE ?= .env.sweep
|
SWEEP_ENV_FILE ?= .env.sweep
|
||||||
|
TPU_CONF ?= tpu_orchestration/configs/v4_spot_us.conf
|
||||||
|
|
||||||
WANDB_ENTITY ?=
|
WANDB_ENTITY ?=
|
||||||
WANDB_PROJECT ?= capstone
|
WANDB_PROJECT ?= capstone
|
||||||
@@ -21,6 +22,14 @@ SIMPLE_BENCHMARK_ARGS ?= --tiers qtable,ppo,dqn,a2c --alpha-values 0.0,0.15,0.3,
|
|||||||
BENCHMARK_AGENT_ARGS ?=
|
BENCHMARK_AGENT_ARGS ?=
|
||||||
AGENT_COUNT ?= 0
|
AGENT_COUNT ?= 0
|
||||||
|
|
||||||
|
WHOCLICKED_REPO ?= velocitatem/whoclickedit
|
||||||
|
WHOCLICKED_CSV ?= experiments/exports/whoclicked.csv
|
||||||
|
WHOCLICKED_CARD ?= experiments/exports/whoclicked_dataset_card.md
|
||||||
|
WHOCLICKED_CSV_PATH_IN_REPO ?= whoclicked.csv
|
||||||
|
WHOCLICKED_CARD_PATH_IN_REPO ?= README.md
|
||||||
|
WHOCLICKED_DATASET_MESSAGE ?= Update flattened whoclickedit dataset
|
||||||
|
WHOCLICKED_CARD_MESSAGE ?= Update dataset card for whoclickedit
|
||||||
|
|
||||||
REPO_URL ?=
|
REPO_URL ?=
|
||||||
BRANCH ?= main
|
BRANCH ?= main
|
||||||
WORKDIR ?= $(HOME)/PHANTOM-agent
|
WORKDIR ?= $(HOME)/PHANTOM-agent
|
||||||
@@ -35,8 +44,10 @@ SWEEP_ENV_LOAD = set -a; [ -f "$(SWEEP_ENV_FILE)" ] && . "$(SWEEP_ENV_FILE)" ||
|
|||||||
|
|
||||||
.PHONY: help
|
.PHONY: help
|
||||||
help:
|
help:
|
||||||
@echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines"
|
@echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | manim.render manim.render.all"
|
||||||
@echo "backend.server backend.provider backend.worker | platform.up platform.down platform.logs | docker.train.publish"
|
@echo "backend.server backend.provider backend.worker | platform.up platform.down platform.logs | docker.train.publish"
|
||||||
|
@echo "data.pull data.push data.whoclicked.publish | study.margin-erosion study.margin-erosion.quick study.margin-erosion.plot"
|
||||||
|
@echo "tpu.ray.bootstrap tpu.ray.deps tpu.ray.verify tpu.ray.teardown"
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "Build general public version:"
|
@echo "Build general public version:"
|
||||||
@echo " make pdf.genpop"
|
@echo " make pdf.genpop"
|
||||||
@@ -56,6 +67,12 @@ help:
|
|||||||
@echo "Bootstrap private repo worker from anywhere:"
|
@echo "Bootstrap private repo worker from anywhere:"
|
||||||
@echo " make train.bootstrap REPO_URL=https://github.com/org/repo.git BRANCH=main SWEEP_ID=entity/project/id"
|
@echo " make train.bootstrap REPO_URL=https://github.com/org/repo.git BRANCH=main SWEEP_ID=entity/project/id"
|
||||||
@echo ""
|
@echo ""
|
||||||
|
@echo "Bootstrap Ray on TPU slice from config:"
|
||||||
|
@echo " make tpu.ray.bootstrap TPU_CONF=tpu_orchestration/configs/v4_spot_us.conf"
|
||||||
|
@echo ""
|
||||||
|
@echo "Publish whoclickedit dataset + card:"
|
||||||
|
@echo " make data.whoclicked.publish HF_TOKEN=... WHOCLICKED_REPO=velocitatem/whoclickedit"
|
||||||
|
@echo ""
|
||||||
@echo "Config source: $(SWEEP_ENV_FILE) (auto-loaded)"
|
@echo "Config source: $(SWEEP_ENV_FILE) (auto-loaded)"
|
||||||
|
|
||||||
$(BUILDDIR):
|
$(BUILDDIR):
|
||||||
@@ -133,10 +150,42 @@ train.agent:
|
|||||||
train.bootstrap:
|
train.bootstrap:
|
||||||
@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" REPO_URL="$(REPO_URL)" BRANCH="$(BRANCH)" WORKDIR="$(WORKDIR)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" AGENT_LOOP="$(AGENT_LOOP)" RETRY_SECONDS="$(RETRY_SECONDS)" $(NX) run research:train-bootstrap
|
@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" REPO_URL="$(REPO_URL)" BRANCH="$(BRANCH)" WORKDIR="$(WORKDIR)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" AGENT_LOOP="$(AGENT_LOOP)" RETRY_SECONDS="$(RETRY_SECONDS)" $(NX) run research:train-bootstrap
|
||||||
|
|
||||||
|
.PHONY: tpu.ray.bootstrap tpu.ray.deps tpu.ray.verify tpu.ray.teardown
|
||||||
|
tpu.ray.bootstrap:
|
||||||
|
@TPU_CONF="$(TPU_CONF)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" $(NX) run research:tpu-ray-bootstrap
|
||||||
|
|
||||||
|
tpu.ray.deps:
|
||||||
|
@TPU_CONF="$(TPU_CONF)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" $(NX) run research:tpu-ray-deps
|
||||||
|
|
||||||
|
tpu.ray.verify:
|
||||||
|
@TPU_CONF="$(TPU_CONF)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" $(NX) run research:tpu-ray-verify
|
||||||
|
|
||||||
|
tpu.ray.teardown:
|
||||||
|
@TPU_CONF="$(TPU_CONF)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" $(NX) run research:tpu-ray-teardown
|
||||||
|
|
||||||
|
.PHONY: data.pull data.push
|
||||||
|
data.pull:
|
||||||
|
python scripts/hf_data.py pull
|
||||||
|
|
||||||
|
data.push:
|
||||||
|
python scripts/hf_data.py push
|
||||||
|
|
||||||
|
.PHONY: data.whoclicked.publish
|
||||||
|
data.whoclicked.publish:
|
||||||
|
@HF_TOKEN="$(HF_TOKEN)" WHOCLICKED_REPO="$(WHOCLICKED_REPO)" WHOCLICKED_CSV="$(WHOCLICKED_CSV)" WHOCLICKED_CARD="$(WHOCLICKED_CARD)" WHOCLICKED_CSV_PATH_IN_REPO="$(WHOCLICKED_CSV_PATH_IN_REPO)" WHOCLICKED_CARD_PATH_IN_REPO="$(WHOCLICKED_CARD_PATH_IN_REPO)" WHOCLICKED_DATASET_MESSAGE="$(WHOCLICKED_DATASET_MESSAGE)" WHOCLICKED_CARD_MESSAGE="$(WHOCLICKED_CARD_MESSAGE)" $(NX) run research:whoclicked-publish
|
||||||
|
|
||||||
.PHONY: stats.lines
|
.PHONY: stats.lines
|
||||||
stats.lines:
|
stats.lines:
|
||||||
@$(NX) run research:stats
|
@$(NX) run research:stats
|
||||||
|
|
||||||
|
.PHONY: study.margin-erosion
|
||||||
|
study.margin-erosion:
|
||||||
|
python -m engine.studies.margin_erosion_alpha
|
||||||
|
|
||||||
|
.PHONY: study.margin-erosion.quick
|
||||||
|
study.margin-erosion.quick:
|
||||||
|
python -m engine.studies.margin_erosion_alpha --quick
|
||||||
|
|
||||||
.PHONY: wordcount
|
.PHONY: wordcount
|
||||||
wordcount:
|
wordcount:
|
||||||
@$(NX) run paper:wordcount
|
@$(NX) run paper:wordcount
|
||||||
@@ -185,3 +234,10 @@ count-lines:
|
|||||||
|
|
||||||
all:
|
all:
|
||||||
@$(NX) run paper:build
|
@$(NX) run paper:build
|
||||||
|
|
||||||
|
.PHONY: manim.render manim.render.all
|
||||||
|
manim.render:
|
||||||
|
@$(NX) run manim:render
|
||||||
|
|
||||||
|
manim.render.all:
|
||||||
|
@$(NX) run manim:render-all
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
### PHANTOM
|
### PHANTOM
|
||||||
|
|
||||||
|
[](https://huggingface.co/datasets/velocitatem/whoclickedit)
|
||||||
[](https://github.com/velocitatem/PHANTOM/actions/workflows/latex.yml)
|
[](https://github.com/velocitatem/PHANTOM/actions/workflows/latex.yml)
|
||||||
[](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf)
|
[](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf)
|
||||||
[](https://sites.research.google/trc/faq/)
|
[](https://sites.research.google/trc/faq/)
|
||||||
|
|||||||
@@ -1,6 +0,0 @@
|
|||||||
64 spot Cloud TPU v6e chips in zone europe-west4-a
|
|
||||||
32 spot Cloud TPU v4 chips in zone us-central2-b
|
|
||||||
64 spot Cloud TPU v5e chips in zone us-central1-a
|
|
||||||
64 spot Cloud TPU v6e chips in zone us-east1-d
|
|
||||||
32 on-demand Cloud TPU v4 chips in zone us-central2-b
|
|
||||||
64 spot Cloud TPU v5e chips in zone europe-west4-b
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
# 32 spot Cloud TPU v4 chips in zone us-central2-b
|
|
||||||
export PROJECT_ID=phantom-trc
|
|
||||||
export QR_NAME=TPUv4s32spotUC2B
|
|
||||||
export TPU_NAME=tpu-v4-32-uc2b-spot
|
|
||||||
export ZONE=us-central2-b
|
|
||||||
export ACCELERATOR_TYPE=v4-32
|
|
||||||
export RUNTIME_VERSION=v2-alpha-tpuv4
|
|
||||||
|
|
||||||
gcloud compute tpus tpu-vm create ${TPU_NAME} \
|
|
||||||
--project=${PROJECT_ID} \
|
|
||||||
--zone=${ZONE} \
|
|
||||||
--accelerator-type=${ACCELERATOR_TYPE} \
|
|
||||||
--version=${RUNTIME_VERSION} \
|
|
||||||
--spot \
|
|
||||||
|| \
|
|
||||||
gcloud compute tpus queued-resources create ${QR_NAME} \
|
|
||||||
--project=${PROJECT_ID} \
|
|
||||||
--zone=${ZONE} \
|
|
||||||
--node-id=${TPU_NAME} \
|
|
||||||
--accelerator-type=${ACCELERATOR_TYPE} \
|
|
||||||
--runtime-version=${RUNTIME_VERSION} \
|
|
||||||
--spot
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
# 32 on-demand Cloud TPU v4 chips in zone us-central2-b
|
|
||||||
export PROJECT_ID=phantom-trc
|
|
||||||
export QR_NAME=TPUlong
|
|
||||||
export ZONE=us-central2-b
|
|
||||||
export ACCELERATOR_TYPE=v4-32
|
|
||||||
export RUNTIME_VERSION=v2-alpha-tpuv4
|
|
||||||
#gcloud compute tpus tpu-vm create ${TPU_NAME} --zone=${ZONE} --project=${PROJECT_ID} --accelerator-type=${ACCELERATOR_TYPE} --version=${RUNTIME_VERSION}
|
|
||||||
gcloud compute tpus queued-resources create ${QR_NAME} \
|
|
||||||
--project=${PROJECT_ID} \
|
|
||||||
--zone=${ZONE} \
|
|
||||||
--node-id=${TPU_NAME} \
|
|
||||||
--accelerator-type=${ACCELERATOR_TYPE} \
|
|
||||||
--runtime-version=${RUNTIME_VERSION}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
# 64 spot Cloud TPU v5e chips in zone europe-west4-b
|
|
||||||
export PROJECT_ID=phantom-trc
|
|
||||||
export QR_NAME=TPUv5e64spotEW4B
|
|
||||||
export TPU_NAME=tpu-v5e-64-ew4b
|
|
||||||
export ZONE=europe-west4-b
|
|
||||||
export ACCELERATOR_TYPE=v5e-64
|
|
||||||
export RUNTIME_VERSION=v2-alpha-tpuv5-lite
|
|
||||||
|
|
||||||
gcloud compute tpus tpu-vm create ${TPU_NAME} \
|
|
||||||
--project=${PROJECT_ID} \
|
|
||||||
--zone=${ZONE} \
|
|
||||||
--accelerator-type=${ACCELERATOR_TYPE} \
|
|
||||||
--version=${RUNTIME_VERSION} \
|
|
||||||
--spot \
|
|
||||||
|| \
|
|
||||||
gcloud compute tpus queued-resources create ${QR_NAME} \
|
|
||||||
--project=${PROJECT_ID} \
|
|
||||||
--zone=${ZONE} \
|
|
||||||
--node-id=${TPU_NAME} \
|
|
||||||
--accelerator-type=${ACCELERATOR_TYPE} \
|
|
||||||
--runtime-version=${RUNTIME_VERSION} \
|
|
||||||
--spot
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
# 64 spot Cloud TPU v5e chips in zone us-central1-a
|
|
||||||
export PROJECT_ID=phantom-trc
|
|
||||||
export QR_NAME=TPUv5e64spotUC1A
|
|
||||||
export TPU_NAME=tpu-v5e-64-uc1a
|
|
||||||
export ZONE=us-central1-a
|
|
||||||
export ACCELERATOR_TYPE=v5e-64
|
|
||||||
export RUNTIME_VERSION=v2-alpha-tpuv5-lite
|
|
||||||
|
|
||||||
gcloud compute tpus tpu-vm create ${TPU_NAME} \
|
|
||||||
--project=${PROJECT_ID} \
|
|
||||||
--zone=${ZONE} \
|
|
||||||
--accelerator-type=${ACCELERATOR_TYPE} \
|
|
||||||
--version=${RUNTIME_VERSION} \
|
|
||||||
--spot \
|
|
||||||
|| \
|
|
||||||
gcloud compute tpus queued-resources create ${QR_NAME} \
|
|
||||||
--project=${PROJECT_ID} \
|
|
||||||
--zone=${ZONE} \
|
|
||||||
--node-id=${TPU_NAME} \
|
|
||||||
--accelerator-type=${ACCELERATOR_TYPE} \
|
|
||||||
--runtime-version=${RUNTIME_VERSION} \
|
|
||||||
--spot
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
# 64 spot Cloud TPU v6e chips in zone europe-west4-a
|
|
||||||
export PROJECT_ID=phantom-trc
|
|
||||||
export QR_NAME=TPUv6e64spotEW4A
|
|
||||||
export TPU_NAME=tpu-v6e-64-ew4a
|
|
||||||
export ZONE=europe-west4-a
|
|
||||||
export ACCELERATOR_TYPE=v6e-64
|
|
||||||
export RUNTIME_VERSION=v2-alpha-tpuv6e
|
|
||||||
|
|
||||||
gcloud compute tpus tpu-vm create ${TPU_NAME} \
|
|
||||||
--project=${PROJECT_ID} \
|
|
||||||
--zone=${ZONE} \
|
|
||||||
--accelerator-type=${ACCELERATOR_TYPE} \
|
|
||||||
--version=${RUNTIME_VERSION} \
|
|
||||||
--spot \
|
|
||||||
|| \
|
|
||||||
gcloud compute tpus queued-resources create ${QR_NAME} \
|
|
||||||
--project=${PROJECT_ID} \
|
|
||||||
--zone=${ZONE} \
|
|
||||||
--node-id=${TPU_NAME} \
|
|
||||||
--accelerator-type=${ACCELERATOR_TYPE} \
|
|
||||||
--runtime-version=${RUNTIME_VERSION} \
|
|
||||||
--spot
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
# 64 spot Cloud TPU v6e chips in zone us-east1-d
|
|
||||||
export PROJECT_ID=phantom-trc
|
|
||||||
export QR_NAME=TPUv6e64spotUE1D
|
|
||||||
export TPU_NAME=tpu-v6e-64-ue1d
|
|
||||||
export ZONE=us-east1-d
|
|
||||||
export ACCELERATOR_TYPE=v6e-64
|
|
||||||
export RUNTIME_VERSION=v2-alpha-tpuv6e
|
|
||||||
|
|
||||||
gcloud compute tpus tpu-vm create ${TPU_NAME} \
|
|
||||||
--project=${PROJECT_ID} \
|
|
||||||
--zone=${ZONE} \
|
|
||||||
--accelerator-type=${ACCELERATOR_TYPE} \
|
|
||||||
--version=${RUNTIME_VERSION} \
|
|
||||||
--spot \
|
|
||||||
|| \
|
|
||||||
gcloud compute tpus queued-resources create ${QR_NAME} \
|
|
||||||
--project=${PROJECT_ID} \
|
|
||||||
--zone=${ZONE} \
|
|
||||||
--node-id=${TPU_NAME} \
|
|
||||||
--accelerator-type=${ACCELERATOR_TYPE} \
|
|
||||||
--runtime-version=${RUNTIME_VERSION} \
|
|
||||||
--spot
|
|
||||||
@@ -1,4 +1,23 @@
|
|||||||
services:
|
services:
|
||||||
|
tpu-watchdogs:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: docker/TPUWatchdog.dockerfile
|
||||||
|
container_name: "PHANTOM-tpu-watchdogs"
|
||||||
|
restart: unless-stopped
|
||||||
|
user: "${UID:-1000}:${GID:-1000}"
|
||||||
|
environment:
|
||||||
|
- HF_TOKEN=${HF_TOKEN}
|
||||||
|
- WANDB_API_KEY=${WANDB_API_KEY}
|
||||||
|
- GITHUB_TOKEN=${GITHUB_TOKEN}
|
||||||
|
- GOOGLE_APPLICATION_CREDENTIALS=/secrets/gcp-sa.json
|
||||||
|
- GCP_ACCOUNT=${GCP_ACCOUNT:-}
|
||||||
|
- WATCHDOG_CONFIG_PATTERN=${WATCHDOG_CONFIG_PATTERN:-v[46]*.conf}
|
||||||
|
- CLOUDSDK_CONFIG=/.config/gcloud
|
||||||
|
volumes:
|
||||||
|
- ~/.config/gcloud:/.config/gcloud:rw
|
||||||
|
- ./secrets/gcp-sa.json:/secrets/gcp-sa.json:ro
|
||||||
|
|
||||||
tensorboard-rl:
|
tensorboard-rl:
|
||||||
image: tensorflow/tensorflow:latest
|
image: tensorflow/tensorflow:latest
|
||||||
container_name: "PHANTOM-tensorboard-rl"
|
container_name: "PHANTOM-tensorboard-rl"
|
||||||
|
|||||||
112
docker/TPUWatchdog.dockerfile
Normal file
112
docker/TPUWatchdog.dockerfile
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
FROM google/cloud-sdk:slim
|
||||||
|
|
||||||
|
# Install tmux to manage multiple watchdogs and jq for json parsing
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y tmux jq && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy the orchestration scripts and configs
|
||||||
|
COPY tpu_orchestration/ /app/tpu_orchestration/
|
||||||
|
|
||||||
|
# Make sure scripts are executable
|
||||||
|
RUN chmod +x /app/tpu_orchestration/watchdog.sh
|
||||||
|
RUN chmod +x /app/tpu_orchestration/tpu_startup.sh
|
||||||
|
|
||||||
|
# Create an entrypoint script that launches a watchdog for each config
|
||||||
|
COPY <<-'EOF' /app/entrypoint.sh
|
||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Make sure required variables are set
|
||||||
|
if [ -z "$HF_TOKEN" ]; then
|
||||||
|
echo "Error: HF_TOKEN environment variable is required."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z "$WANDB_API_KEY" ]; then
|
||||||
|
echo "Warning: WANDB_API_KEY environment variable is not set. Wandb logging may fail on TPUs."
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Authenticate gcloud if credentials are provided
|
||||||
|
if [ -n "$GOOGLE_APPLICATION_CREDENTIALS" ] && [ -f "$GOOGLE_APPLICATION_CREDENTIALS" ]; then
|
||||||
|
CRED_TYPE=$(jq -r '.type' "$GOOGLE_APPLICATION_CREDENTIALS" 2>/dev/null || echo "unknown")
|
||||||
|
if [ "$CRED_TYPE" = "service_account" ]; then
|
||||||
|
echo "Authenticating gcloud using service account key..."
|
||||||
|
gcloud auth activate-service-account --key-file="$GOOGLE_APPLICATION_CREDENTIALS"
|
||||||
|
|
||||||
|
if [ -z "$PROJECT_ID" ]; then
|
||||||
|
PROJECT_ID=$(jq -r '.project_id // empty' "$GOOGLE_APPLICATION_CREDENTIALS")
|
||||||
|
fi
|
||||||
|
elif [ "$CRED_TYPE" = "authorized_user" ]; then
|
||||||
|
echo "Using authorized_user credentials via credential file override..."
|
||||||
|
export CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE="$GOOGLE_APPLICATION_CREDENTIALS"
|
||||||
|
|
||||||
|
if gcloud auth print-access-token >/dev/null 2>&1; then
|
||||||
|
ACTIVE_ACCOUNT=$(gcloud config get-value account 2>/dev/null || true)
|
||||||
|
if [ -z "$ACTIVE_ACCOUNT" ] || [ "$ACTIVE_ACCOUNT" = "(unset)" ]; then
|
||||||
|
ACTIVE_ACCOUNT=$(jq -r '.account // empty' "$GOOGLE_APPLICATION_CREDENTIALS")
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "$ACTIVE_ACCOUNT" ] && [ "$ACTIVE_ACCOUNT" != "(unset)" ]; then
|
||||||
|
echo "Using gcloud account: $ACTIVE_ACCOUNT"
|
||||||
|
else
|
||||||
|
echo "Using gcloud credential override from $GOOGLE_APPLICATION_CREDENTIALS"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "Warning: credential file override token check failed. Falling back to mounted gcloud config."
|
||||||
|
unset CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE
|
||||||
|
|
||||||
|
if [ -n "$GCP_ACCOUNT" ]; then
|
||||||
|
gcloud config set account "$GCP_ACCOUNT" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
ACTIVE_ACCOUNT=$(gcloud config get-value account 2>/dev/null || true)
|
||||||
|
if [ -z "$ACTIVE_ACCOUNT" ] || [ "$ACTIVE_ACCOUNT" = "(unset)" ]; then
|
||||||
|
echo "Error: no active gcloud account available. Run 'gcloud auth login' on host and mount ~/.config/gcloud, or use a service account key."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "Using gcloud account: $ACTIVE_ACCOUNT"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "Warning: unsupported credential file type '$CRED_TYPE'. Falling back to mounted gcloud config."
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "Note: Assuming gcloud config is mounted from host."
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "$PROJECT_ID" ]; then
|
||||||
|
gcloud config set project "$PROJECT_ID"
|
||||||
|
echo "Set project to $PROJECT_ID"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Run the watchdogs in the background using bash instead of tmux
|
||||||
|
# Tmux needs a TTY to attach properly which we might not have in docker
|
||||||
|
# Stagger startups by 15s to prevent simultaneous TPU creation quota hits
|
||||||
|
CONFIG_PATTERN=${WATCHDOG_CONFIG_PATTERN:-"*.conf"}
|
||||||
|
shopt -s nullglob
|
||||||
|
CONFIGS=(/app/tpu_orchestration/configs/$CONFIG_PATTERN)
|
||||||
|
|
||||||
|
if [ ${#CONFIGS[@]} -eq 0 ]; then
|
||||||
|
echo "Error: no watchdog configs matched pattern '$CONFIG_PATTERN'."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Using watchdog config pattern: $CONFIG_PATTERN"
|
||||||
|
DELAY=0
|
||||||
|
for conf in "${CONFIGS[@]}"; do
|
||||||
|
echo "Starting watchdog for $(basename "$conf" .conf) (delay: ${DELAY}s)"
|
||||||
|
(sleep $DELAY && /app/tpu_orchestration/watchdog.sh "$conf") &
|
||||||
|
DELAY=$((DELAY + 15))
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "All watchdogs queued with staggered startup."
|
||||||
|
|
||||||
|
# Keep the container running
|
||||||
|
wait
|
||||||
|
EOF
|
||||||
|
|
||||||
|
RUN chmod +x /app/entrypoint.sh
|
||||||
|
|
||||||
|
CMD ["/app/entrypoint.sh"]
|
||||||
@@ -272,12 +272,12 @@
|
|||||||
</span>
|
</span>
|
||||||
|
|
||||||
<span class="link-block">
|
<span class="link-block">
|
||||||
<a href="goals/goals.csv" target="_blank"
|
<a href="https://huggingface.co/datasets/velocitatem/whoclickedit" target="_blank"
|
||||||
class="external-link button is-normal is-rounded is-dark">
|
class="external-link button is-normal is-rounded is-dark">
|
||||||
<span class="icon">
|
<span class="icon">
|
||||||
<i class="fas fa-list"></i>
|
<i class="fas fa-database"></i>
|
||||||
</span>
|
</span>
|
||||||
<span>Goal Set</span>
|
<span>Dataset</span>
|
||||||
</a>
|
</a>
|
||||||
</span>
|
</span>
|
||||||
|
|
||||||
|
|||||||
0
engine/__init__.py
Normal file
0
engine/__init__.py
Normal file
@@ -15,6 +15,10 @@ def make_env(cfg: Mapping[str, Any]):
|
|||||||
n_products=int(cfg["n_products"]),
|
n_products=int(cfg["n_products"]),
|
||||||
alpha=float(cfg["alpha"]),
|
alpha=float(cfg["alpha"]),
|
||||||
N=int(cfg["N"]),
|
N=int(cfg["N"]),
|
||||||
|
agent_params=(
|
||||||
|
float(cfg.get("agent_mu", 45.0)),
|
||||||
|
float(cfg.get("agent_std", 15.0)),
|
||||||
|
),
|
||||||
price_bounds=(float(cfg["price_low"]), float(cfg["price_high"])),
|
price_bounds=(float(cfg["price_low"]), float(cfg["price_high"])),
|
||||||
lambda_coi=float(cfg["lambda_coi"]),
|
lambda_coi=float(cfg["lambda_coi"]),
|
||||||
robust_radius=float(cfg["robust_radius"]),
|
robust_radius=float(cfg["robust_radius"]),
|
||||||
@@ -50,6 +54,9 @@ def _evaluate_env(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
|||||||
coi_levels: list[float] = []
|
coi_levels: list[float] = []
|
||||||
coi_leakages: list[float] = []
|
coi_leakages: list[float] = []
|
||||||
volatilities: list[float] = []
|
volatilities: list[float] = []
|
||||||
|
upward_volatilities: list[float] = []
|
||||||
|
supra_shares: list[float] = []
|
||||||
|
supra_penalties: list[float] = []
|
||||||
agent_probs: list[float] = []
|
agent_probs: list[float] = []
|
||||||
|
|
||||||
for _ in range(int(episodes)):
|
for _ in range(int(episodes)):
|
||||||
@@ -61,6 +68,9 @@ def _evaluate_env(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
|||||||
ep_coi = 0.0
|
ep_coi = 0.0
|
||||||
ep_coi_leakage = 0.0
|
ep_coi_leakage = 0.0
|
||||||
ep_volatility = 0.0
|
ep_volatility = 0.0
|
||||||
|
ep_upward_volatility = 0.0
|
||||||
|
ep_supra_share = 0.0
|
||||||
|
ep_supra_penalty = 0.0
|
||||||
ep_agent_prob = 0.0
|
ep_agent_prob = 0.0
|
||||||
steps = 0
|
steps = 0
|
||||||
|
|
||||||
@@ -74,6 +84,15 @@ def _evaluate_env(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
|||||||
ep_coi += float(econ.get("coi_level", 0.0))
|
ep_coi += float(econ.get("coi_level", 0.0))
|
||||||
ep_coi_leakage += float(econ.get("coi_leakage", 0.0))
|
ep_coi_leakage += float(econ.get("coi_leakage", 0.0))
|
||||||
ep_volatility += float(econ.get("volatility", 0.0))
|
ep_volatility += float(econ.get("volatility", 0.0))
|
||||||
|
ep_upward_volatility += float(
|
||||||
|
info.get("upward_volatility", econ.get("upward_volatility", 0.0))
|
||||||
|
)
|
||||||
|
ep_supra_share += float(
|
||||||
|
info.get("supra_share", econ.get("supra_share", 0.0))
|
||||||
|
)
|
||||||
|
ep_supra_penalty += float(
|
||||||
|
info.get("supra_penalty", econ.get("supra_penalty", 0.0))
|
||||||
|
)
|
||||||
ep_agent_prob += float(econ.get("agent_prob", info.get("agent_prob", 0.0)))
|
ep_agent_prob += float(econ.get("agent_prob", info.get("agent_prob", 0.0)))
|
||||||
steps += 1
|
steps += 1
|
||||||
|
|
||||||
@@ -84,6 +103,9 @@ def _evaluate_env(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
|||||||
coi_levels.append(ep_coi / denom)
|
coi_levels.append(ep_coi / denom)
|
||||||
coi_leakages.append(ep_coi_leakage / denom)
|
coi_leakages.append(ep_coi_leakage / denom)
|
||||||
volatilities.append(ep_volatility / denom)
|
volatilities.append(ep_volatility / denom)
|
||||||
|
upward_volatilities.append(ep_upward_volatility / denom)
|
||||||
|
supra_shares.append(ep_supra_share / denom)
|
||||||
|
supra_penalties.append(ep_supra_penalty / denom)
|
||||||
agent_probs.append(ep_agent_prob / denom)
|
agent_probs.append(ep_agent_prob / denom)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -95,6 +117,13 @@ def _evaluate_env(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
|||||||
"eval/coi_level_mean": float(np.mean(coi_levels)) if coi_levels else 0.0,
|
"eval/coi_level_mean": float(np.mean(coi_levels)) if coi_levels else 0.0,
|
||||||
"eval/coi_leakage_mean": float(np.mean(coi_leakages)) if coi_leakages else 0.0,
|
"eval/coi_leakage_mean": float(np.mean(coi_leakages)) if coi_leakages else 0.0,
|
||||||
"eval/volatility_mean": float(np.mean(volatilities)) if volatilities else 0.0,
|
"eval/volatility_mean": float(np.mean(volatilities)) if volatilities else 0.0,
|
||||||
|
"eval/upward_volatility_mean": (
|
||||||
|
float(np.mean(upward_volatilities)) if upward_volatilities else 0.0
|
||||||
|
),
|
||||||
|
"eval/supra_share_mean": float(np.mean(supra_shares)) if supra_shares else 0.0,
|
||||||
|
"eval/supra_penalty_mean": (
|
||||||
|
float(np.mean(supra_penalties)) if supra_penalties else 0.0
|
||||||
|
),
|
||||||
"eval/agent_prob_mean": float(np.mean(agent_probs)) if agent_probs else 0.0,
|
"eval/agent_prob_mean": float(np.mean(agent_probs)) if agent_probs else 0.0,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -128,15 +157,15 @@ def evaluate(
|
|||||||
shifted_env.close()
|
shifted_env.close()
|
||||||
shifted_rows.append((tag, alpha, shifted_metrics))
|
shifted_rows.append((tag, alpha, shifted_metrics))
|
||||||
|
|
||||||
metrics["eval/robust_alpha_low"] = low_alpha
|
metrics["eval/stress_alpha_low"] = low_alpha
|
||||||
metrics["eval/robust_alpha_high"] = high_alpha
|
metrics["eval/stress_alpha_high"] = high_alpha
|
||||||
metrics["eval/robust_reward_worst"] = float(
|
metrics["eval/stress_reward_worst"] = float(
|
||||||
min(row[2]["eval/reward_mean"] for row in shifted_rows)
|
min(row[2]["eval/reward_mean"] for row in shifted_rows)
|
||||||
)
|
)
|
||||||
metrics["eval/robust_revenue_worst"] = float(
|
metrics["eval/stress_revenue_worst"] = float(
|
||||||
min(row[2]["eval/revenue_mean"] for row in shifted_rows)
|
min(row[2]["eval/revenue_mean"] for row in shifted_rows)
|
||||||
)
|
)
|
||||||
metrics["eval/robust_coi_leakage_worst"] = float(
|
metrics["eval/stress_coi_leakage_worst"] = float(
|
||||||
max(row[2]["eval/coi_leakage_mean"] for row in shifted_rows)
|
max(row[2]["eval/coi_leakage_mean"] for row in shifted_rows)
|
||||||
)
|
)
|
||||||
for tag, alpha, shifted_metrics in shifted_rows:
|
for tag, alpha, shifted_metrics in shifted_rows:
|
||||||
|
|||||||
@@ -80,7 +80,11 @@ def train_qtable(
|
|||||||
"train/global_step": int(steps),
|
"train/global_step": int(steps),
|
||||||
}
|
}
|
||||||
if wandb_live:
|
if wandb_live:
|
||||||
|
try:
|
||||||
wandb.log(dict(event), step=step_offset + int(steps))
|
wandb.log(dict(event), step=step_offset + int(steps))
|
||||||
|
except Exception:
|
||||||
|
wandb_live = False
|
||||||
|
train_events.append(event)
|
||||||
else:
|
else:
|
||||||
train_events.append(event)
|
train_events.append(event)
|
||||||
if console_progress:
|
if console_progress:
|
||||||
@@ -113,7 +117,11 @@ def train_qtable(
|
|||||||
"train/global_step": int(steps),
|
"train/global_step": int(steps),
|
||||||
}
|
}
|
||||||
if wandb_live:
|
if wandb_live:
|
||||||
|
try:
|
||||||
wandb.log(dict(tail_event), step=step_offset + int(steps))
|
wandb.log(dict(tail_event), step=step_offset + int(steps))
|
||||||
|
except Exception:
|
||||||
|
wandb_live = False
|
||||||
|
train_events.append(tail_event)
|
||||||
else:
|
else:
|
||||||
train_events.append(tail_event)
|
train_events.append(tail_event)
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Mapping
|
from typing import Any, Mapping
|
||||||
|
|
||||||
from ..lib.callbacks import MetricsCallback
|
from ..lib.callbacks import EvalMetricsCallback, MetricsCallback
|
||||||
|
from ..wandb_checkpoint import checkpoint_artifact_name, log_checkpoint_file
|
||||||
from .common import evaluate, make_env
|
from .common import evaluate, make_env
|
||||||
|
|
||||||
|
|
||||||
@@ -117,7 +119,6 @@ def build_model(cfg: Mapping[str, Any], env: Any):
|
|||||||
|
|
||||||
def train_sb3(cfg: Mapping[str, Any]) -> tuple[object, dict[str, Any]]:
|
def train_sb3(cfg: Mapping[str, Any]) -> tuple[object, dict[str, Any]]:
|
||||||
try:
|
try:
|
||||||
from stable_baselines3.common.callbacks import EvalCallback
|
|
||||||
from stable_baselines3.common.monitor import Monitor
|
from stable_baselines3.common.monitor import Monitor
|
||||||
except ImportError as exc:
|
except ImportError as exc:
|
||||||
raise ImportError("stable-baselines3 is required for SB3 models") from exc
|
raise ImportError("stable-baselines3 is required for SB3 models") from exc
|
||||||
@@ -144,20 +145,20 @@ def train_sb3(cfg: Mapping[str, Any]) -> tuple[object, dict[str, Any]]:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
metrics_callback = MetricsCallback(
|
metrics_callback = MetricsCallback(
|
||||||
log_histograms=False,
|
log_histograms=True,
|
||||||
log_freq=int(cfg["log_freq"]),
|
log_freq=int(cfg["log_freq"]),
|
||||||
|
hist_freq=int(cfg.get("hist_freq", 500)),
|
||||||
step_offset=int(cfg.get("wandb_step_offset", 0)),
|
step_offset=int(cfg.get("wandb_step_offset", 0)),
|
||||||
)
|
)
|
||||||
callbacks = [metrics_callback]
|
eval_callback = EvalMetricsCallback(
|
||||||
callbacks.append(
|
|
||||||
EvalCallback(
|
|
||||||
eval_env,
|
eval_env,
|
||||||
eval_freq=int(cfg["eval_freq"]),
|
eval_freq=int(cfg["eval_freq"]),
|
||||||
n_eval_episodes=int(cfg["eval_episodes"]),
|
n_eval_episodes=int(cfg["eval_episodes"]),
|
||||||
|
step_offset=int(cfg.get("wandb_step_offset", 0)),
|
||||||
deterministic=True,
|
deterministic=True,
|
||||||
verbose=0,
|
verbose=0,
|
||||||
)
|
)
|
||||||
)
|
callbacks = [metrics_callback, eval_callback]
|
||||||
|
|
||||||
target_steps = int(cfg["total_timesteps"])
|
target_steps = int(cfg["total_timesteps"])
|
||||||
remaining_steps = max(0, target_steps - int(getattr(model, "num_timesteps", 0)))
|
remaining_steps = max(0, target_steps - int(getattr(model, "num_timesteps", 0)))
|
||||||
@@ -173,6 +174,29 @@ def train_sb3(cfg: Mapping[str, Any]) -> tuple[object, dict[str, Any]]:
|
|||||||
model_path = model_dir / f"phantom_{cfg['algo']}"
|
model_path = model_dir / f"phantom_{cfg['algo']}"
|
||||||
model.save(str(model_path))
|
model.save(str(model_path))
|
||||||
|
|
||||||
|
artifact_name = checkpoint_artifact_name(
|
||||||
|
cfg,
|
||||||
|
backend="sb3",
|
||||||
|
sweep_id=os.getenv("WANDB_SWEEP_ID"),
|
||||||
|
)
|
||||||
|
artifact_logged = False
|
||||||
|
try:
|
||||||
|
artifact_logged = bool(
|
||||||
|
log_checkpoint_file(
|
||||||
|
artifact_name,
|
||||||
|
file_path=model_path.with_suffix(".zip"),
|
||||||
|
artifact_file_name="model.zip",
|
||||||
|
metadata={
|
||||||
|
"algo": str(cfg.get("algo", "ppo")),
|
||||||
|
"backend": "sb3",
|
||||||
|
"seed": int(cfg.get("seed", 0)),
|
||||||
|
"step": int(getattr(model, "num_timesteps", 0)),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
artifact_logged = False
|
||||||
|
|
||||||
metrics: dict[str, Any] = evaluate(
|
metrics: dict[str, Any] = evaluate(
|
||||||
model,
|
model,
|
||||||
eval_env,
|
eval_env,
|
||||||
@@ -181,7 +205,12 @@ def train_sb3(cfg: Mapping[str, Any]) -> tuple[object, dict[str, Any]]:
|
|||||||
)
|
)
|
||||||
metrics["train/global_step"] = int(model.num_timesteps)
|
metrics["train/global_step"] = int(model.num_timesteps)
|
||||||
metrics["model/path"] = str(model_path.with_suffix(".zip"))
|
metrics["model/path"] = str(model_path.with_suffix(".zip"))
|
||||||
metrics["_train_events"] = list(metrics_callback.events)
|
metrics["model/artifact_name"] = str(artifact_name)
|
||||||
|
metrics["model/artifact_logged"] = float(artifact_logged)
|
||||||
|
metrics["_train_events"] = sorted(
|
||||||
|
[*metrics_callback.events, *eval_callback.events],
|
||||||
|
key=lambda event: int(event.get("train/global_step", 0)),
|
||||||
|
)
|
||||||
|
|
||||||
env.close()
|
env.close()
|
||||||
eval_env.close()
|
eval_env.close()
|
||||||
|
|||||||
@@ -1,12 +1,32 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
from datetime import datetime, timezone
|
||||||
from datetime import datetime, UTC
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Clear stale TPU lock files on startup.
# NOTE(review): /dev/accel0 is presumably the TPU accelerator device node, so
# the cleanup is skipped on hosts without one -- confirm.
if os.path.exists("/dev/accel0"):
    try:
        # `rm -f` already ignores missing files; stderr is discarded so an
        # undeletable lock file does not spam the console.
        subprocess.run(
            ["rm", "-f", "/tmp/.libtpu_lockfile", "/tmp/libtpu_lockfile"],
            stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.SubprocessError):
        # Best effort only: failing to spawn `rm` must not abort the import.
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit,
        # so only the errors subprocess.run can raise are suppressed here.
        pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
import jax
|
||||||
|
|
||||||
|
jax.config.update("jax_threefry_partitionable", True)
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@@ -25,6 +45,10 @@ def _log(message: str) -> None:
|
|||||||
logger.info(message)
|
logger.info(message)
|
||||||
|
|
||||||
|
|
||||||
|
def _wandb_run_active() -> bool:
    """Return True only when wandb is available and ``wandb.run`` is set."""
    if not HAS_WANDB:
        return False
    # getattr with a default tolerates a wandb object without a `run` attribute.
    current_run = getattr(wandb, "run", None)
    return current_run is not None
|
||||||
|
|
||||||
|
|
||||||
def _parse_list(raw: str) -> list[str]:
|
def _parse_list(raw: str) -> list[str]:
|
||||||
return [x.strip().lower() for x in str(raw).split(",") if x.strip()]
|
return [x.strip().lower() for x in str(raw).split(",") if x.strip()]
|
||||||
|
|
||||||
@@ -41,6 +65,10 @@ def _truthy(value: str | bool | None) -> bool:
|
|||||||
return str(value).strip().lower() in {"1", "true", "yes", "on"}
|
return str(value).strip().lower() in {"1", "true", "yes", "on"}
|
||||||
|
|
||||||
|
|
||||||
|
def _mode_label_from_baseline(is_baseline: bool) -> str:
|
||||||
|
return "baseline" if bool(is_baseline) else "defended"
|
||||||
|
|
||||||
|
|
||||||
def _action(policy, obs: np.ndarray):
|
def _action(policy, obs: np.ndarray):
|
||||||
out = policy.predict(obs, deterministic=True)
|
out = policy.predict(obs, deterministic=True)
|
||||||
action = out[0] if isinstance(out, tuple) else out
|
action = out[0] if isinstance(out, tuple) else out
|
||||||
@@ -146,7 +174,7 @@ def _log_train_events(
|
|||||||
alpha: float,
|
alpha: float,
|
||||||
step_offset: int,
|
step_offset: int,
|
||||||
) -> int:
|
) -> int:
|
||||||
if not (HAS_WANDB and wandb.run is not None):
|
if not _wandb_run_active():
|
||||||
return int(step_offset)
|
return int(step_offset)
|
||||||
if not events:
|
if not events:
|
||||||
return int(step_offset)
|
return int(step_offset)
|
||||||
@@ -167,11 +195,14 @@ def _log_train_events(
|
|||||||
"run.kind": "benchmark",
|
"run.kind": "benchmark",
|
||||||
"runtime/backend": tier_name,
|
"runtime/backend": tier_name,
|
||||||
"study/mode": mode_label,
|
"study/mode": mode_label,
|
||||||
"study/no_robust": float(mode_label == "no_robust"),
|
"study/baseline_mode": float(mode_label == "baseline"),
|
||||||
"study/alpha": float(alpha),
|
"study/alpha": float(alpha),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
wandb.log(payload, step=cursor + rel_step)
|
wandb.log(payload, step=cursor + rel_step)
|
||||||
|
except Exception:
|
||||||
|
return int(step_offset)
|
||||||
max_rel = max(max(1, int(evt.get("train/global_step", 0))) for evt in ordered)
|
max_rel = max(max(1, int(evt.get("train/global_step", 0))) for evt in ordered)
|
||||||
return cursor + max_rel + 1
|
return cursor + max_rel + 1
|
||||||
|
|
||||||
@@ -183,6 +214,7 @@ def run_benchmark(
|
|||||||
n_episodes: int,
|
n_episodes: int,
|
||||||
mode_label: str,
|
mode_label: str,
|
||||||
step_cursor_start: int = 0,
|
step_cursor_start: int = 0,
|
||||||
|
eval_alpha_values: list[float] | None = None,
|
||||||
):
|
):
|
||||||
from .backends.common import make_env
|
from .backends.common import make_env
|
||||||
|
|
||||||
@@ -219,14 +251,22 @@ def run_benchmark(
|
|||||||
"dqn",
|
"dqn",
|
||||||
}:
|
}:
|
||||||
wandb_step_cursor += max(1, int(cfg.get("total_timesteps", 1))) + 1
|
wandb_step_cursor += max(1, int(cfg.get("total_timesteps", 1))) + 1
|
||||||
env = make_env({**cfg, "alpha": float(alpha)})
|
eval_targets = (
|
||||||
|
[float(value) for value in eval_alpha_values]
|
||||||
|
if eval_alpha_values
|
||||||
|
else [float(alpha)]
|
||||||
|
)
|
||||||
|
for eval_alpha in eval_targets:
|
||||||
|
env = make_env({**cfg, "alpha": float(eval_alpha)})
|
||||||
eps = [_run_eval_episode(env, policy) for _ in range(int(n_episodes))]
|
eps = [_run_eval_episode(env, policy) for _ in range(int(n_episodes))]
|
||||||
env.close()
|
env.close()
|
||||||
|
|
||||||
row = {
|
row = {
|
||||||
"tier": tier_name,
|
"tier": tier_name,
|
||||||
"mode": mode_label,
|
"mode": mode_label,
|
||||||
"alpha": float(alpha),
|
"alpha": float(eval_alpha),
|
||||||
|
"train_alpha": float(alpha),
|
||||||
|
"eval_alpha": float(eval_alpha),
|
||||||
"episodes": int(n_episodes),
|
"episodes": int(n_episodes),
|
||||||
"mean_reward": float(np.mean([e["reward"] for e in eps])),
|
"mean_reward": float(np.mean([e["reward"] for e in eps])),
|
||||||
"mean_revenue": float(np.mean([e["revenue"] for e in eps])),
|
"mean_revenue": float(np.mean([e["revenue"] for e in eps])),
|
||||||
@@ -237,7 +277,8 @@ def run_benchmark(
|
|||||||
row["objective_score"] = row["mean_reward"]
|
row["objective_score"] = row["mean_reward"]
|
||||||
rows.append(row)
|
rows.append(row)
|
||||||
_log(
|
_log(
|
||||||
f"[{run_index}/{total_runs}] alpha={float(alpha):.2f} tier={tier_name}: "
|
f"[{run_index}/{total_runs}] train_alpha={float(alpha):.2f} "
|
||||||
|
f"eval_alpha={float(eval_alpha):.2f} tier={tier_name}: "
|
||||||
f"reward={row['mean_reward']:.3f} revenue={row['mean_revenue']:.3f} "
|
f"reward={row['mean_reward']:.3f} revenue={row['mean_revenue']:.3f} "
|
||||||
f"coi={row['mean_coi']:.4f} score={row['objective_score']:.3f}"
|
f"coi={row['mean_coi']:.4f} score={row['objective_score']:.3f}"
|
||||||
)
|
)
|
||||||
@@ -246,25 +287,32 @@ def run_benchmark(
|
|||||||
step_means = []
|
step_means = []
|
||||||
for step in range(max_len):
|
for step in range(max_len):
|
||||||
vals = [
|
vals = [
|
||||||
e["price_trace"][step] for e in eps if step < len(e["price_trace"])
|
e["price_trace"][step]
|
||||||
|
for e in eps
|
||||||
|
if step < len(e["price_trace"])
|
||||||
]
|
]
|
||||||
step_means.append(float(np.mean(vals)) if vals else np.nan)
|
step_means.append(float(np.mean(vals)) if vals else np.nan)
|
||||||
traces.append(
|
traces.append(
|
||||||
{
|
{
|
||||||
"tier": tier_name,
|
"tier": tier_name,
|
||||||
"alpha": float(alpha),
|
"alpha": float(eval_alpha),
|
||||||
|
"train_alpha": float(alpha),
|
||||||
|
"eval_alpha": float(eval_alpha),
|
||||||
"mean_price_trace": step_means,
|
"mean_price_trace": step_means,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
if HAS_WANDB and wandb.run is not None:
|
if _wandb_run_active():
|
||||||
|
try:
|
||||||
wandb.log(
|
wandb.log(
|
||||||
{
|
{
|
||||||
"run.kind": "benchmark",
|
"run.kind": "benchmark",
|
||||||
"runtime/backend": tier_name,
|
"runtime/backend": tier_name,
|
||||||
"study/mode": mode_label,
|
"study/mode": mode_label,
|
||||||
"study/no_robust": float(mode_label == "no_robust"),
|
"study/baseline_mode": float(mode_label == "baseline"),
|
||||||
"study/alpha": float(alpha),
|
"study/alpha": float(eval_alpha),
|
||||||
|
"study/train_alpha": float(alpha),
|
||||||
|
"study/eval_alpha": float(eval_alpha),
|
||||||
"eval/reward_mean": row["mean_reward"],
|
"eval/reward_mean": row["mean_reward"],
|
||||||
"eval/revenue_mean": row["mean_revenue"],
|
"eval/revenue_mean": row["mean_revenue"],
|
||||||
"eval/margin_mean": row["mean_margin"],
|
"eval/margin_mean": row["mean_margin"],
|
||||||
@@ -274,6 +322,8 @@ def run_benchmark(
|
|||||||
},
|
},
|
||||||
step=wandb_step_cursor,
|
step=wandb_step_cursor,
|
||||||
)
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
wandb_step_cursor += 1
|
wandb_step_cursor += 1
|
||||||
|
|
||||||
return pd.DataFrame(rows), traces, int(wandb_step_cursor)
|
return pd.DataFrame(rows), traces, int(wandb_step_cursor)
|
||||||
@@ -358,7 +408,7 @@ def _run_with_args(args, compare_robust_override: bool | None = None):
|
|||||||
if compare_robust_override is not None
|
if compare_robust_override is not None
|
||||||
else _truthy(os.environ.get("PHANTOM_BENCHMARK_COMPARE_ROBUST"))
|
else _truthy(os.environ.get("PHANTOM_BENCHMARK_COMPARE_ROBUST"))
|
||||||
)
|
)
|
||||||
robust_modes = [False, True] if compare_robust else [bool(args.no_robust)]
|
baseline_modes = [False, True] if compare_robust else [bool(args.no_robust)]
|
||||||
|
|
||||||
base_overrides = {
|
base_overrides = {
|
||||||
"seed": args.seed,
|
"seed": args.seed,
|
||||||
@@ -369,6 +419,7 @@ def _run_with_args(args, compare_robust_override: bool | None = None):
|
|||||||
"robust_radius": args.robust_radius,
|
"robust_radius": args.robust_radius,
|
||||||
"robust_points": args.robust_points,
|
"robust_points": args.robust_points,
|
||||||
"robust_rollouts": args.robust_rollouts,
|
"robust_rollouts": args.robust_rollouts,
|
||||||
|
"margin_floor": args.margin_floor,
|
||||||
"eta_ux": args.eta_ux,
|
"eta_ux": args.eta_ux,
|
||||||
"reward_profit_weight": args.reward_profit_weight,
|
"reward_profit_weight": args.reward_profit_weight,
|
||||||
"price_low": args.price_low,
|
"price_low": args.price_low,
|
||||||
@@ -385,12 +436,20 @@ def _run_with_args(args, compare_robust_override: bool | None = None):
|
|||||||
}
|
}
|
||||||
tiers = _parse_list(args.tiers)
|
tiers = _parse_list(args.tiers)
|
||||||
alpha_values = _parse_float_list(args.alpha_values)
|
alpha_values = _parse_float_list(args.alpha_values)
|
||||||
|
eval_alpha_values = (
|
||||||
|
_parse_float_list(args.eval_alpha_values)
|
||||||
|
if str(getattr(args, "eval_alpha_values", "")).strip()
|
||||||
|
else []
|
||||||
|
)
|
||||||
_log(
|
_log(
|
||||||
"starting run "
|
"starting run "
|
||||||
+ json.dumps(
|
+ json.dumps(
|
||||||
{
|
{
|
||||||
"tiers": tiers,
|
"tiers": tiers,
|
||||||
"alpha_values": alpha_values,
|
"alpha_values": alpha_values,
|
||||||
|
"eval_alpha_values": (
|
||||||
|
eval_alpha_values if eval_alpha_values else alpha_values
|
||||||
|
),
|
||||||
"episodes": int(args.episodes),
|
"episodes": int(args.episodes),
|
||||||
"total_timesteps": int(args.total_timesteps),
|
"total_timesteps": int(args.total_timesteps),
|
||||||
"device": str(args.device),
|
"device": str(args.device),
|
||||||
@@ -401,14 +460,14 @@ def _run_with_args(args, compare_robust_override: bool | None = None):
|
|||||||
all_frames: list[pd.DataFrame] = []
|
all_frames: list[pd.DataFrame] = []
|
||||||
all_traces: list[dict] = []
|
all_traces: list[dict] = []
|
||||||
wandb_step_cursor = 0
|
wandb_step_cursor = 0
|
||||||
for no_robust in robust_modes:
|
for baseline_mode in baseline_modes:
|
||||||
overrides = dict(base_overrides)
|
overrides = dict(base_overrides)
|
||||||
overrides["no_robust"] = bool(no_robust)
|
overrides["baseline_mode"] = bool(baseline_mode)
|
||||||
cfg = TrainSpec.from_flat(
|
cfg = TrainSpec.from_flat(
|
||||||
{k: v for k, v in overrides.items() if v is not None}
|
{k: v for k, v in overrides.items() if v is not None}
|
||||||
).to_flat_dict()
|
).to_flat_dict()
|
||||||
cfg["linear_warmup_steps"] = int(args.linear_warmup_steps)
|
cfg["linear_warmup_steps"] = int(args.linear_warmup_steps)
|
||||||
mode_label = "no_robust" if no_robust else "robust"
|
mode_label = _mode_label_from_baseline(bool(baseline_mode))
|
||||||
_log(f"mode={mode_label}: begin")
|
_log(f"mode={mode_label}: begin")
|
||||||
df_mode, traces_mode, wandb_step_cursor = run_benchmark(
|
df_mode, traces_mode, wandb_step_cursor = run_benchmark(
|
||||||
cfg,
|
cfg,
|
||||||
@@ -417,6 +476,7 @@ def _run_with_args(args, compare_robust_override: bool | None = None):
|
|||||||
args.episodes,
|
args.episodes,
|
||||||
mode_label=mode_label,
|
mode_label=mode_label,
|
||||||
step_cursor_start=wandb_step_cursor,
|
step_cursor_start=wandb_step_cursor,
|
||||||
|
eval_alpha_values=eval_alpha_values,
|
||||||
)
|
)
|
||||||
_log(f"mode={mode_label}: complete ({len(df_mode)} rows)")
|
_log(f"mode={mode_label}: complete ({len(df_mode)} rows)")
|
||||||
for trace in traces_mode:
|
for trace in traces_mode:
|
||||||
@@ -429,7 +489,7 @@ def _run_with_args(args, compare_robust_override: bool | None = None):
|
|||||||
|
|
||||||
out_dir = Path(args.output_dir)
|
out_dir = Path(args.output_dir)
|
||||||
out_dir.mkdir(parents=True, exist_ok=True)
|
out_dir.mkdir(parents=True, exist_ok=True)
|
||||||
stamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
|
stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
|
||||||
csv_path = out_dir / f"benchmark_{stamp}.csv"
|
csv_path = out_dir / f"benchmark_{stamp}.csv"
|
||||||
trace_path = out_dir / f"benchmark_traces_{stamp}.json"
|
trace_path = out_dir / f"benchmark_traces_{stamp}.json"
|
||||||
df.to_csv(csv_path, index=False)
|
df.to_csv(csv_path, index=False)
|
||||||
@@ -445,7 +505,7 @@ def _run_with_args(args, compare_robust_override: bool | None = None):
|
|||||||
+ json.dumps(
|
+ json.dumps(
|
||||||
{
|
{
|
||||||
"tier": best["tier"],
|
"tier": best["tier"],
|
||||||
"mode": best.get("mode", "robust"),
|
"mode": best.get("mode", "defended"),
|
||||||
"alpha": float(best["alpha"]),
|
"alpha": float(best["alpha"]),
|
||||||
"objective_score": float(best["objective_score"]),
|
"objective_score": float(best["objective_score"]),
|
||||||
"mean_revenue": float(best["mean_revenue"]),
|
"mean_revenue": float(best["mean_revenue"]),
|
||||||
@@ -466,6 +526,7 @@ def run_cli(raw_args: list[str] | None = None):
|
|||||||
parser.add_argument("--project", default="capstone")
|
parser.add_argument("--project", default="capstone")
|
||||||
parser.add_argument("--tiers", default="static,surge,linear,qtable,ppo")
|
parser.add_argument("--tiers", default="static,surge,linear,qtable,ppo")
|
||||||
parser.add_argument("--alpha-values", default="0.0,0.3,0.6")
|
parser.add_argument("--alpha-values", default="0.0,0.3,0.6")
|
||||||
|
parser.add_argument("--eval-alpha-values", default="")
|
||||||
parser.add_argument("--episodes", type=int, default=10)
|
parser.add_argument("--episodes", type=int, default=10)
|
||||||
parser.add_argument("--output-dir", default="engine/studies/results")
|
parser.add_argument("--output-dir", default="engine/studies/results")
|
||||||
parser.add_argument("--seed", type=int, default=42)
|
parser.add_argument("--seed", type=int, default=42)
|
||||||
@@ -476,6 +537,7 @@ def run_cli(raw_args: list[str] | None = None):
|
|||||||
parser.add_argument("--robust-radius", type=float, default=0.15)
|
parser.add_argument("--robust-radius", type=float, default=0.15)
|
||||||
parser.add_argument("--robust-points", type=int, default=5)
|
parser.add_argument("--robust-points", type=int, default=5)
|
||||||
parser.add_argument("--robust-rollouts", type=int, default=1)
|
parser.add_argument("--robust-rollouts", type=int, default=1)
|
||||||
|
parser.add_argument("--margin-floor", type=float, default=0.85)
|
||||||
parser.add_argument("--eta-ux", type=float, default=0.5)
|
parser.add_argument("--eta-ux", type=float, default=0.5)
|
||||||
parser.add_argument("--reward-profit-weight", type=float, default=1.0)
|
parser.add_argument("--reward-profit-weight", type=float, default=1.0)
|
||||||
parser.add_argument("--price-low", type=float, default=10.0)
|
parser.add_argument("--price-low", type=float, default=10.0)
|
||||||
@@ -509,35 +571,47 @@ def run_cli(raw_args: list[str] | None = None):
|
|||||||
key_to_attr = {
|
key_to_attr = {
|
||||||
"tiers": "tiers",
|
"tiers": "tiers",
|
||||||
"alpha_values": "alpha_values",
|
"alpha_values": "alpha_values",
|
||||||
|
"eval_alpha_values": "eval_alpha_values",
|
||||||
"episodes": "episodes",
|
"episodes": "episodes",
|
||||||
"total_timesteps": "total_timesteps",
|
"total_timesteps": "total_timesteps",
|
||||||
"lambda_coi": "lambda_coi",
|
"lambda_coi": "lambda_coi",
|
||||||
"robust_radius": "robust_radius",
|
"robust_radius": "robust_radius",
|
||||||
"robust_points": "robust_points",
|
"robust_points": "robust_points",
|
||||||
"robust_rollouts": "robust_rollouts",
|
"robust_rollouts": "robust_rollouts",
|
||||||
|
"ambiguity_radius": "robust_radius",
|
||||||
|
"ambiguity_points": "robust_points",
|
||||||
|
"ambiguity_rollouts": "robust_rollouts",
|
||||||
"eta_ux": "eta_ux",
|
"eta_ux": "eta_ux",
|
||||||
"reward_profit_weight": "reward_profit_weight",
|
"reward_profit_weight": "reward_profit_weight",
|
||||||
"learning_rate": "learning_rate",
|
"learning_rate": "learning_rate",
|
||||||
"batch_size": "batch_size",
|
"batch_size": "batch_size",
|
||||||
"n_steps": "n_steps",
|
"n_steps": "n_steps",
|
||||||
|
"baseline_mode": "no_robust",
|
||||||
"no_robust": "no_robust",
|
"no_robust": "no_robust",
|
||||||
|
"margin_floor": "margin_floor",
|
||||||
"device": "device",
|
"device": "device",
|
||||||
}
|
}
|
||||||
for key in (
|
for key in (
|
||||||
"tiers",
|
"tiers",
|
||||||
"alpha_values",
|
"alpha_values",
|
||||||
|
"eval_alpha_values",
|
||||||
"episodes",
|
"episodes",
|
||||||
"total_timesteps",
|
"total_timesteps",
|
||||||
"lambda_coi",
|
"lambda_coi",
|
||||||
"robust_radius",
|
"robust_radius",
|
||||||
"robust_points",
|
"robust_points",
|
||||||
"robust_rollouts",
|
"robust_rollouts",
|
||||||
|
"ambiguity_radius",
|
||||||
|
"ambiguity_points",
|
||||||
|
"ambiguity_rollouts",
|
||||||
"eta_ux",
|
"eta_ux",
|
||||||
"reward_profit_weight",
|
"reward_profit_weight",
|
||||||
"learning_rate",
|
"learning_rate",
|
||||||
"batch_size",
|
"batch_size",
|
||||||
"n_steps",
|
"n_steps",
|
||||||
|
"baseline_mode",
|
||||||
"no_robust",
|
"no_robust",
|
||||||
|
"margin_floor",
|
||||||
"device",
|
"device",
|
||||||
):
|
):
|
||||||
if key in wandb.config:
|
if key in wandb.config:
|
||||||
@@ -560,18 +634,18 @@ def run_cli(raw_args: list[str] | None = None):
|
|||||||
|
|
||||||
tiers = _parse_list(args.tiers)
|
tiers = _parse_list(args.tiers)
|
||||||
alpha_values = _parse_float_list(args.alpha_values)
|
alpha_values = _parse_float_list(args.alpha_values)
|
||||||
run_stamp = datetime.now(UTC).strftime("%m%d-%H%M%S")
|
run_stamp = datetime.now(timezone.utc).strftime("%m%d-%H%M%S")
|
||||||
compare_enabled = _truthy(os.environ.get("PHANTOM_BENCHMARK_COMPARE_ROBUST"))
|
compare_enabled = _truthy(os.environ.get("PHANTOM_BENCHMARK_COMPARE_ROBUST"))
|
||||||
compare_tag = "robust-compare" if compare_enabled else "single-mode"
|
compare_tag = "defended-compare" if compare_enabled else "single-mode"
|
||||||
modes = (
|
modes = (
|
||||||
[("no_robust", True), ("robust", False)]
|
[("baseline", True), ("defended", False)]
|
||||||
if compare_enabled
|
if compare_enabled
|
||||||
else [("no_robust" if bool(args.no_robust) else "robust", bool(args.no_robust))]
|
else [(_mode_label_from_baseline(bool(args.no_robust)), bool(args.no_robust))]
|
||||||
)
|
)
|
||||||
|
|
||||||
run_idx = 0
|
run_idx = 0
|
||||||
for tier in tiers:
|
for tier in tiers:
|
||||||
for mode_label, no_robust in modes:
|
for mode_label, baseline_mode in modes:
|
||||||
for alpha in alpha_values:
|
for alpha in alpha_values:
|
||||||
run_idx += 1
|
run_idx += 1
|
||||||
alpha_token = (
|
alpha_token = (
|
||||||
@@ -580,7 +654,7 @@ def run_cli(raw_args: list[str] | None = None):
|
|||||||
tier_args = argparse.Namespace(**vars(args))
|
tier_args = argparse.Namespace(**vars(args))
|
||||||
tier_args.tiers = tier
|
tier_args.tiers = tier
|
||||||
tier_args.alpha_values = str(float(alpha))
|
tier_args.alpha_values = str(float(alpha))
|
||||||
tier_args.no_robust = bool(no_robust)
|
tier_args.no_robust = bool(baseline_mode)
|
||||||
run = wandb.init(
|
run = wandb.init(
|
||||||
project=args.project,
|
project=args.project,
|
||||||
name=(
|
name=(
|
||||||
@@ -597,16 +671,19 @@ def run_cli(raw_args: list[str] | None = None):
|
|||||||
"run.kind": "benchmark",
|
"run.kind": "benchmark",
|
||||||
"runtime/backend": tier,
|
"runtime/backend": tier,
|
||||||
"study/mode": mode_label,
|
"study/mode": mode_label,
|
||||||
"study/no_robust": float(no_robust),
|
"study/baseline_mode": float(baseline_mode),
|
||||||
"study/alpha": float(alpha),
|
"study/alpha": float(alpha),
|
||||||
"tiers": tier,
|
"tiers": tier,
|
||||||
"alpha_values": str(float(alpha)),
|
"alpha_values": str(float(alpha)),
|
||||||
|
"eval_alpha_values": args.eval_alpha_values,
|
||||||
"episodes": args.episodes,
|
"episodes": args.episodes,
|
||||||
"total_timesteps": args.total_timesteps,
|
"total_timesteps": args.total_timesteps,
|
||||||
"lambda_coi": args.lambda_coi,
|
"lambda_coi": args.lambda_coi,
|
||||||
"robust_radius": args.robust_radius,
|
"ambiguity_radius": args.robust_radius,
|
||||||
"robust_points": args.robust_points,
|
"ambiguity_points": args.robust_points,
|
||||||
"robust_rollouts": args.robust_rollouts,
|
"ambiguity_rollouts": args.robust_rollouts,
|
||||||
|
"margin_floor": args.margin_floor,
|
||||||
|
"baseline_mode": float(baseline_mode),
|
||||||
"eta_ux": args.eta_ux,
|
"eta_ux": args.eta_ux,
|
||||||
"reward_profit_weight": args.reward_profit_weight,
|
"reward_profit_weight": args.reward_profit_weight,
|
||||||
"learning_rate": args.learning_rate,
|
"learning_rate": args.learning_rate,
|
||||||
|
|||||||
@@ -48,7 +48,8 @@ class MarketEngine:
|
|||||||
)
|
)
|
||||||
human_transitions = get_adjusted_transitions(demand_h, human=True)
|
human_transitions = get_adjusted_transitions(demand_h, human=True)
|
||||||
agent_transitions = get_adjusted_transitions(demand_a, human=False)
|
agent_transitions = get_adjusted_transitions(demand_a, human=False)
|
||||||
# sample behavior trajectories from each demand distribution
|
# sample N trajectories in parallel; each chain is independent so threads
|
||||||
|
# do not share state and numpy's per-call RNG is thread-safe
|
||||||
human_t = [
|
human_t = [
|
||||||
sample_behavior_from_transitions(human_transitions)
|
sample_behavior_from_transitions(human_transitions)
|
||||||
for _ in range(self.Nhumans)
|
for _ in range(self.Nhumans)
|
||||||
@@ -59,7 +60,25 @@ class MarketEngine:
|
|||||||
]
|
]
|
||||||
# store trajectories for agent probability calculation
|
# store trajectories for agent probability calculation
|
||||||
self.last_trajectories = human_t + agent_t
|
self.last_trajectories = human_t + agent_t
|
||||||
return estimate_demand(self.last_trajectories, self.action_weights)
|
|
||||||
|
demand_proxy = estimate_demand(
|
||||||
|
self.last_trajectories,
|
||||||
|
self.action_weights,
|
||||||
|
normalize=True,
|
||||||
|
per_session=False,
|
||||||
|
)
|
||||||
|
raw_mix = ((1.0 - float(self.alpha)) * demand_h) + (
|
||||||
|
float(self.alpha) * demand_a
|
||||||
|
)
|
||||||
|
total_raw_demand = float(np.sum(raw_mix))
|
||||||
|
if not demand_proxy:
|
||||||
|
return {i: float(raw_mix[i]) for i in range(len(prices))}
|
||||||
|
if total_raw_demand <= 0.0:
|
||||||
|
return {i: 0.0 for i in range(len(prices))}
|
||||||
|
return {
|
||||||
|
i: total_raw_demand * float(demand_proxy.get(i, 0.0)) / 100.0
|
||||||
|
for i in range(len(prices))
|
||||||
|
}
|
||||||
|
|
||||||
def measure(self):
|
def measure(self):
|
||||||
pass
|
pass
|
||||||
|
|||||||
3
engine/jax/__init__.py
Normal file
3
engine/jax/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from .robust import select_adversarial_alpha_jax, _JAX_OK
|
||||||
|
|
||||||
|
__all__ = ["select_adversarial_alpha_jax", "_JAX_OK"]
|
||||||
197
engine/jax/robust.py
Normal file
197
engine/jax/robust.py
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
"""JAX-accelerated robust inner loop for PHANTOM.
|
||||||
|
|
||||||
|
provides a drop-in replacement for the sequential alpha-candidate evaluation in
|
||||||
|
wrapper.py::_select_adversarial_alpha. the demand generation and reward
|
||||||
|
computation are vmapped over the K candidate alpha values so all candidates are
|
||||||
|
evaluated in a single vectorized pass instead of K sequential Python calls.
|
||||||
|
|
||||||
|
public surface:
|
||||||
|
select_adversarial_alpha_jax(candidates, prices, human_params, agent_params,
|
||||||
|
noise_std, baseline_prices, lambda_coi, info_value,
|
||||||
|
reward_profit_weight, rng_seed=0)
|
||||||
|
-> (best_alpha: float, rewards: np.ndarray)
|
||||||
|
|
||||||
|
falls back gracefully when JAX is unavailable.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
try:
|
||||||
|
import jax
|
||||||
|
import jax.numpy as jnp
|
||||||
|
from jax import vmap, jit
|
||||||
|
|
||||||
|
_JAX_OK = True
|
||||||
|
except ImportError:
|
||||||
|
_JAX_OK = False
|
||||||
|
|
||||||
|
_JAX_RUNTIME_OK = True
|
||||||
|
|
||||||
|
|
||||||
|
def _demand_for_actor_jax(prices, mean, std, noise_std, key):
|
||||||
|
"""d(p;theta) = max(0, val - price + noise), normalized to sum 100."""
|
||||||
|
k1, k2 = jax.random.split(key)
|
||||||
|
val = jax.random.normal(k1, shape=prices.shape) * std + mean
|
||||||
|
noise = jax.random.normal(k2, shape=prices.shape) * noise_std
|
||||||
|
demand = jnp.maximum(0.0, val - prices + noise)
|
||||||
|
total = demand.sum()
|
||||||
|
return jnp.where(total > 0, demand / total * 100.0, demand)
|
||||||
|
|
||||||
|
|
||||||
|
def _reward_for_candidate(
    alpha,
    prices,
    human_mean,
    human_std,
    agent_mean,
    agent_std,
    noise_std,
    baseline_prices,
    lambda_coi,
    info_value,
    reward_profit_weight,
    key,
):
    """Scalar reward for one alpha candidate, written in pure JAX so it can be vmapped.

    Human and agent demand are sampled with independent sub-keys and blended
    as (1 - alpha) * human + alpha * agent. Profit is revenue at ``prices``
    minus cost at ``baseline_prices``. The COI penalty is
    lambda_coi * (alpha * info_value) * max(floor_cost, 1).
    """
    human_key, agent_key = jax.random.split(key)

    # Blend the two demand signals by the candidate agent share.
    human_demand = _demand_for_actor_jax(
        prices, human_mean, human_std, noise_std, human_key
    )
    agent_demand = _demand_for_actor_jax(
        prices, agent_mean, agent_std, noise_std, agent_key
    )
    blended = (1.0 - alpha) * human_demand + alpha * agent_demand

    floor_cost = jnp.dot(baseline_prices, blended)
    margin = jnp.dot(prices, blended) - floor_cost

    # No trajectory is available in the vectorized path, so alpha itself
    # stands in for the agent probability.
    leakage = alpha * info_value
    budget = jnp.maximum(floor_cost, 1.0)
    penalty = lambda_coi * leakage * budget

    return reward_profit_weight * margin - penalty
|
||||||
|
|
||||||
|
|
||||||
|
if _JAX_OK:
    # Built once at import; jit retraces only when shapes or dtypes change.
    # _reward_for_candidate takes 12 positional arguments. Only the first
    # (alpha) and the last (PRNG key) are mapped over axis 0; the ten in
    # between are broadcast unchanged to every candidate.
    _reward_batched = jit(
        vmap(_reward_for_candidate, in_axes=(0,) + (None,) * 10 + (0,))
    )
|
||||||
|
|
||||||
|
|
||||||
|
def select_adversarial_alpha_jax(
    candidates: np.ndarray,
    prices: np.ndarray,
    human_params: tuple,
    agent_params: tuple,
    noise_std: float,
    baseline_prices: np.ndarray,
    lambda_coi: float,
    info_value: float,
    reward_profit_weight: float,
    rng_seed: int = 0,
) -> tuple[float, np.ndarray]:
    """Evaluate every alpha candidate in one vmapped pass and pick the worst case.

    Args:
        candidates: candidate alpha values; must be non-empty.
        prices: prices the reward is evaluated at.
        human_params: (mean, std) for the human valuation draw.
        agent_params: (mean, std) for the agent valuation draw.
        noise_std: standard deviation of the additive demand noise.
        baseline_prices: prices used for the floor-cost term.
        lambda_coi: weight of the COI penalty.
        info_value: information value multiplied by alpha in the penalty.
        reward_profit_weight: weight of the profit term.
        rng_seed: seed for the JAX PRNG key. It is not forwarded to the
            numpy fallback.

    Returns:
        (best_alpha, rewards): ``rewards`` holds one value per candidate and
        ``best_alpha`` is the candidate with the lowest reward (worst case
        for the platform, driving robust policy training).

    Raises:
        ValueError: if ``candidates`` is empty.

    When JAX is missing, or an earlier call hit a JAX runtime failure, the
    sequential numpy implementation is used so the wrapper can call this
    function unconditionally.
    """
    global _JAX_RUNTIME_OK

    # Reject empty input before touching JAX. Otherwise np.argmin would raise
    # inside the broad handler below, and a caller mistake would permanently
    # switch the process to the numpy path.
    if len(candidates) == 0:
        raise ValueError("candidates must contain at least one alpha value")

    fallback_args = (
        candidates,
        prices,
        human_params,
        agent_params,
        noise_std,
        baseline_prices,
        lambda_coi,
        info_value,
        reward_profit_weight,
    )

    if not _JAX_OK or not _JAX_RUNTIME_OK:
        return _fallback(*fallback_args)

    try:
        # One independent PRNG key per candidate.
        keys = jax.random.split(jax.random.PRNGKey(rng_seed), len(candidates))
        rewards = np.asarray(
            _reward_batched(
                jnp.asarray(candidates, dtype=jnp.float32),
                jnp.asarray(prices, dtype=jnp.float32),
                float(human_params[0]),
                float(human_params[1]),
                float(agent_params[0]),
                float(agent_params[1]),
                float(noise_std),
                jnp.asarray(baseline_prices, dtype=jnp.float32),
                float(lambda_coi),
                float(info_value),
                float(reward_profit_weight),
                keys,
            )
        )
    except Exception as exc:
        # TPU contention / backend init failures can happen in distributed schedulers.
        # Degrade to numpy path for the remainder of the process.
        _JAX_RUNTIME_OK = False
        print(f"PHANTOM_JAX_FALLBACK: {exc}")
        return _fallback(*fallback_args)

    best_idx = int(np.argmin(rewards))
    return float(candidates[best_idx]), rewards
|
||||||
|
|
||||||
|
|
||||||
|
def _fallback(
|
||||||
|
candidates,
|
||||||
|
prices,
|
||||||
|
human_params,
|
||||||
|
agent_params,
|
||||||
|
noise_std,
|
||||||
|
baseline_prices,
|
||||||
|
lambda_coi,
|
||||||
|
info_value,
|
||||||
|
reward_profit_weight,
|
||||||
|
):
|
||||||
|
"""numpy fallback matching the reward formula above."""
|
||||||
|
rewards = []
|
||||||
|
for alpha in candidates:
|
||||||
|
rng = np.random.default_rng()
|
||||||
|
val_h = rng.normal(*human_params, size=len(prices))
|
||||||
|
val_a = rng.normal(*agent_params, size=len(prices))
|
||||||
|
noise_h = rng.normal(0, noise_std, len(prices))
|
||||||
|
noise_a = rng.normal(0, noise_std, len(prices))
|
||||||
|
d_h = np.maximum(0, val_h - prices + noise_h)
|
||||||
|
d_a = np.maximum(0, val_a - prices + noise_a)
|
||||||
|
s_h, s_a = d_h.sum(), d_a.sum()
|
||||||
|
d_h = d_h / s_h * 100 if s_h > 0 else d_h
|
||||||
|
d_a = d_a / s_a * 100 if s_a > 0 else d_a
|
||||||
|
demand = (1.0 - alpha) * d_h + alpha * d_a
|
||||||
|
revenue = float(np.dot(prices, demand))
|
||||||
|
floor_cost = float(np.dot(baseline_prices, demand))
|
||||||
|
profit = revenue - floor_cost
|
||||||
|
coi_penalty = lambda_coi * alpha * info_value * max(floor_cost, 1.0)
|
||||||
|
rewards.append(reward_profit_weight * profit - coi_penalty)
|
||||||
|
rewards = np.array(rewards)
|
||||||
|
best_idx = int(np.argmin(rewards))
|
||||||
|
return float(candidates[best_idx]), rewards
|
||||||
@@ -22,6 +22,9 @@ human_dir = str(base_dir / "collected_data")
|
|||||||
agent_dir = str(base_dir / "agents" / "collected_data")
|
agent_dir = str(base_dir / "agents" / "collected_data")
|
||||||
|
|
||||||
_cache = {} # lazy cache for models and base pivots
|
_cache = {} # lazy cache for models and base pivots
|
||||||
|
# cache keyed by (human: bool, condition_tuple) so we skip Kronecker re-expansion
|
||||||
|
# for repeated calls with the same demand condition inside the robustness inner loop
|
||||||
|
_transition_cache: dict = {}
|
||||||
|
|
||||||
|
|
||||||
def _get_base_pivot(human: bool):
|
def _get_base_pivot(human: bool):
|
||||||
@@ -68,22 +71,41 @@ def trajectory_to_events(trajectory: list) -> list:
|
|||||||
"""extract event names from trajectory for KL divergence calculation
|
"""extract event names from trajectory for KL divergence calculation
|
||||||
|
|
||||||
trajectories are in format 'eventName_product0', extract just eventName
|
trajectories are in format 'eventName_product0', extract just eventName
|
||||||
|
|
||||||
args:
|
|
||||||
trajectory: list like ['view_product0', 'add_to_cart_product1', 'checkout_product1']
|
|
||||||
|
|
||||||
returns:
|
|
||||||
list: event names like ['view', 'add_to_cart', 'checkout']
|
|
||||||
"""
|
"""
|
||||||
events = []
|
return [s.rsplit("_product", 1)[0] if "_product" in s else s for s in trajectory]
|
||||||
for state in trajectory:
|
|
||||||
# state format from sample_behavior: 'eventName_productX'
|
|
||||||
if "_product" in state:
|
class _TransitionTable:
|
||||||
event = state.rsplit("_product", 1)[0]
|
"""numpy-backed transition table; replaces per-step pandas .loc[] indexing.
|
||||||
else:
|
|
||||||
event = state
|
the profiling hotspot was DataFrame.xs called ~4-16k times per outer step.
|
||||||
events.append(event)
|
converting once to a dense float64 array with an int-keyed state index map
|
||||||
return events
|
reduces each row lookup to a single array slice with no pandas overhead.
|
||||||
|
rows are pre-normalized so sampling requires no per-step division.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__slots__ = ("matrix", "states", "state_index", "n_states")
|
||||||
|
|
||||||
|
def __init__(self, df: pd.DataFrame):
|
||||||
|
self.states: list[str] = df.index.tolist()
|
||||||
|
self.state_index: dict[str, int] = {s: i for i, s in enumerate(self.states)}
|
||||||
|
# float64 throughout: float32 row-sums can drift enough to break np.random.choice
|
||||||
|
mat = np.nan_to_num(
|
||||||
|
df.values.astype(np.float64), nan=0.0, posinf=0.0, neginf=0.0
|
||||||
|
)
|
||||||
|
mat = np.clip(mat, 0.0, None)
|
||||||
|
row_sums = mat.sum(axis=1)
|
||||||
|
# dead rows (all zero) get uniform distribution so sampling never receives NaN
|
||||||
|
dead = row_sums <= 0
|
||||||
|
mat[dead] = 1.0
|
||||||
|
row_sums[dead] = float(mat.shape[1])
|
||||||
|
mat = mat / row_sums[:, np.newaxis]
|
||||||
|
# final nan guard in case fp still drifts
|
||||||
|
np.nan_to_num(mat, nan=0.0, copy=False)
|
||||||
|
row_sums2 = mat.sum(axis=1, keepdims=True)
|
||||||
|
row_sums2[row_sums2 <= 0] = 1.0
|
||||||
|
self.matrix: np.ndarray = mat / row_sums2
|
||||||
|
self.n_states: int = len(self.states)
|
||||||
|
|
||||||
|
|
||||||
def adjust_behavior_to_condition(condition, transition_matrix):
|
def adjust_behavior_to_condition(condition, transition_matrix):
|
||||||
@@ -92,46 +114,73 @@ def adjust_behavior_to_condition(condition, transition_matrix):
|
|||||||
condition = np.nan_to_num(condition, nan=0.0, posinf=0.0, neginf=0.0)
|
condition = np.nan_to_num(condition, nan=0.0, posinf=0.0, neginf=0.0)
|
||||||
condition = np.clip(condition, 0.0, None)
|
condition = np.clip(condition, 0.0, None)
|
||||||
s = float(np.sum(condition))
|
s = float(np.sum(condition))
|
||||||
if not np.isfinite(s) or s <= 0:
|
cond_norm = (
|
||||||
cond_norm = np.full(len(condition), 1.0 / max(len(condition), 1), dtype=float)
|
condition / s
|
||||||
else:
|
if np.isfinite(s) and s > 0
|
||||||
cond_norm = condition / s
|
else np.full(len(condition), 1.0 / max(len(condition), 1), dtype=float)
|
||||||
|
)
|
||||||
n_products = len(condition)
|
n_products = len(condition)
|
||||||
base_vals = transition_matrix.values
|
base_vals = transition_matrix.values
|
||||||
base_cols, base_rows = (
|
base_cols, base_rows = (
|
||||||
transition_matrix.columns.tolist(),
|
transition_matrix.columns.tolist(),
|
||||||
transition_matrix.index.tolist(),
|
transition_matrix.index.tolist(),
|
||||||
)
|
)
|
||||||
|
|
||||||
# expand via kronecker-like tiling: each cell becomes a P*P block weighted by outer product of cond_norm
|
|
||||||
expanded = np.kron(base_vals, np.outer(cond_norm, cond_norm))
|
expanded = np.kron(base_vals, np.outer(cond_norm, cond_norm))
|
||||||
new_cols = [f"{c}_product{p}" for c in base_cols for p in range(n_products)]
|
new_cols = [f"{c}_product{p}" for c in base_cols for p in range(n_products)]
|
||||||
new_rows = [f"{r}_product{p}" for r in base_rows for p in range(n_products)]
|
new_rows = [f"{r}_product{p}" for r in base_rows for p in range(n_products)]
|
||||||
return pd.DataFrame(expanded, index=new_rows, columns=new_cols)
|
return pd.DataFrame(expanded, index=new_rows, columns=new_cols)
|
||||||
|
|
||||||
|
|
||||||
def get_adjusted_transitions(condition, human=True):
|
def get_adjusted_transitions(condition, human=True) -> _TransitionTable:
|
||||||
|
"""return a _TransitionTable for the given demand condition.
|
||||||
|
|
||||||
|
results are cached by (human, rounded-condition) so that repeated calls with
|
||||||
|
the same condition inside the robustness inner loop (K candidates, same prices)
|
||||||
|
skip the Kronecker expansion entirely.
|
||||||
|
"""
|
||||||
|
condition = np.asarray(condition, dtype=float)
|
||||||
|
# round to 4 decimal places for cache key stability
|
||||||
|
cache_key = (human, tuple(np.round(condition, 4).tolist()))
|
||||||
|
if cache_key in _transition_cache:
|
||||||
|
return _transition_cache[cache_key]
|
||||||
|
|
||||||
|
# prevent OOM by capping cache size
|
||||||
|
if len(_transition_cache) > 100:
|
||||||
|
_transition_cache.clear()
|
||||||
|
|
||||||
base_pivot = _get_base_pivot(human)
|
base_pivot = _get_base_pivot(human)
|
||||||
return adjust_behavior_to_condition(condition, base_pivot)
|
df = adjust_behavior_to_condition(condition, base_pivot)
|
||||||
|
table = _TransitionTable(df)
|
||||||
|
_transition_cache[cache_key] = table
|
||||||
|
return table
|
||||||
|
|
||||||
|
|
||||||
def sample_behavior_from_transitions(adjusted_transitions, max_len=40):
|
def clear_transition_cache():
|
||||||
trajectory = [np.random.choice(adjusted_transitions.index)]
|
"""drop cached transition tables; call between episodes if condition space is large."""
|
||||||
|
_transition_cache.clear()
|
||||||
|
|
||||||
|
|
||||||
|
def sample_behavior_from_transitions(table, max_len=40):
|
||||||
|
"""sample a Markov trajectory.
|
||||||
|
|
||||||
|
accepts _TransitionTable (fast path) or a legacy pandas DataFrame so existing
|
||||||
|
call sites that pass a DataFrame directly continue to work unchanged.
|
||||||
|
"""
|
||||||
|
if isinstance(table, pd.DataFrame):
|
||||||
|
table = _TransitionTable(table)
|
||||||
|
|
||||||
|
idx = np.random.randint(table.n_states)
|
||||||
|
trajectory = [table.states[idx]]
|
||||||
while len(trajectory) < max_len and "checkout" not in trajectory[-1]:
|
while len(trajectory) < max_len and "checkout" not in trajectory[-1]:
|
||||||
probs = np.asarray(adjusted_transitions.loc[trajectory[-1]].values, dtype=float)
|
row = table.matrix[table.state_index[trajectory[-1]]]
|
||||||
probs = np.nan_to_num(probs, nan=0.0, posinf=0.0, neginf=0.0)
|
idx = int(np.random.choice(table.n_states, p=row))
|
||||||
probs = np.clip(probs, 0.0, None)
|
trajectory.append(table.states[idx])
|
||||||
s = float(np.sum(probs))
|
|
||||||
sample = np.random.choice(
|
|
||||||
adjusted_transitions.columns, p=(probs / s) if s > 0 else None
|
|
||||||
)
|
|
||||||
trajectory.append(sample)
|
|
||||||
return trajectory
|
return trajectory
|
||||||
|
|
||||||
|
|
||||||
def sample_behavior(condition, human=True, max_len=40):
|
def sample_behavior(condition, human=True, max_len=40):
|
||||||
adjusted_transitions = get_adjusted_transitions(condition, human=human)
|
table = get_adjusted_transitions(condition, human=human)
|
||||||
return sample_behavior_from_transitions(adjusted_transitions, max_len=max_len)
|
return sample_behavior_from_transitions(table, max_len=max_len)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -15,15 +15,19 @@ class MetricsCallback(BaseCallback):
|
|||||||
self,
|
self,
|
||||||
log_histograms: bool = False,
|
log_histograms: bool = False,
|
||||||
log_freq: int = 100,
|
log_freq: int = 100,
|
||||||
|
hist_freq: int = 500,
|
||||||
step_offset: int = 0,
|
step_offset: int = 0,
|
||||||
verbose: int = 0,
|
verbose: int = 0,
|
||||||
):
|
):
|
||||||
super().__init__(verbose)
|
super().__init__(verbose)
|
||||||
self.log_histograms = log_histograms
|
self.log_histograms = log_histograms
|
||||||
self.log_freq = max(1, int(log_freq))
|
self.log_freq = max(1, int(log_freq))
|
||||||
|
self.hist_freq = max(1, int(hist_freq))
|
||||||
self.step_offset = max(0, int(step_offset))
|
self.step_offset = max(0, int(step_offset))
|
||||||
self._wandb = get_wandb_module()
|
self._wandb = get_wandb_module()
|
||||||
self._wandb_live = bool(self._wandb is not None and self._wandb.run is not None)
|
self._wandb_live = bool(self._wandb is not None and self._wandb.run is not None)
|
||||||
|
self._price_samples: list[float] = []
|
||||||
|
self._demand_samples: list[float] = []
|
||||||
self._window_sums = {
|
self._window_sums = {
|
||||||
"train/revenue_mean": 0.0,
|
"train/revenue_mean": 0.0,
|
||||||
"train/margin_mean": 0.0,
|
"train/margin_mean": 0.0,
|
||||||
@@ -74,9 +78,67 @@ class MetricsCallback(BaseCallback):
|
|||||||
)
|
)
|
||||||
self._window_count += 1
|
self._window_count += 1
|
||||||
|
|
||||||
def _flush(self, step: int) -> None:
|
def _accumulate_histograms(self, info: dict[str, Any]) -> None:
|
||||||
if self._window_count <= 0:
|
if not self.log_histograms:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
for key in ("effective_prices", "prices"):
|
||||||
|
if key not in info:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
values = np.asarray(info.get(key), dtype=float).reshape(-1)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if values.size <= 0:
|
||||||
|
continue
|
||||||
|
finite_values = values[np.isfinite(values)]
|
||||||
|
if finite_values.size > 0:
|
||||||
|
self._price_samples.extend(finite_values.tolist())
|
||||||
|
break
|
||||||
|
|
||||||
|
if "demand" in info:
|
||||||
|
try:
|
||||||
|
demand_values = np.asarray(info.get("demand"), dtype=float).reshape(-1)
|
||||||
|
except Exception:
|
||||||
|
demand_values = np.array([], dtype=float)
|
||||||
|
if demand_values.size > 0:
|
||||||
|
finite_demand = demand_values[np.isfinite(demand_values)]
|
||||||
|
if finite_demand.size > 0:
|
||||||
|
self._demand_samples.extend(finite_demand.tolist())
|
||||||
|
|
||||||
|
def _flush_histograms(self, step: int, force: bool = False) -> None:
|
||||||
|
if not self.log_histograms:
|
||||||
|
return
|
||||||
|
if not force and step % self.hist_freq != 0:
|
||||||
|
return
|
||||||
|
if not self._price_samples and not self._demand_samples:
|
||||||
|
return
|
||||||
|
if self._wandb is None:
|
||||||
|
self._price_samples.clear()
|
||||||
|
self._demand_samples.clear()
|
||||||
|
return
|
||||||
|
|
||||||
|
payload: dict[str, Any] = {}
|
||||||
|
if self._price_samples:
|
||||||
|
payload["train/price_dist"] = self._wandb.Histogram(
|
||||||
|
np.asarray(self._price_samples, dtype=np.float32)
|
||||||
|
)
|
||||||
|
if self._demand_samples:
|
||||||
|
payload["train/demand_dist"] = self._wandb.Histogram(
|
||||||
|
np.asarray(self._demand_samples, dtype=np.float32)
|
||||||
|
)
|
||||||
|
|
||||||
|
if payload and self._wandb_live:
|
||||||
|
try:
|
||||||
|
self._wandb.log(payload, step=self.step_offset + int(step))
|
||||||
|
except Exception:
|
||||||
|
self._wandb_live = False
|
||||||
|
|
||||||
|
self._price_samples.clear()
|
||||||
|
self._demand_samples.clear()
|
||||||
|
|
||||||
|
def _flush(self, step: int, *, force_hist: bool = False) -> None:
|
||||||
|
if self._window_count > 0:
|
||||||
denom = float(self._window_count)
|
denom = float(self._window_count)
|
||||||
payload = {
|
payload = {
|
||||||
key: (value / denom)
|
key: (value / denom)
|
||||||
@@ -92,17 +154,24 @@ class MetricsCallback(BaseCallback):
|
|||||||
}
|
}
|
||||||
payload["train/global_step"] = int(step)
|
payload["train/global_step"] = int(step)
|
||||||
if self._wandb_live:
|
if self._wandb_live:
|
||||||
|
try:
|
||||||
self._wandb.log(dict(payload), step=self.step_offset + int(step))
|
self._wandb.log(dict(payload), step=self.step_offset + int(step))
|
||||||
|
except Exception:
|
||||||
|
self._wandb_live = False
|
||||||
|
self.events.append(payload)
|
||||||
else:
|
else:
|
||||||
self.events.append(payload)
|
self.events.append(payload)
|
||||||
for key in self._window_sums:
|
for key in self._window_sums:
|
||||||
self._window_sums[key] = 0.0
|
self._window_sums[key] = 0.0
|
||||||
self._window_count = 0
|
self._window_count = 0
|
||||||
|
|
||||||
|
self._flush_histograms(step=step, force=force_hist)
|
||||||
|
|
||||||
def _on_step(self) -> bool:
|
def _on_step(self) -> bool:
|
||||||
for info in self.locals.get("infos", []):
|
for info in self.locals.get("infos", []):
|
||||||
if isinstance(info, dict):
|
if isinstance(info, dict):
|
||||||
self._accumulate(info)
|
self._accumulate(info)
|
||||||
|
self._accumulate_histograms(info)
|
||||||
|
|
||||||
if self.num_timesteps % self.log_freq == 0:
|
if self.num_timesteps % self.log_freq == 0:
|
||||||
self._flush(step=self.num_timesteps)
|
self._flush(step=self.num_timesteps)
|
||||||
@@ -110,39 +179,81 @@ class MetricsCallback(BaseCallback):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
def _on_training_end(self) -> None:
|
def _on_training_end(self) -> None:
|
||||||
self._flush(step=self.num_timesteps)
|
self._flush(step=self.num_timesteps, force_hist=True)
|
||||||
|
|
||||||
|
|
||||||
class EvalMetricsCallback(EvalCallback):
|
class EvalMetricsCallback(EvalCallback):
|
||||||
"""Deterministic evaluation collector detached from logging backends."""
|
"""Deterministic evaluation collector detached from logging backends."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, eval_env, eval_freq: int = 1000, n_eval_episodes: int = 5, **kwargs
|
self,
|
||||||
|
eval_env,
|
||||||
|
eval_freq: int = 1000,
|
||||||
|
n_eval_episodes: int = 5,
|
||||||
|
step_offset: int = 0,
|
||||||
|
**kwargs,
|
||||||
):
|
):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
eval_env, eval_freq=eval_freq, n_eval_episodes=n_eval_episodes, **kwargs
|
eval_env, eval_freq=eval_freq, n_eval_episodes=n_eval_episodes, **kwargs
|
||||||
)
|
)
|
||||||
self._eval_revenues: list[float] = []
|
self.step_offset = max(0, int(step_offset))
|
||||||
|
self._wandb = get_wandb_module()
|
||||||
|
self._wandb_live = bool(self._wandb is not None and self._wandb.run is not None)
|
||||||
|
self._eval_stats: dict[str, list[float]] = {
|
||||||
|
"eval/revenue_mean": [],
|
||||||
|
"eval/margin_mean": [],
|
||||||
|
"eval/coi_level_mean": [],
|
||||||
|
"eval/coi_leakage_mean": [],
|
||||||
|
"eval/volatility_mean": [],
|
||||||
|
"eval/agent_prob_mean": [],
|
||||||
|
}
|
||||||
self.events: list[dict[str, float | int]] = []
|
self.events: list[dict[str, float | int]] = []
|
||||||
|
|
||||||
def _on_step(self) -> bool:
|
def _on_step(self) -> bool:
|
||||||
result = super()._on_step()
|
result = super()._on_step()
|
||||||
if self.n_calls % self.eval_freq == 0 and hasattr(self, "last_mean_reward"):
|
if self.n_calls % self.eval_freq == 0 and hasattr(self, "last_mean_reward"):
|
||||||
self.events.append(
|
payload: dict[str, float | int] = {
|
||||||
{
|
|
||||||
"eval/reward_mean": float(self.last_mean_reward),
|
"eval/reward_mean": float(self.last_mean_reward),
|
||||||
"eval/revenue_mean": float(np.mean(self._eval_revenues))
|
|
||||||
if self._eval_revenues
|
|
||||||
else 0.0,
|
|
||||||
"train/global_step": int(self.num_timesteps),
|
"train/global_step": int(self.num_timesteps),
|
||||||
}
|
}
|
||||||
|
for key, values in self._eval_stats.items():
|
||||||
|
payload[key] = float(np.mean(values)) if values else 0.0
|
||||||
|
|
||||||
|
if self._wandb_live:
|
||||||
|
try:
|
||||||
|
self._wandb.log(
|
||||||
|
dict(payload),
|
||||||
|
step=self.step_offset + int(self.num_timesteps),
|
||||||
)
|
)
|
||||||
self._eval_revenues = []
|
except Exception:
|
||||||
|
self._wandb_live = False
|
||||||
|
self.events.append(payload)
|
||||||
|
else:
|
||||||
|
self.events.append(payload)
|
||||||
|
|
||||||
|
for values in self._eval_stats.values():
|
||||||
|
values.clear()
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _log_success_callback(self, locals_: dict, globals_: dict) -> None:
|
def _log_success_callback(self, locals_: dict, globals_: dict) -> None:
|
||||||
# called after each eval episode
|
# called after each eval episode
|
||||||
info = locals_.get("info", {})
|
info = locals_.get("info", {})
|
||||||
if "economics" in info:
|
econ = info.get("economics") if isinstance(info, dict) else None
|
||||||
self._eval_revenues.append(info["economics"]["revenue"])
|
if not isinstance(econ, dict):
|
||||||
|
return
|
||||||
|
|
||||||
|
self._eval_stats["eval/revenue_mean"].append(float(econ.get("revenue", 0.0)))
|
||||||
|
self._eval_stats["eval/margin_mean"].append(float(econ.get("margin", 0.0)))
|
||||||
|
self._eval_stats["eval/coi_level_mean"].append(
|
||||||
|
float(econ.get("coi_level", 0.0))
|
||||||
|
)
|
||||||
|
self._eval_stats["eval/coi_leakage_mean"].append(
|
||||||
|
float(econ.get("coi_leakage", 0.0))
|
||||||
|
)
|
||||||
|
self._eval_stats["eval/volatility_mean"].append(
|
||||||
|
float(econ.get("volatility", 0.0))
|
||||||
|
)
|
||||||
|
self._eval_stats["eval/agent_prob_mean"].append(
|
||||||
|
float(econ.get("agent_prob", 0.0))
|
||||||
|
)
|
||||||
|
|||||||
@@ -17,18 +17,32 @@ def generate_demand_for_actor(
|
|||||||
params: tuple,
|
params: tuple,
|
||||||
noise_std: float = 1.0,
|
noise_std: float = 1.0,
|
||||||
distribution_method=np.random.normal,
|
distribution_method=np.random.normal,
|
||||||
|
normalize: bool = False,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
"""d(p;0) = max(0, valuation - price) + epsi for single actor type
|
"""d(p;0) = max(0, valuation - price) + epsi for single actor type
|
||||||
params: (mean, std) for valuation distribution D_H or D_A"""
|
params: (mean, std) for valuation distribution D_H or D_A"""
|
||||||
val = distribution_method(*params, size=len(prices))
|
val = distribution_method(*params, size=len(prices))
|
||||||
noise = distribution_method(0, noise_std, len(prices))
|
noise = distribution_method(0, noise_std, len(prices))
|
||||||
demand = np.maximum(0, val - prices + noise)
|
demand = np.maximum(0, val - prices + noise)
|
||||||
|
if not normalize:
|
||||||
|
return demand
|
||||||
total = np.sum(demand)
|
total = np.sum(demand)
|
||||||
return demand / total * 100 if total > 0 else demand
|
return demand / total * 100 if total > 0 else demand
|
||||||
|
|
||||||
|
|
||||||
def estimate_demand(trajectories, action_weights=None):
|
def estimate_demand(
|
||||||
return estimate_weighted_demand(trajectories, action_weights)
|
trajectories,
|
||||||
|
action_weights=None,
|
||||||
|
*,
|
||||||
|
normalize: bool = False,
|
||||||
|
per_session: bool = True,
|
||||||
|
):
|
||||||
|
return estimate_weighted_demand(
|
||||||
|
trajectories,
|
||||||
|
action_weights,
|
||||||
|
normalize=normalize,
|
||||||
|
per_session=per_session,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _parse_event_state(state: str):
|
def _parse_event_state(state: str):
|
||||||
@@ -50,7 +64,13 @@ def _weight_for_action(action: str, action_weights: dict) -> float:
|
|||||||
return CATEGORY_WEIGHTS["nav"]
|
return CATEGORY_WEIGHTS["nav"]
|
||||||
|
|
||||||
|
|
||||||
def estimate_weighted_demand(trajectories, action_weights=None):
|
def estimate_weighted_demand(
|
||||||
|
trajectories,
|
||||||
|
action_weights=None,
|
||||||
|
*,
|
||||||
|
normalize: bool = False,
|
||||||
|
per_session: bool = True,
|
||||||
|
):
|
||||||
action_weights = (
|
action_weights = (
|
||||||
DEFAULT_ACTION_WEIGHTS if action_weights is None else action_weights
|
DEFAULT_ACTION_WEIGHTS if action_weights is None else action_weights
|
||||||
)
|
)
|
||||||
@@ -64,12 +84,20 @@ def estimate_weighted_demand(trajectories, action_weights=None):
|
|||||||
if w <= 0:
|
if w <= 0:
|
||||||
continue
|
continue
|
||||||
scores[product_id] = scores.get(product_id, 0.0) + w
|
scores[product_id] = scores.get(product_id, 0.0) + w
|
||||||
total = sum(scores.values())
|
if not scores:
|
||||||
return (
|
return {}
|
||||||
{pid: (score / total) * 100 for pid, score in scores.items()}
|
|
||||||
if total > 0
|
if per_session and len(trajectories) > 0:
|
||||||
else {}
|
inv_n = 1.0 / float(len(trajectories))
|
||||||
)
|
scores = {pid: score * inv_n for pid, score in scores.items()}
|
||||||
|
|
||||||
|
if not normalize:
|
||||||
|
return scores
|
||||||
|
|
||||||
|
total = float(sum(scores.values()))
|
||||||
|
if total <= 0:
|
||||||
|
return {}
|
||||||
|
return {pid: (score / total) * 100.0 for pid, score in scores.items()}
|
||||||
|
|
||||||
|
|
||||||
# Example usage
|
# Example usage
|
||||||
|
|||||||
@@ -156,6 +156,7 @@ class ProviderBenchmark:
|
|||||||
|
|
||||||
# log to wandb if available
|
# log to wandb if available
|
||||||
if HAS_WANDB and wandb.run is not None:
|
if HAS_WANDB and wandb.run is not None:
|
||||||
|
try:
|
||||||
wandb.log(
|
wandb.log(
|
||||||
{
|
{
|
||||||
f"benchmark/{name}/revenue": result.mean_revenue,
|
f"benchmark/{name}/revenue": result.mean_revenue,
|
||||||
@@ -164,6 +165,8 @@ class ProviderBenchmark:
|
|||||||
"benchmark/alpha": alpha,
|
"benchmark/alpha": alpha,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return self.results
|
return self.results
|
||||||
|
|
||||||
|
|||||||
@@ -32,17 +32,23 @@ class EconomicMetricsWrapper(gym.Wrapper):
|
|||||||
obs, reward, terminated, truncated, info = self.env.step(action)
|
obs, reward, terminated, truncated, info = self.env.step(action)
|
||||||
|
|
||||||
# extract from unwrapped env
|
# extract from unwrapped env
|
||||||
prices = self.env.unwrapped._prices
|
quoted_prices = np.asarray(self.env.unwrapped._prices, dtype=float)
|
||||||
|
effective_prices = np.asarray(
|
||||||
|
info.get("effective_prices", quoted_prices), dtype=float
|
||||||
|
)
|
||||||
|
if effective_prices.shape != quoted_prices.shape:
|
||||||
|
effective_prices = quoted_prices
|
||||||
demand_dict = self.env.unwrapped._demand
|
demand_dict = self.env.unwrapped._demand
|
||||||
demand = np.array([demand_dict.get(i, 0.0) for i in range(len(prices))])
|
demand = np.array([demand_dict.get(i, 0.0) for i in range(len(quoted_prices))])
|
||||||
|
|
||||||
# core calculations
|
# core calculations
|
||||||
revenue = float(np.sum(prices * demand))
|
revenue = float(info.get("revenue", np.sum(effective_prices * demand)))
|
||||||
avg_price = float(np.mean(prices))
|
quoted_revenue = float(np.sum(quoted_prices * demand))
|
||||||
|
avg_price = float(np.mean(effective_prices))
|
||||||
margin = (avg_price - self.p_min) / max(avg_price, 1e-6)
|
margin = (avg_price - self.p_min) / max(avg_price, 1e-6)
|
||||||
coi_level = avg_price - self.p_min # E[P] - p_min per thesis Def 1
|
coi_level = avg_price - self.p_min # E[P] - p_min per thesis Def 1
|
||||||
|
|
||||||
self._price_history.append(prices.copy())
|
self._price_history.append(effective_prices.copy())
|
||||||
self._revenue_history.append(revenue)
|
self._revenue_history.append(revenue)
|
||||||
|
|
||||||
# regret vs baseline (golden path)
|
# regret vs baseline (golden path)
|
||||||
@@ -53,6 +59,7 @@ class EconomicMetricsWrapper(gym.Wrapper):
|
|||||||
# inject structured metrics into info
|
# inject structured metrics into info
|
||||||
info["economics"] = {
|
info["economics"] = {
|
||||||
"revenue": revenue,
|
"revenue": revenue,
|
||||||
|
"quoted_revenue": quoted_revenue,
|
||||||
"margin": margin,
|
"margin": margin,
|
||||||
"coi_level": coi_level,
|
"coi_level": coi_level,
|
||||||
"regret": regret,
|
"regret": regret,
|
||||||
@@ -64,6 +71,10 @@ class EconomicMetricsWrapper(gym.Wrapper):
|
|||||||
"coi_penalty",
|
"coi_penalty",
|
||||||
"ux_penalty",
|
"ux_penalty",
|
||||||
"volatility",
|
"volatility",
|
||||||
|
"upward_volatility",
|
||||||
|
"supra_penalty",
|
||||||
|
"supra_share",
|
||||||
|
"competitive_anchor",
|
||||||
"profit",
|
"profit",
|
||||||
"cost_floor",
|
"cost_floor",
|
||||||
"reward_revenue",
|
"reward_revenue",
|
||||||
@@ -71,10 +82,13 @@ class EconomicMetricsWrapper(gym.Wrapper):
|
|||||||
"agent_prob",
|
"agent_prob",
|
||||||
"alpha_adv",
|
"alpha_adv",
|
||||||
"alpha_nominal",
|
"alpha_nominal",
|
||||||
|
"erosion_share",
|
||||||
|
"effective_price_mean",
|
||||||
):
|
):
|
||||||
if key in info:
|
if key in info:
|
||||||
info["economics"][key] = info[key]
|
info["economics"][key] = info[key]
|
||||||
info["prices"] = prices.copy()
|
info["prices"] = quoted_prices.copy()
|
||||||
|
info["effective_prices"] = effective_prices.copy()
|
||||||
info["demand"] = demand.copy()
|
info["demand"] = demand.copy()
|
||||||
|
|
||||||
return obs, reward, terminated, truncated, info
|
return obs, reward, terminated, truncated, info
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ from ..telemetry.wandb import (
|
|||||||
get_wandb_module,
|
get_wandb_module,
|
||||||
init_run,
|
init_run,
|
||||||
run_agent,
|
run_agent,
|
||||||
|
update_summary,
|
||||||
)
|
)
|
||||||
from .train import run_with_active_sweep_run
|
from .train import run_with_active_sweep_run
|
||||||
|
|
||||||
@@ -43,6 +44,7 @@ def run_sweep_agent(
|
|||||||
spec = TrainSpec.from_flat(merged)
|
spec = TrainSpec.from_flat(merged)
|
||||||
if run is not None:
|
if run is not None:
|
||||||
run.name = run_name(spec, kind=kind, scenario=scenario)
|
run.name = run_name(spec, kind=kind, scenario=scenario)
|
||||||
|
try:
|
||||||
run_with_active_sweep_run(
|
run_with_active_sweep_run(
|
||||||
spec,
|
spec,
|
||||||
kind=kind,
|
kind=kind,
|
||||||
@@ -50,6 +52,15 @@ def run_sweep_agent(
|
|||||||
group=group,
|
group=group,
|
||||||
extra_tags=extra_tags,
|
extra_tags=extra_tags,
|
||||||
)
|
)
|
||||||
|
update_summary({"run/status": "finished"})
|
||||||
|
except Exception as exc:
|
||||||
|
update_summary(
|
||||||
|
{
|
||||||
|
"run/status": "crashed",
|
||||||
|
"run/error": str(exc),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
raise
|
||||||
finally:
|
finally:
|
||||||
finish_run()
|
finish_run()
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ def _tags_for_run(spec: TrainSpec, kind: str, extra_tags: Sequence[str]) -> list
|
|||||||
kind,
|
kind,
|
||||||
spec.algorithm.name,
|
spec.algorithm.name,
|
||||||
spec.runtime.backend,
|
spec.runtime.backend,
|
||||||
"vanilla" if spec.study.no_robust else "robust",
|
"baseline" if spec.study.no_robust else "defended",
|
||||||
]
|
]
|
||||||
tags.extend([tag for tag in extra_tags if tag])
|
tags.extend([tag for tag in extra_tags if tag])
|
||||||
return tags
|
return tags
|
||||||
|
|||||||
@@ -91,6 +91,44 @@
|
|||||||
"command": "bash scripts/nx_research.sh docker-train-publish",
|
"command": "bash scripts/nx_research.sh docker-train-publish",
|
||||||
"cwd": "."
|
"cwd": "."
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"whoclicked-publish": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"dependsOn": [
|
||||||
|
"install"
|
||||||
|
],
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh whoclicked-publish",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tpu-ray-bootstrap": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh tpu-ray-bootstrap",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tpu-ray-deps": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh tpu-ray-deps",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tpu-ray-verify": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh tpu-ray-verify",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tpu-ray-teardown": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh tpu-ray-teardown",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tags": [
|
"tags": [
|
||||||
|
|||||||
@@ -32,10 +32,17 @@ def _normalize_keys(raw: Mapping[str, Any]) -> dict[str, Any]:
|
|||||||
"study.robust_radius": "robust_radius",
|
"study.robust_radius": "robust_radius",
|
||||||
"study.robust_points": "robust_points",
|
"study.robust_points": "robust_points",
|
||||||
"study.robust_rollouts": "robust_rollouts",
|
"study.robust_rollouts": "robust_rollouts",
|
||||||
|
"study.ambiguity_radius": "robust_radius",
|
||||||
|
"study.ambiguity_points": "robust_points",
|
||||||
|
"study.ambiguity_rollouts": "robust_rollouts",
|
||||||
"study.info_value": "info_value",
|
"study.info_value": "info_value",
|
||||||
"study.eta_ux": "eta_ux",
|
"study.eta_ux": "eta_ux",
|
||||||
"study.reward_profit_weight": "reward_profit_weight",
|
"study.reward_profit_weight": "reward_profit_weight",
|
||||||
"study.revenue_weight": "revenue_weight",
|
"ambiguity_radius": "robust_radius",
|
||||||
|
"ambiguity_points": "robust_points",
|
||||||
|
"ambiguity_rollouts": "robust_rollouts",
|
||||||
|
"baseline_mode": "no_robust",
|
||||||
|
"stress_eval_enabled": "robust_eval_enabled",
|
||||||
"optimizer.learning_rate": "learning_rate",
|
"optimizer.learning_rate": "learning_rate",
|
||||||
"optimizer.gamma": "gamma",
|
"optimizer.gamma": "gamma",
|
||||||
"optimizer.batch_size": "batch_size",
|
"optimizer.batch_size": "batch_size",
|
||||||
@@ -45,6 +52,7 @@ def _normalize_keys(raw: Mapping[str, Any]) -> dict[str, Any]:
|
|||||||
"runtime.seed": "seed",
|
"runtime.seed": "seed",
|
||||||
"runtime.total_timesteps": "total_timesteps",
|
"runtime.total_timesteps": "total_timesteps",
|
||||||
"runtime.checkpoint_interval": "checkpoint_interval",
|
"runtime.checkpoint_interval": "checkpoint_interval",
|
||||||
|
"runtime.hist_freq": "hist_freq",
|
||||||
"eval.eval_freq": "eval_freq",
|
"eval.eval_freq": "eval_freq",
|
||||||
"eval.eval_episodes": "eval_episodes",
|
"eval.eval_episodes": "eval_episodes",
|
||||||
}
|
}
|
||||||
@@ -72,6 +80,8 @@ class EnvSpec:
|
|||||||
max_steps: int = 100
|
max_steps: int = 100
|
||||||
margin_floor: float = 0.05
|
margin_floor: float = 0.05
|
||||||
margin_floor_patience: int = 5
|
margin_floor_patience: int = 5
|
||||||
|
agent_mu: float = 45.0
|
||||||
|
agent_std: float = 15.0
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@@ -84,7 +94,6 @@ class StudySpec:
|
|||||||
info_value: float = 1.0
|
info_value: float = 1.0
|
||||||
eta_ux: float = 0.5
|
eta_ux: float = 0.5
|
||||||
reward_profit_weight: float = 1.0
|
reward_profit_weight: float = 1.0
|
||||||
revenue_weight: float = 0.01
|
|
||||||
no_robust: bool = False
|
no_robust: bool = False
|
||||||
|
|
||||||
|
|
||||||
@@ -126,6 +135,7 @@ class RuntimeSpec:
|
|||||||
checkpoint_interval: int = 200_000
|
checkpoint_interval: int = 200_000
|
||||||
model_dir: str = "engine/models"
|
model_dir: str = "engine/models"
|
||||||
log_freq: int = 100
|
log_freq: int = 100
|
||||||
|
hist_freq: int = 500
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@@ -157,6 +167,7 @@ class TrainSpec:
|
|||||||
"backend": self.runtime.backend,
|
"backend": self.runtime.backend,
|
||||||
"device": self.runtime.device,
|
"device": self.runtime.device,
|
||||||
"checkpoint_interval": self.runtime.checkpoint_interval,
|
"checkpoint_interval": self.runtime.checkpoint_interval,
|
||||||
|
"hist_freq": self.runtime.hist_freq,
|
||||||
"n_products": self.env.n_products,
|
"n_products": self.env.n_products,
|
||||||
"N": self.env.n_sessions,
|
"N": self.env.n_sessions,
|
||||||
"price_low": self.env.price_low,
|
"price_low": self.env.price_low,
|
||||||
@@ -167,6 +178,8 @@ class TrainSpec:
|
|||||||
"max_steps": self.env.max_steps,
|
"max_steps": self.env.max_steps,
|
||||||
"margin_floor": self.env.margin_floor,
|
"margin_floor": self.env.margin_floor,
|
||||||
"margin_floor_patience": self.env.margin_floor_patience,
|
"margin_floor_patience": self.env.margin_floor_patience,
|
||||||
|
"agent_mu": self.env.agent_mu,
|
||||||
|
"agent_std": self.env.agent_std,
|
||||||
"alpha": self.study.alpha,
|
"alpha": self.study.alpha,
|
||||||
"lambda_coi": self.study.lambda_coi,
|
"lambda_coi": self.study.lambda_coi,
|
||||||
"robust_radius": self.study.robust_radius,
|
"robust_radius": self.study.robust_radius,
|
||||||
@@ -175,7 +188,6 @@ class TrainSpec:
|
|||||||
"info_value": self.study.info_value,
|
"info_value": self.study.info_value,
|
||||||
"eta_ux": self.study.eta_ux,
|
"eta_ux": self.study.eta_ux,
|
||||||
"reward_profit_weight": self.study.reward_profit_weight,
|
"reward_profit_weight": self.study.reward_profit_weight,
|
||||||
"revenue_weight": self.study.revenue_weight,
|
|
||||||
"no_robust": self.study.no_robust,
|
"no_robust": self.study.no_robust,
|
||||||
"learning_rate": self.optimizer.learning_rate,
|
"learning_rate": self.optimizer.learning_rate,
|
||||||
"gamma": self.optimizer.gamma,
|
"gamma": self.optimizer.gamma,
|
||||||
@@ -246,6 +258,8 @@ class TrainSpec:
|
|||||||
max_steps=int(base["max_steps"]),
|
max_steps=int(base["max_steps"]),
|
||||||
margin_floor=float(base["margin_floor"]),
|
margin_floor=float(base["margin_floor"]),
|
||||||
margin_floor_patience=int(base["margin_floor_patience"]),
|
margin_floor_patience=int(base["margin_floor_patience"]),
|
||||||
|
agent_mu=float(base.get("agent_mu", 45.0)),
|
||||||
|
agent_std=float(base.get("agent_std", 15.0)),
|
||||||
),
|
),
|
||||||
study=StudySpec(
|
study=StudySpec(
|
||||||
alpha=float(base["alpha"]),
|
alpha=float(base["alpha"]),
|
||||||
@@ -256,7 +270,6 @@ class TrainSpec:
|
|||||||
info_value=float(base["info_value"]),
|
info_value=float(base["info_value"]),
|
||||||
eta_ux=float(base["eta_ux"]),
|
eta_ux=float(base["eta_ux"]),
|
||||||
reward_profit_weight=float(base["reward_profit_weight"]),
|
reward_profit_weight=float(base["reward_profit_weight"]),
|
||||||
revenue_weight=float(base["revenue_weight"]),
|
|
||||||
no_robust=no_robust,
|
no_robust=no_robust,
|
||||||
),
|
),
|
||||||
optimizer=OptimizerSpec(
|
optimizer=OptimizerSpec(
|
||||||
@@ -294,6 +307,7 @@ class TrainSpec:
|
|||||||
checkpoint_interval=int(base["checkpoint_interval"]),
|
checkpoint_interval=int(base["checkpoint_interval"]),
|
||||||
model_dir=str(base["model_dir"]),
|
model_dir=str(base["model_dir"]),
|
||||||
log_freq=int(base["log_freq"]),
|
log_freq=int(base["log_freq"]),
|
||||||
|
hist_freq=int(base["hist_freq"]),
|
||||||
),
|
),
|
||||||
eval=EvalSpec(
|
eval=EvalSpec(
|
||||||
eval_freq=int(base["eval_freq"]),
|
eval_freq=int(base["eval_freq"]),
|
||||||
@@ -304,9 +318,11 @@ class TrainSpec:
|
|||||||
|
|
||||||
|
|
||||||
def run_name(spec: TrainSpec, *, kind: str, scenario: str) -> str:
|
def run_name(spec: TrainSpec, *, kind: str, scenario: str) -> str:
|
||||||
|
alpha_token = f"{float(spec.study.alpha):.2f}".rstrip("0").rstrip(".")
|
||||||
|
mode = "baseline" if bool(spec.study.no_robust) else "defended"
|
||||||
return (
|
return (
|
||||||
f"{kind}/{spec.algorithm.name}/{spec.runtime.backend}/"
|
f"{kind}/{spec.algorithm.name}/{spec.runtime.backend}/"
|
||||||
f"{spec.runtime.device}/{scenario}/s{spec.runtime.seed}"
|
f"{spec.runtime.device}/{scenario}/a{alpha_token}/{mode}/s{spec.runtime.seed}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -318,6 +334,7 @@ def run_metadata(
|
|||||||
group: str | None = None,
|
group: str | None = None,
|
||||||
tags: Sequence[str] = (),
|
tags: Sequence[str] = (),
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
|
mode = "baseline" if bool(spec.study.no_robust) else "defended"
|
||||||
metadata: dict[str, Any] = {
|
metadata: dict[str, Any] = {
|
||||||
"run.kind": str(kind),
|
"run.kind": str(kind),
|
||||||
"run.algo": spec.algorithm.name,
|
"run.algo": spec.algorithm.name,
|
||||||
@@ -326,6 +343,10 @@ def run_metadata(
|
|||||||
"run.scenario": str(scenario),
|
"run.scenario": str(scenario),
|
||||||
"run.seed": spec.runtime.seed,
|
"run.seed": spec.runtime.seed,
|
||||||
"run.tags": list(tags),
|
"run.tags": list(tags),
|
||||||
|
"study/alpha": float(spec.study.alpha),
|
||||||
|
"study/mode": mode,
|
||||||
|
"study/baseline_mode": float(bool(spec.study.no_robust)),
|
||||||
|
"tiers": spec.algorithm.name,
|
||||||
}
|
}
|
||||||
if group:
|
if group:
|
||||||
metadata["run.group"] = group
|
metadata["run.group"] = group
|
||||||
|
|||||||
133
engine/studies/margin_erosion_alpha.py
Normal file
133
engine/studies/margin_erosion_alpha.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
"""validate core thesis problem: margin erosion under agent contamination
|
||||||
|
trains standard RL (no robust components) across α levels to demonstrate systematic failure
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
import json, sys, time
|
||||||
|
from pathlib import Path
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||||
|
from engine.spec import TrainSpec
|
||||||
|
from engine.orchestrators import run_train_once
|
||||||
|
|
||||||
|
|
||||||
|
def _run_baseline(alpha: float, algo: str, seed: int, steps: int) -> dict:
|
||||||
|
spec = TrainSpec.from_flat(
|
||||||
|
{
|
||||||
|
"algo": algo,
|
||||||
|
"seed": seed,
|
||||||
|
"alpha": alpha,
|
||||||
|
"total_timesteps": steps,
|
||||||
|
"lambda_coi": 0.0,
|
||||||
|
"robust_radius": 0.0,
|
||||||
|
"robust_points": 1,
|
||||||
|
"robust_rollouts": 1,
|
||||||
|
"no_robust": True,
|
||||||
|
"arch": "small",
|
||||||
|
"n_products": 10,
|
||||||
|
"N": 100,
|
||||||
|
"max_steps": 50,
|
||||||
|
"eval_freq": 5000,
|
||||||
|
"eval_episodes": 10,
|
||||||
|
"log_freq": 500,
|
||||||
|
"robust_eval_enabled": False,
|
||||||
|
"agent_mu": 12.0,
|
||||||
|
"agent_std": 2.0,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
result = run_train_once(
|
||||||
|
spec,
|
||||||
|
project="phantom-margin-erosion",
|
||||||
|
offline=True,
|
||||||
|
no_wandb=True,
|
||||||
|
kind="study",
|
||||||
|
scenario=f"alpha{int(alpha * 100):02d}",
|
||||||
|
group=f"baseline_{algo}",
|
||||||
|
extra_tags=("margin_erosion", "baseline"),
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"alpha": alpha,
|
||||||
|
"algo": algo,
|
||||||
|
"seed": seed,
|
||||||
|
"eval_reward": result.get("eval/reward_mean", np.nan),
|
||||||
|
"eval_revenue": result.get("eval/revenue_mean", np.nan),
|
||||||
|
"eval_coi_level": result.get("eval/coi_level_mean", np.nan),
|
||||||
|
"eval_margin": result.get("eval/margin_mean", np.nan),
|
||||||
|
"eval_agent_prob": result.get("eval/agent_prob_mean", np.nan),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def run_margin_erosion_study(
|
||||||
|
alphas: list[float] | None = None,
|
||||||
|
algos: list[str] | None = None,
|
||||||
|
seeds: int = 3,
|
||||||
|
steps: int = 30_000,
|
||||||
|
) -> dict:
|
||||||
|
alphas = alphas or [0.1, 0.3, 0.5, 0.7, 0.9]
|
||||||
|
algos = algos or ["ppo", "dqn", "qtable"]
|
||||||
|
output_dir = Path(__file__).parent / "results"
|
||||||
|
output_dir.mkdir(exist_ok=True)
|
||||||
|
ts = time.strftime("%Y%m%d_%H%M%S")
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for α in alphas:
|
||||||
|
for algo in algos:
|
||||||
|
for si in range(seeds):
|
||||||
|
seed = 42 + si
|
||||||
|
print(f"α={α:.1f} {algo} seed={seed}")
|
||||||
|
m = _run_baseline(α, algo, seed, steps)
|
||||||
|
results.append(m)
|
||||||
|
print(
|
||||||
|
f" margin={m['eval_margin']:.3f} rev={m['eval_revenue']:.0f} coi={m['eval_coi_level']:.1f}"
|
||||||
|
)
|
||||||
|
|
||||||
|
summary = {}
|
||||||
|
for α in alphas:
|
||||||
|
runs = [r for r in results if abs(r["alpha"] - α) < 0.01]
|
||||||
|
if not runs:
|
||||||
|
continue
|
||||||
|
s = {}
|
||||||
|
for metric in ["margin", "revenue", "coi_level", "agent_prob"]:
|
||||||
|
vals = [r[f"eval_{metric}"] for r in runs]
|
||||||
|
s[f"{metric}_mean"] = float(np.mean(vals))
|
||||||
|
s[f"{metric}_std"] = float(np.std(vals))
|
||||||
|
s["n_runs"] = len(runs)
|
||||||
|
summary[f"alpha_{α:.1f}"] = s
|
||||||
|
|
||||||
|
output = {
|
||||||
|
"timestamp": ts,
|
||||||
|
"config": {"alphas": alphas, "algos": algos, "seeds": seeds, "steps": steps},
|
||||||
|
"results": results,
|
||||||
|
"summary": summary,
|
||||||
|
}
|
||||||
|
|
||||||
|
path = output_dir / f"margin_erosion_alpha_{ts}.json"
|
||||||
|
with open(path, "w") as f:
|
||||||
|
json.dump(output, f, indent=2)
|
||||||
|
|
||||||
|
print(f"\n→ {path}")
|
||||||
|
for α in alphas:
|
||||||
|
k = f"alpha_{α:.1f}"
|
||||||
|
if k in summary:
|
||||||
|
s = summary[k]
|
||||||
|
print(
|
||||||
|
f" {k}: margin={s['margin_mean']:.3f}±{s['margin_std']:.3f} "
|
||||||
|
f"coi={s['coi_level_mean']:.1f}±{s['coi_level_std']:.1f}"
|
||||||
|
)
|
||||||
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
p = argparse.ArgumentParser(description="margin erosion vs α")
|
||||||
|
p.add_argument("--quick", action="store_true", help="fast test")
|
||||||
|
args = p.parse_args()
|
||||||
|
|
||||||
|
run_margin_erosion_study(
|
||||||
|
alphas=[0.1, 0.7] if args.quick else [0.1, 0.3, 0.5, 0.7, 0.9],
|
||||||
|
algos=["qtable"] if args.quick else ["ppo", "dqn", "qtable"],
|
||||||
|
seeds=1 if args.quick else 3,
|
||||||
|
steps=5_000 if args.quick else 30_000,
|
||||||
|
)
|
||||||
60
engine/sweeps/final_thesis_proof.yaml
Normal file
60
engine/sweeps/final_thesis_proof.yaml
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
method: grid
|
||||||
|
metric:
|
||||||
|
name: eval/stress_reward_worst
|
||||||
|
goal: maximize
|
||||||
|
command:
|
||||||
|
- ${env}
|
||||||
|
- python
|
||||||
|
- -m
|
||||||
|
- engine.train
|
||||||
|
parameters:
|
||||||
|
algo:
|
||||||
|
value: ppo
|
||||||
|
backend:
|
||||||
|
value: sb3
|
||||||
|
device:
|
||||||
|
value: cpu
|
||||||
|
seed:
|
||||||
|
values: [42, 1337, 7777]
|
||||||
|
alpha:
|
||||||
|
values: [0.1, 0.2, 0.3, 0.4, 0.6, 0.8]
|
||||||
|
n_products:
|
||||||
|
values: [25, 50, 100]
|
||||||
|
N:
|
||||||
|
value: 100
|
||||||
|
no_robust:
|
||||||
|
values: [false, true]
|
||||||
|
lambda_coi:
|
||||||
|
values: [0.15, 0.30]
|
||||||
|
robust_radius:
|
||||||
|
value: 0.2
|
||||||
|
robust_points:
|
||||||
|
value: 7
|
||||||
|
robust_rollouts:
|
||||||
|
value: 1
|
||||||
|
eta_ux:
|
||||||
|
value: 0.5
|
||||||
|
reward_profit_weight:
|
||||||
|
value: 1.0
|
||||||
|
action_levels:
|
||||||
|
value: 9
|
||||||
|
action_scale_low:
|
||||||
|
value: 0.8
|
||||||
|
action_scale_high:
|
||||||
|
value: 1.2
|
||||||
|
total_timesteps:
|
||||||
|
value: 100000
|
||||||
|
eval_episodes:
|
||||||
|
value: 12
|
||||||
|
eval_freq:
|
||||||
|
value: 1000
|
||||||
|
log_freq:
|
||||||
|
value: 100
|
||||||
|
hist_freq:
|
||||||
|
value: 500
|
||||||
|
learning_rate:
|
||||||
|
value: 0.0003
|
||||||
|
batch_size:
|
||||||
|
value: 256
|
||||||
|
n_steps:
|
||||||
|
value: 2048
|
||||||
53
engine/sweeps/ppo_supra_guard.yaml
Normal file
53
engine/sweeps/ppo_supra_guard.yaml
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
method: random
|
||||||
|
metric:
|
||||||
|
name: eval/supra_share_mean
|
||||||
|
goal: minimize
|
||||||
|
run_cap: 256
|
||||||
|
command:
|
||||||
|
- ${env}
|
||||||
|
- python
|
||||||
|
- -m
|
||||||
|
- engine.train
|
||||||
|
parameters:
|
||||||
|
algo:
|
||||||
|
value: ppo
|
||||||
|
seed:
|
||||||
|
values: [42, 1337, 7777]
|
||||||
|
alpha:
|
||||||
|
values: [0.1, 0.2, 0.3, 0.4, 0.6]
|
||||||
|
n_products:
|
||||||
|
values: [25, 50]
|
||||||
|
N:
|
||||||
|
value: 100
|
||||||
|
no_robust:
|
||||||
|
values: [false, true]
|
||||||
|
lambda_coi:
|
||||||
|
values: [0.05, 0.15, 0.3]
|
||||||
|
robust_radius:
|
||||||
|
values: [0.1, 0.2, 0.3]
|
||||||
|
robust_points:
|
||||||
|
value: 7
|
||||||
|
robust_rollouts:
|
||||||
|
value: 1
|
||||||
|
eta_ux:
|
||||||
|
values: [0.05, 0.15, 0.3, 0.5, 0.75]
|
||||||
|
reward_profit_weight:
|
||||||
|
value: 1.0
|
||||||
|
total_timesteps:
|
||||||
|
value: 100000
|
||||||
|
eval_episodes:
|
||||||
|
value: 10
|
||||||
|
eval_freq:
|
||||||
|
value: 1000
|
||||||
|
log_freq:
|
||||||
|
value: 100
|
||||||
|
hist_freq:
|
||||||
|
value: 500
|
||||||
|
learning_rate:
|
||||||
|
value: 0.0003
|
||||||
|
batch_size:
|
||||||
|
value: 256
|
||||||
|
n_steps:
|
||||||
|
value: 2048
|
||||||
|
device:
|
||||||
|
value: cpu
|
||||||
@@ -36,7 +36,12 @@ def canonicalize_metrics(raw: Mapping[str, Any], spec: TrainSpec) -> dict[str, A
|
|||||||
|
|
||||||
eval_reward = (
|
eval_reward = (
|
||||||
_as_float(
|
_as_float(
|
||||||
metrics.get("eval/robust_reward_worst", metrics.get("eval/reward_mean")),
|
metrics.get(
|
||||||
|
"eval/stress_reward_worst",
|
||||||
|
metrics.get(
|
||||||
|
"eval/robust_reward_worst", metrics.get("eval/reward_mean")
|
||||||
|
),
|
||||||
|
),
|
||||||
0.0,
|
0.0,
|
||||||
)
|
)
|
||||||
or 0.0
|
or 0.0
|
||||||
@@ -51,9 +56,12 @@ def canonicalize_metrics(raw: Mapping[str, Any], spec: TrainSpec) -> dict[str, A
|
|||||||
metrics["objective/coi_preserved"] = 0.0 if coi_level is None else coi_level
|
metrics["objective/coi_preserved"] = 0.0 if coi_level is None else coi_level
|
||||||
|
|
||||||
metrics["study/alpha"] = spec.study.alpha
|
metrics["study/alpha"] = spec.study.alpha
|
||||||
|
metrics["study/mode"] = "baseline" if bool(spec.study.no_robust) else "defended"
|
||||||
|
metrics["study/baseline_mode"] = float(bool(spec.study.no_robust))
|
||||||
metrics["study/lambda_coi"] = spec.study.lambda_coi
|
metrics["study/lambda_coi"] = spec.study.lambda_coi
|
||||||
metrics["study/robust_radius"] = spec.study.robust_radius
|
metrics["study/ambiguity_radius"] = spec.study.robust_radius
|
||||||
metrics["study/info_value"] = spec.study.info_value
|
metrics["study/info_value"] = spec.study.info_value
|
||||||
|
metrics["tiers"] = spec.algorithm.name
|
||||||
|
|
||||||
metrics["runtime/backend"] = spec.runtime.backend
|
metrics["runtime/backend"] = spec.runtime.backend
|
||||||
metrics["runtime/device"] = spec.runtime.device
|
metrics["runtime/device"] = spec.runtime.device
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
from typing import Any, Callable, Iterable, Mapping
|
from typing import Any, Callable, Iterable, Mapping
|
||||||
|
|
||||||
|
|
||||||
@@ -19,6 +21,42 @@ def _require_wandb():
|
|||||||
return wandb
|
return wandb
|
||||||
|
|
||||||
|
|
||||||
|
def _warn(message: str) -> None:
|
||||||
|
print(f"PHANTOM_WANDB_WARNING: {message}")
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_key(raw_key: str) -> str | None:
|
||||||
|
key = str(raw_key)
|
||||||
|
replacements = {
|
||||||
|
"no_robust": "baseline_mode",
|
||||||
|
"study/no_robust": "study/baseline_mode",
|
||||||
|
"study/robust_radius": "study/ambiguity_radius",
|
||||||
|
"robust_radius": "ambiguity_radius",
|
||||||
|
"robust_points": "ambiguity_points",
|
||||||
|
"robust_rollouts": "ambiguity_rollouts",
|
||||||
|
"robust_eval_enabled": "stress_eval_enabled",
|
||||||
|
"eval/robust_alpha_high": "eval/stress_alpha_high",
|
||||||
|
"eval/robust_alpha_low": "eval/stress_alpha_low",
|
||||||
|
"eval/robust_reward_worst": "eval/stress_reward_worst",
|
||||||
|
"eval/robust_revenue_worst": "eval/stress_revenue_worst",
|
||||||
|
"eval/robust_coi_leakage_worst": "eval/stress_coi_leakage_worst",
|
||||||
|
}
|
||||||
|
key = replacements.get(key, key)
|
||||||
|
if "robust" in key.lower():
|
||||||
|
return None
|
||||||
|
return key
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_payload(payload: Mapping[str, Any]) -> dict[str, Any]:
|
||||||
|
sanitized: dict[str, Any] = {}
|
||||||
|
for key, value in payload.items():
|
||||||
|
clean_key = _sanitize_key(str(key))
|
||||||
|
if clean_key is None:
|
||||||
|
continue
|
||||||
|
sanitized[clean_key] = value
|
||||||
|
return sanitized
|
||||||
|
|
||||||
|
|
||||||
def init_run(
|
def init_run(
|
||||||
*,
|
*,
|
||||||
mode: str,
|
mode: str,
|
||||||
@@ -34,7 +72,11 @@ def init_run(
|
|||||||
if group:
|
if group:
|
||||||
kwargs["group"] = group
|
kwargs["group"] = group
|
||||||
if sweep_mode:
|
if sweep_mode:
|
||||||
|
try:
|
||||||
run = wandb.init(**kwargs)
|
run = wandb.init(**kwargs)
|
||||||
|
except Exception as exc:
|
||||||
|
_warn(f"init failed in sweep mode ({exc})")
|
||||||
|
return None
|
||||||
if name and run is not None:
|
if name and run is not None:
|
||||||
run.name = name
|
run.name = name
|
||||||
return run
|
return run
|
||||||
@@ -42,18 +84,25 @@ def init_run(
|
|||||||
init_kwargs = dict(kwargs)
|
init_kwargs = dict(kwargs)
|
||||||
init_kwargs["project"] = project
|
init_kwargs["project"] = project
|
||||||
if config is not None:
|
if config is not None:
|
||||||
init_kwargs["config"] = dict(config)
|
init_kwargs["config"] = _sanitize_payload(dict(config))
|
||||||
if name:
|
if name:
|
||||||
init_kwargs["name"] = name
|
init_kwargs["name"] = name
|
||||||
if tags:
|
if tags:
|
||||||
init_kwargs["tags"] = list(tags)
|
init_kwargs["tags"] = list(tags)
|
||||||
|
try:
|
||||||
return wandb.init(**init_kwargs)
|
return wandb.init(**init_kwargs)
|
||||||
|
except Exception as exc:
|
||||||
|
_warn(f"init failed ({exc})")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def finish_run() -> None:
|
def finish_run() -> None:
|
||||||
wandb = get_wandb_module()
|
wandb = get_wandb_module()
|
||||||
if wandb is not None and wandb.run is not None:
|
if wandb is not None and wandb.run is not None:
|
||||||
|
try:
|
||||||
wandb.finish()
|
wandb.finish()
|
||||||
|
except Exception as exc:
|
||||||
|
_warn(f"finish failed ({exc})")
|
||||||
|
|
||||||
|
|
||||||
def current_config() -> dict[str, Any]:
|
def current_config() -> dict[str, Any]:
|
||||||
@@ -67,25 +116,45 @@ def update_run_config(config: Mapping[str, Any]) -> None:
|
|||||||
wandb = get_wandb_module()
|
wandb = get_wandb_module()
|
||||||
if wandb is None or wandb.run is None:
|
if wandb is None or wandb.run is None:
|
||||||
return
|
return
|
||||||
|
payload = _sanitize_payload(dict(config))
|
||||||
|
if not payload:
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
wandb.config.update(dict(config), allow_val_change=True)
|
wandb.config.update(payload, allow_val_change=True)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
wandb.config.update(dict(config))
|
try:
|
||||||
|
wandb.config.update(payload)
|
||||||
|
except Exception as exc:
|
||||||
|
_warn(f"config update failed ({exc})")
|
||||||
|
except Exception as exc:
|
||||||
|
_warn(f"config update failed ({exc})")
|
||||||
|
|
||||||
|
|
||||||
def log_metrics(metrics: Mapping[str, Any], *, step: int) -> None:
|
def log_metrics(metrics: Mapping[str, Any], *, step: int) -> None:
|
||||||
wandb = get_wandb_module()
|
wandb = get_wandb_module()
|
||||||
if wandb is None or wandb.run is None:
|
if wandb is None or wandb.run is None:
|
||||||
return
|
return
|
||||||
wandb.log(dict(metrics), step=step)
|
payload = _sanitize_payload(dict(metrics))
|
||||||
|
if not payload:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
wandb.log(payload, step=step)
|
||||||
|
except Exception as exc:
|
||||||
|
_warn(f"log failed at step {step} ({exc})")
|
||||||
|
|
||||||
|
|
||||||
def update_summary(metrics: Mapping[str, Any]) -> None:
|
def update_summary(metrics: Mapping[str, Any]) -> None:
|
||||||
wandb = get_wandb_module()
|
wandb = get_wandb_module()
|
||||||
if wandb is None or wandb.run is None:
|
if wandb is None or wandb.run is None:
|
||||||
return
|
return
|
||||||
for key, value in metrics.items():
|
payload = _sanitize_payload(dict(metrics))
|
||||||
|
if not payload:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
for key, value in payload.items():
|
||||||
wandb.run.summary[key] = value
|
wandb.run.summary[key] = value
|
||||||
|
except Exception as exc:
|
||||||
|
_warn(f"summary update failed ({exc})")
|
||||||
|
|
||||||
|
|
||||||
def run_agent(
|
def run_agent(
|
||||||
@@ -95,4 +164,39 @@ def run_agent(
|
|||||||
count: int | None = None,
|
count: int | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
wandb = _require_wandb()
|
wandb = _require_wandb()
|
||||||
wandb.agent(sweep_id, function=fn, count=count)
|
retry_max = max(0, int(os.getenv("PHANTOM_WANDB_AGENT_RETRIES", "8")))
|
||||||
|
retry_delay = max(1.0, float(os.getenv("PHANTOM_WANDB_AGENT_RETRY_DELAY", "5")))
|
||||||
|
retry_backoff = max(
|
||||||
|
1.0, float(os.getenv("PHANTOM_WANDB_AGENT_RETRY_BACKOFF", "1.5"))
|
||||||
|
)
|
||||||
|
retry_max_delay = max(
|
||||||
|
retry_delay,
|
||||||
|
float(os.getenv("PHANTOM_WANDB_AGENT_MAX_RETRY_DELAY", "60")),
|
||||||
|
)
|
||||||
|
|
||||||
|
target = None if count is None else max(0, int(count))
|
||||||
|
completed = 0
|
||||||
|
|
||||||
|
def _wrapped() -> None:
|
||||||
|
nonlocal completed
|
||||||
|
fn()
|
||||||
|
completed += 1
|
||||||
|
|
||||||
|
attempt = 0
|
||||||
|
while True:
|
||||||
|
remaining = None if target is None else max(0, int(target - completed))
|
||||||
|
if target is not None and remaining == 0:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
wandb.agent(sweep_id, function=_wrapped, count=remaining)
|
||||||
|
return
|
||||||
|
except Exception as exc:
|
||||||
|
attempt += 1
|
||||||
|
if attempt > retry_max:
|
||||||
|
raise
|
||||||
|
wait = min(retry_max_delay, retry_delay * (retry_backoff ** (attempt - 1)))
|
||||||
|
_warn(
|
||||||
|
f"agent disconnected (attempt {attempt}/{retry_max}, "
|
||||||
|
f"completed={completed}, remaining={remaining}): {exc}"
|
||||||
|
)
|
||||||
|
time.sleep(wait)
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ def _build_parser() -> argparse.ArgumentParser:
|
|||||||
parser.add_argument("--total-timesteps", type=int)
|
parser.add_argument("--total-timesteps", type=int)
|
||||||
parser.add_argument("--model-dir", type=str)
|
parser.add_argument("--model-dir", type=str)
|
||||||
parser.add_argument("--log-freq", type=int)
|
parser.add_argument("--log-freq", type=int)
|
||||||
|
parser.add_argument("--hist-freq", type=int)
|
||||||
parser.add_argument("--checkpoint-interval", type=int)
|
parser.add_argument("--checkpoint-interval", type=int)
|
||||||
parser.add_argument("--device", type=str)
|
parser.add_argument("--device", type=str)
|
||||||
|
|
||||||
@@ -68,7 +69,6 @@ def _build_parser() -> argparse.ArgumentParser:
|
|||||||
parser.add_argument("--no-robust", action="store_true")
|
parser.add_argument("--no-robust", action="store_true")
|
||||||
parser.add_argument("--eta-ux", type=float)
|
parser.add_argument("--eta-ux", type=float)
|
||||||
parser.add_argument("--reward-profit-weight", type=float)
|
parser.add_argument("--reward-profit-weight", type=float)
|
||||||
parser.add_argument("--revenue-weight", type=float)
|
|
||||||
|
|
||||||
parser.add_argument("--price-low", type=float)
|
parser.add_argument("--price-low", type=float)
|
||||||
parser.add_argument("--price-high", type=float)
|
parser.add_argument("--price-high", type=float)
|
||||||
@@ -126,6 +126,7 @@ def _overrides_from_args(args: argparse.Namespace) -> dict[str, Any]:
|
|||||||
"total_timesteps": args.total_timesteps,
|
"total_timesteps": args.total_timesteps,
|
||||||
"model_dir": args.model_dir,
|
"model_dir": args.model_dir,
|
||||||
"log_freq": args.log_freq,
|
"log_freq": args.log_freq,
|
||||||
|
"hist_freq": args.hist_freq,
|
||||||
"checkpoint_interval": args.checkpoint_interval,
|
"checkpoint_interval": args.checkpoint_interval,
|
||||||
"device": args.device,
|
"device": args.device,
|
||||||
"alpha": args.alpha,
|
"alpha": args.alpha,
|
||||||
@@ -139,7 +140,6 @@ def _overrides_from_args(args: argparse.Namespace) -> dict[str, Any]:
|
|||||||
"no_robust": args.no_robust,
|
"no_robust": args.no_robust,
|
||||||
"eta_ux": args.eta_ux,
|
"eta_ux": args.eta_ux,
|
||||||
"reward_profit_weight": args.reward_profit_weight,
|
"reward_profit_weight": args.reward_profit_weight,
|
||||||
"revenue_weight": args.revenue_weight,
|
|
||||||
"price_low": args.price_low,
|
"price_low": args.price_low,
|
||||||
"price_high": args.price_high,
|
"price_high": args.price_high,
|
||||||
"action_levels": args.action_levels,
|
"action_levels": args.action_levels,
|
||||||
@@ -179,8 +179,29 @@ def _overrides_from_args(args: argparse.Namespace) -> dict[str, Any]:
|
|||||||
|
|
||||||
|
|
||||||
def main(argv: list[str] | None = None) -> None:
|
def main(argv: list[str] | None = None) -> None:
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
# Ensure data is downloaded
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
project_root = Path(__file__).parents[1]
|
||||||
|
data_dir = project_root / "experiments" / "collected_data"
|
||||||
|
needs_pull = (not data_dir.exists()) or (not any(data_dir.iterdir()))
|
||||||
|
if needs_pull:
|
||||||
|
try:
|
||||||
|
subprocess.run(["make", "data.pull"], cwd=str(project_root), check=True)
|
||||||
|
except (subprocess.SubprocessError, OSError) as exc:
|
||||||
|
sys.path.insert(0, str(project_root))
|
||||||
|
try:
|
||||||
|
from scripts.hf_data import pull
|
||||||
|
|
||||||
|
pull()
|
||||||
|
except (ImportError, OSError, RuntimeError, ValueError) as fallback_exc:
|
||||||
|
print(
|
||||||
|
f"Warning: data.pull failed ({exc}); fallback pull failed ({fallback_exc})"
|
||||||
|
)
|
||||||
|
|
||||||
configure_logging()
|
configure_logging()
|
||||||
raw_args = list(sys.argv[1:] if argv is None else argv)
|
raw_args = list(sys.argv[1:] if argv is None else argv)
|
||||||
run_kind = _probe_run_kind(raw_args)
|
run_kind = _probe_run_kind(raw_args)
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from .lib.coi import (
|
|||||||
)
|
)
|
||||||
from .lib.behavior import get_transition_models, trajectory_to_events
|
from .lib.behavior import get_transition_models, trajectory_to_events
|
||||||
from .lib.wrappers import EconomicMetricsWrapper
|
from .lib.wrappers import EconomicMetricsWrapper
|
||||||
|
from .jax.robust import select_adversarial_alpha_jax, _JAX_OK
|
||||||
|
|
||||||
|
|
||||||
class _ActionPricingEngine(PricingEngine):
|
class _ActionPricingEngine(PricingEngine):
|
||||||
@@ -121,6 +122,7 @@ class PHANTOM(gym.Env):
|
|||||||
self._prices = None
|
self._prices = None
|
||||||
self._demand = None
|
self._demand = None
|
||||||
self._step_count = 0
|
self._step_count = 0
|
||||||
|
self._global_step = 0 # monotonic; used as JAX RNG seed across resets
|
||||||
self._demand_history = []
|
self._demand_history = []
|
||||||
self._price_history = []
|
self._price_history = []
|
||||||
self._revenue_history = []
|
self._revenue_history = []
|
||||||
@@ -128,6 +130,13 @@ class PHANTOM(gym.Env):
|
|||||||
self._initial_episode_prices = None
|
self._initial_episode_prices = None
|
||||||
self._trajectories = [] # session trajectories for agent prob calculation
|
self._trajectories = [] # session trajectories for agent prob calculation
|
||||||
self.baseline_prices = np.full(self.n_products, self.price_bounds[0])
|
self.baseline_prices = np.full(self.n_products, self.price_bounds[0])
|
||||||
|
self.anchor_prices = np.full(
|
||||||
|
self.n_products,
|
||||||
|
float(np.clip(float(self.human_params[0]), *self.price_bounds)),
|
||||||
|
)
|
||||||
|
self.competitive_cap = float(
|
||||||
|
min(self.price_bounds[1], float(np.mean(self.anchor_prices)) * 1.15)
|
||||||
|
)
|
||||||
self._low_margin_streak = 0 # consecutive steps below margin_floor
|
self._low_margin_streak = 0 # consecutive steps below margin_floor
|
||||||
self._last_agent_prob = float(self.alpha)
|
self._last_agent_prob = float(self.alpha)
|
||||||
self._last_alpha_adv = float(self.alpha)
|
self._last_alpha_adv = float(self.alpha)
|
||||||
@@ -167,19 +176,28 @@ class PHANTOM(gym.Env):
|
|||||||
self.market.Nhumans = self.N - n_agents
|
self.market.Nhumans = self.N - n_agents
|
||||||
|
|
||||||
def _decode_action(self, action) -> np.ndarray:
|
def _decode_action(self, action) -> np.ndarray:
|
||||||
base = (
|
prev = self._prices
|
||||||
self._prices
|
base = self.anchor_prices
|
||||||
if self._prices is not None
|
|
||||||
else np.full(self.n_products, self.price_bounds[0], dtype=float)
|
def _blend(target: np.ndarray) -> np.ndarray:
|
||||||
)
|
if prev is None:
|
||||||
|
lower = float(self.price_bounds[0])
|
||||||
|
return np.clip(target, lower, self.competitive_cap)
|
||||||
|
blended = 0.75 * np.asarray(prev, dtype=float) + 0.25 * target
|
||||||
|
lower = float(self.price_bounds[0])
|
||||||
|
return np.clip(blended, lower, self.competitive_cap)
|
||||||
|
|
||||||
if np.isscalar(action):
|
if np.isscalar(action):
|
||||||
idx = int(np.clip(int(action), 0, self.action_levels - 1))
|
idx = int(np.clip(int(action), 0, self.action_levels - 1))
|
||||||
return np.clip(base * self._action_scales[idx], *self.price_bounds)
|
target = base * self._action_scales[idx]
|
||||||
|
return _blend(target)
|
||||||
a = np.asarray(action)
|
a = np.asarray(action)
|
||||||
if a.size == 1:
|
if a.size == 1:
|
||||||
idx = int(np.clip(int(a.reshape(-1)[0]), 0, self.action_levels - 1))
|
idx = int(np.clip(int(a.reshape(-1)[0]), 0, self.action_levels - 1))
|
||||||
return np.clip(base * self._action_scales[idx], *self.price_bounds)
|
target = base * self._action_scales[idx]
|
||||||
return np.clip(a.astype(float), *self.price_bounds)
|
return _blend(target)
|
||||||
|
lower = float(self.price_bounds[0])
|
||||||
|
return np.clip(a.astype(float), lower, self.competitive_cap)
|
||||||
|
|
||||||
def _compute_agent_prob(self, trajectories=None) -> float:
|
def _compute_agent_prob(self, trajectories=None) -> float:
|
||||||
trajectories = (
|
trajectories = (
|
||||||
@@ -214,18 +232,23 @@ class PHANTOM(gym.Env):
|
|||||||
coi_penalty = self.lambda_coi * coi_leakage * info_budget
|
coi_penalty = self.lambda_coi * coi_leakage * info_budget
|
||||||
|
|
||||||
if len(self._price_history) > 0:
|
if len(self._price_history) > 0:
|
||||||
volatility = float(
|
prev_prices = np.asarray(self._price_history[-1], dtype=float)
|
||||||
np.mean(
|
rel_change = (prices - prev_prices) / np.maximum(prev_prices, 1.0)
|
||||||
np.abs(prices - self._price_history[-1])
|
volatility = float(np.mean(np.abs(rel_change)))
|
||||||
/ np.maximum(self.baseline_prices, 1.0)
|
upward_volatility = float(np.mean(np.clip(rel_change, 0.0, None)))
|
||||||
)
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
volatility = 0.0
|
volatility = 0.0
|
||||||
ux_penalty = self.eta_ux * info_budget * volatility
|
upward_volatility = 0.0
|
||||||
|
ux_penalty = self.eta_ux * info_budget * (volatility + 0.5 * upward_volatility)
|
||||||
|
|
||||||
|
competitive_anchor = float(np.mean(self.anchor_prices))
|
||||||
|
price_ratio = prices / max(competitive_anchor, 1.0)
|
||||||
|
supra_excess = np.clip(price_ratio - 1.15, 0.0, None)
|
||||||
|
supra_penalty = 4.0 * info_budget * float(np.mean(np.square(supra_excess)))
|
||||||
|
supra_share = float(np.mean(supra_excess > 0.0))
|
||||||
|
|
||||||
reward_revenue = self.reward_profit_weight * profit
|
reward_revenue = self.reward_profit_weight * profit
|
||||||
reward = reward_revenue - coi_penalty - ux_penalty
|
reward = reward_revenue - coi_penalty - ux_penalty - supra_penalty
|
||||||
|
|
||||||
return reward, {
|
return reward, {
|
||||||
"revenue": revenue,
|
"revenue": revenue,
|
||||||
@@ -238,6 +261,10 @@ class PHANTOM(gym.Env):
|
|||||||
"coi_info_budget": info_budget,
|
"coi_info_budget": info_budget,
|
||||||
"ux_penalty": ux_penalty,
|
"ux_penalty": ux_penalty,
|
||||||
"volatility": volatility,
|
"volatility": volatility,
|
||||||
|
"upward_volatility": upward_volatility,
|
||||||
|
"supra_penalty": supra_penalty,
|
||||||
|
"supra_share": supra_share,
|
||||||
|
"competitive_anchor": competitive_anchor,
|
||||||
"reward_revenue": reward_revenue,
|
"reward_revenue": reward_revenue,
|
||||||
"reward_total": reward,
|
"reward_total": reward,
|
||||||
}
|
}
|
||||||
@@ -261,8 +288,37 @@ class PHANTOM(gym.Env):
|
|||||||
return float(np.mean(rewards)) if rewards else 0.0
|
return float(np.mean(rewards)) if rewards else 0.0
|
||||||
|
|
||||||
def _select_adversarial_alpha(self, prices: np.ndarray) -> float:
|
def _select_adversarial_alpha(self, prices: np.ndarray) -> float:
|
||||||
"""inner robust step: evaluate candidates and pick worst-case alpha"""
|
"""inner robust step: pick worst-case alpha from the ambiguity interval.
|
||||||
|
|
||||||
|
when JAX is available and robust_rollouts==1 we use a vmapped pass over
|
||||||
|
all K candidates in a single call (no Python loop, no market.act overhead).
|
||||||
|
the JAX path approximates demand as the mixed closed-form d(p;theta) signal
|
||||||
|
rather than running full trajectory sampling, which is accurate for the
|
||||||
|
alpha-selection decision while being dramatically cheaper.
|
||||||
|
|
||||||
|
when robust_rollouts>1 or JAX is unavailable we fall back to the sequential
|
||||||
|
market.act() loop so behavior is identical to the original implementation.
|
||||||
|
"""
|
||||||
candidates = self._alpha_candidates()
|
candidates = self._alpha_candidates()
|
||||||
|
if len(candidates) == 1:
|
||||||
|
return float(candidates[0])
|
||||||
|
|
||||||
|
if _JAX_OK and self.robust_rollouts == 1:
|
||||||
|
best_alpha, _ = select_adversarial_alpha_jax(
|
||||||
|
candidates=candidates,
|
||||||
|
prices=prices,
|
||||||
|
human_params=self.market.human_params,
|
||||||
|
agent_params=self.market.agent_params,
|
||||||
|
noise_std=self.market.noise_std,
|
||||||
|
baseline_prices=self.baseline_prices,
|
||||||
|
lambda_coi=self.lambda_coi,
|
||||||
|
info_value=self.info_value,
|
||||||
|
reward_profit_weight=self.reward_profit_weight,
|
||||||
|
rng_seed=self._global_step,
|
||||||
|
)
|
||||||
|
return best_alpha
|
||||||
|
|
||||||
|
# fallback: full trajectory-based sequential evaluation
|
||||||
evaluations = [
|
evaluations = [
|
||||||
(float(alpha), self._evaluate_candidate(float(alpha), prices))
|
(float(alpha), self._evaluate_candidate(float(alpha), prices))
|
||||||
for alpha in candidates
|
for alpha in candidates
|
||||||
@@ -299,6 +355,7 @@ class PHANTOM(gym.Env):
|
|||||||
def step(self, action):
|
def step(self, action):
|
||||||
self._prices = self._decode_action(action)
|
self._prices = self._decode_action(action)
|
||||||
alpha_adv = self._select_adversarial_alpha(self._prices)
|
alpha_adv = self._select_adversarial_alpha(self._prices)
|
||||||
|
self._global_step += 1 # always increment; JAX path may have already done so
|
||||||
self._set_market_mix(alpha_adv)
|
self._set_market_mix(alpha_adv)
|
||||||
self._platform_stub.set_prices(self._prices)
|
self._platform_stub.set_prices(self._prices)
|
||||||
self._step_count += 1
|
self._step_count += 1
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
All hardcoded paths should reference this module
|
All hardcoded paths should reference this module
|
||||||
Paths can be overridden via environment variables
|
Paths can be overridden via environment variables
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -9,24 +10,34 @@ from pathlib import Path
|
|||||||
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
|
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
|
||||||
|
|
||||||
# data directories
|
# data directories
|
||||||
DATA_DIR = Path(os.getenv('PHANTOM_DATA_DIR', PROJECT_ROOT / 'data'))
|
DATA_DIR = Path(os.getenv("PHANTOM_DATA_DIR", PROJECT_ROOT / "data"))
|
||||||
EXPERIMENTS_DIR = Path(os.getenv('PHANTOM_EXPERIMENTS_DIR', PROJECT_ROOT / 'experiments'))
|
EXPERIMENTS_DIR = Path(
|
||||||
|
os.getenv("PHANTOM_EXPERIMENTS_DIR", PROJECT_ROOT / "experiments")
|
||||||
|
)
|
||||||
|
|
||||||
# agent/human interaction data
|
# agent/human interaction data
|
||||||
AGENT_DATA_DIR = Path(os.getenv('PHANTOM_AGENT_DATA_DIR', DATA_DIR / 'agents'))
|
AGENT_DATA_DIR = Path(os.getenv("PHANTOM_AGENT_DATA_DIR", DATA_DIR / "agents"))
|
||||||
HUMAN_DATA_DIR = Path(os.getenv('PHANTOM_HUMAN_DATA_DIR', DATA_DIR / 'humans'))
|
HUMAN_DATA_DIR = Path(os.getenv("PHANTOM_HUMAN_DATA_DIR", DATA_DIR / "humans"))
|
||||||
|
|
||||||
# RL simulation runs
|
# RL simulation runs
|
||||||
SIM_RUNS_DIR = Path(os.getenv('PHANTOM_SIM_RUNS_DIR', PROJECT_ROOT / 'sim' / 'rl' / 'runs'))
|
SIM_RUNS_DIR = Path(
|
||||||
|
os.getenv("PHANTOM_SIM_RUNS_DIR", PROJECT_ROOT / "sim" / "rl" / "runs")
|
||||||
|
)
|
||||||
|
|
||||||
# model artifacts
|
# model artifacts
|
||||||
MODEL_REGISTRY_DIR = Path(os.getenv('PHANTOM_MODEL_REGISTRY_DIR', DATA_DIR / 'models'))
|
MODEL_REGISTRY_DIR = Path(os.getenv("PHANTOM_MODEL_REGISTRY_DIR", DATA_DIR / "models"))
|
||||||
|
|
||||||
# collected experiment data
|
# collected experiment data
|
||||||
COLLECTED_DATA_DIR = Path(os.getenv('PHANTOM_COLLECTED_DATA_DIR', EXPERIMENTS_DIR / 'agents' / 'collected_data'))
|
COLLECTED_DATA_DIR = Path(
|
||||||
|
os.getenv(
|
||||||
|
"PHANTOM_COLLECTED_DATA_DIR", EXPERIMENTS_DIR / "agents" / "collected_data"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# notebook outputs
|
# notebook outputs
|
||||||
NOTEBOOK_OUTPUT_DIR = Path(os.getenv('PHANTOM_NOTEBOOK_OUTPUT_DIR', EXPERIMENTS_DIR / 'notebooks' / 'outputs'))
|
NOTEBOOK_OUTPUT_DIR = Path(
|
||||||
|
os.getenv("PHANTOM_NOTEBOOK_OUTPUT_DIR", EXPERIMENTS_DIR / "notebooks" / "outputs")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def ensure_dir(path: Path) -> Path:
|
def ensure_dir(path: Path) -> Path:
|
||||||
@@ -51,15 +62,18 @@ def get_sim_path(*parts: str) -> Path:
|
|||||||
|
|
||||||
|
|
||||||
# service configuration (from .env)
|
# service configuration (from .env)
|
||||||
KAFKA_HOST = os.getenv('KAFKA_HOST', 'localhost')
|
KAFKA_HOST = os.getenv("KAFKA_HOST", "localhost")
|
||||||
KAFKA_PORT = os.getenv('KAFKA_PORT', '9092')
|
KAFKA_PORT = os.getenv("KAFKA_PORT", "9092")
|
||||||
KAFKA_BROKER = f"{KAFKA_HOST}:{KAFKA_PORT}"
|
KAFKA_BROKER = f"{KAFKA_HOST}:{KAFKA_PORT}"
|
||||||
|
|
||||||
REDIS_HOST = os.getenv('REDIS_HOST', 'localhost')
|
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
|
||||||
REDIS_PORT = int(os.getenv('REDIS_PORT', '6379'))
|
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
|
||||||
|
|
||||||
SUPABASE_URL = os.getenv('NEXT_PUBLIC_SUPABASE_URL', '')
|
SUPABASE_URL = os.getenv("NEXT_PUBLIC_SUPABASE_URL", "")
|
||||||
SUPABASE_ANON_KEY = os.getenv('NEXT_PUBLIC_SUPABASE_ANON_KEY', '')
|
SUPABASE_ANON_KEY = os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY", "")
|
||||||
|
|
||||||
BACKEND_PORT = int(os.getenv('BACKEND_PORT', '5000'))
|
BACKEND_PORT = int(os.getenv("BACKEND_PORT", "5000"))
|
||||||
PROVIDER_PORT = int(os.getenv('PROVIDER_PORT', '5001'))
|
PROVIDER_PORT = int(os.getenv("PROVIDER_PORT", "5001"))
|
||||||
|
|
||||||
|
# huggingface dataset repo for collected behavioral data
|
||||||
|
HF_DATASET_REPO = os.getenv("HF_DATASET_REPO", "velocitatem/phantom-collected-data")
|
||||||
|
|||||||
15
nx.json
15
nx.json
@@ -58,6 +58,21 @@
|
|||||||
"benchmark": {
|
"benchmark": {
|
||||||
"cache": false
|
"cache": false
|
||||||
},
|
},
|
||||||
|
"whoclicked-publish": {
|
||||||
|
"cache": false
|
||||||
|
},
|
||||||
|
"tpu-ray-bootstrap": {
|
||||||
|
"cache": false
|
||||||
|
},
|
||||||
|
"tpu-ray-deps": {
|
||||||
|
"cache": false
|
||||||
|
},
|
||||||
|
"tpu-ray-verify": {
|
||||||
|
"cache": false
|
||||||
|
},
|
||||||
|
"tpu-ray-teardown": {
|
||||||
|
"cache": false
|
||||||
|
},
|
||||||
"up": {
|
"up": {
|
||||||
"cache": false
|
"cache": false
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
],
|
],
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"nx": "nx",
|
"nx": "nx",
|
||||||
|
"manim:render": "nx run manim:render",
|
||||||
|
"manim:render-all": "nx run manim:render-all",
|
||||||
"projects": "nx show projects",
|
"projects": "nx show projects",
|
||||||
"graph": "nx graph",
|
"graph": "nx graph",
|
||||||
"web:dev": "nx run web:dev",
|
"web:dev": "nx run web:dev",
|
||||||
|
|||||||
@@ -1,84 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from scenes import SCENE_ORDER
|
|
||||||
|
|
||||||
|
|
||||||
def parse_args() -> argparse.Namespace:
|
|
||||||
parser = argparse.ArgumentParser(description="Render thesis-defense Manim scenes")
|
|
||||||
parser.add_argument(
|
|
||||||
"--quality",
|
|
||||||
default="qm",
|
|
||||||
choices=["ql", "qm", "qh", "qk"],
|
|
||||||
help="Manim quality preset",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--scene",
|
|
||||||
action="append",
|
|
||||||
dest="scenes",
|
|
||||||
help="Scene name; repeat flag to render many",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--preview", action="store_true", help="Open video after each render"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--list", action="store_true", help="List available scenes and exit"
|
|
||||||
)
|
|
||||||
return parser.parse_args()
|
|
||||||
|
|
||||||
|
|
||||||
def validate_requested(requested: list[str]) -> list[str]:
|
|
||||||
missing = [name for name in requested if name not in SCENE_ORDER]
|
|
||||||
if missing:
|
|
||||||
choices = ", ".join(SCENE_ORDER)
|
|
||||||
raise ValueError(f"Unknown scenes: {', '.join(missing)}. Choices: {choices}")
|
|
||||||
return requested
|
|
||||||
|
|
||||||
|
|
||||||
def run_manim(scene_file: Path, scene_name: str, quality: str, preview: bool) -> None:
|
|
||||||
cmd = [sys.executable, "-m", "manim"]
|
|
||||||
if preview:
|
|
||||||
cmd.append("-p")
|
|
||||||
cmd.extend([f"-{quality}", str(scene_file), scene_name])
|
|
||||||
subprocess.run(cmd, cwd=scene_file.parent, check=True)
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
|
||||||
args = parse_args()
|
|
||||||
if args.list:
|
|
||||||
for scene in SCENE_ORDER:
|
|
||||||
print(scene)
|
|
||||||
return 0
|
|
||||||
|
|
||||||
scenes = validate_requested(args.scenes) if args.scenes else list(SCENE_ORDER)
|
|
||||||
scene_file = Path(__file__).resolve().parent / "scenes.py"
|
|
||||||
|
|
||||||
try:
|
|
||||||
for scene_name in scenes:
|
|
||||||
run_manim(
|
|
||||||
scene_file=scene_file,
|
|
||||||
scene_name=scene_name,
|
|
||||||
quality=args.quality,
|
|
||||||
preview=args.preview,
|
|
||||||
)
|
|
||||||
except FileNotFoundError:
|
|
||||||
print(
|
|
||||||
"manim executable not found. Install Manim in your Python environment.",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
return 2
|
|
||||||
except ValueError as exc:
|
|
||||||
print(str(exc), file=sys.stderr)
|
|
||||||
return 2
|
|
||||||
except subprocess.CalledProcessError as exc:
|
|
||||||
return exc.returncode
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
raise SystemExit(main())
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -630,3 +630,41 @@ Volume: 21},
|
|||||||
note = {Publisher: Institute of Mathematical Statistics},
|
note = {Publisher: Institute of Mathematical Statistics},
|
||||||
pages = {50 -- 60},
|
pages = {50 -- 60},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@article{horace_he_and_thinking_machines_lab_defeating_2025,
|
||||||
|
title = {Defeating {Nondeterminism} in {LLM} {Inference}},
|
||||||
|
url = {https://thinkingmachines.ai/blog/defeating-nondeterminism-in-llm-inference/},
|
||||||
|
doi = {10.64434/tml.20250910},
|
||||||
|
abstract = {Reproducibility is a bedrock of scientific progress. However, it’s remarkably difficult to get reproducible results out of large language models.
|
||||||
|
For example, you might observe that asking ChatGPT the same question multiple times provides different results. This by itself is not surprising, since getting a result from a language model involves “sampling”, a process that converts the language model’s output into a probability distribution and probabilistically selects a token.
|
||||||
|
What might be more surprising is that even when we adjust the temperature down to 0 (thus making the sampling theoretically deterministic; this means that the LLM always chooses the highest probability token, which is called greedy sampling), LLM APIs are still not deterministic in practice (see past discussions here, here, or here). Even when running inference on your own hardware with an OSS inference library like vLLM or SGLang, sampling still isn’t deterministic (see here or here).},
|
||||||
|
language = {en},
|
||||||
|
urldate = {2026-03-10},
|
||||||
|
journal = {Thinking Machines Lab: Connectionism},
|
||||||
|
author = {{Horace He and Thinking Machines Lab}},
|
||||||
|
year = {2025},
|
||||||
|
file = {Snapshot:/home/velocitatem/Zotero/storage/U5JG4CNM/defeating-nondeterminism-in-llm-inference.html:text/html},
|
||||||
|
}
|
||||||
|
|
||||||
|
@misc{moritz_ray_2018,
|
||||||
|
title = {Ray: {A} {Distributed} {Framework} for {Emerging} {AI} {Applications}},
|
||||||
|
shorttitle = {Ray},
|
||||||
|
url = {http://arxiv.org/abs/1712.05889},
|
||||||
|
doi = {10.48550/arXiv.1712.05889},
|
||||||
|
abstract = {The next generation of AI applications will continuously interact with the environment and learn from these interactions. These applications impose new and demanding systems requirements, both in terms of performance and flexibility. In this paper, we consider these requirements and present Ray---a distributed system to address them. Ray implements a unified interface that can express both task-parallel and actor-based computations, supported by a single dynamic execution engine. To meet the performance requirements, Ray employs a distributed scheduler and a distributed and fault-tolerant store to manage the system's control state. In our experiments, we demonstrate scaling beyond 1.8 million tasks per second and better performance than existing specialized systems for several challenging reinforcement learning applications.},
|
||||||
|
urldate = {2026-03-13},
|
||||||
|
publisher = {arXiv},
|
||||||
|
author = {Moritz, Philipp and Nishihara, Robert and Wang, Stephanie and Tumanov, Alexey and Liaw, Richard and Liang, Eric and Elibol, Melih and Yang, Zongheng and Paul, William and Jordan, Michael I. and Stoica, Ion},
|
||||||
|
month = sep,
|
||||||
|
year = {2018},
|
||||||
|
note = {arXiv:1712.05889 [cs]},
|
||||||
|
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Artificial Intelligence, Computer Science - Distributed, Parallel, and Cluster Computing},
|
||||||
|
file = {Preprint PDF:/home/velocitatem/Zotero/storage/SUTDF5BP/Moritz et al. - 2018 - Ray A Distributed Framework for Emerging AI Applications.pdf:application/pdf;Snapshot:/home/velocitatem/Zotero/storage/5GV2DUAA/1712.html:text/html},
|
||||||
|
}
|
||||||
|
|
||||||
|
@misc{biewald_experiment_2020,
|
||||||
|
title = {Experiment {Tracking} with {Weights} and {Biases}},
|
||||||
|
url = {https://www.wandb.com/},
|
||||||
|
author = {Biewald, Lukas},
|
||||||
|
year = {2020},
|
||||||
|
}
|
||||||
|
|||||||
@@ -8,9 +8,9 @@
|
|||||||
|
|
||||||
\section{Introduction}
|
\section{Introduction}
|
||||||
|
|
||||||
In this paper we present an exploration and defense against the presence of new commercial entities in digitally powered platforms, preserving market equilibrium in the age of AI. This research establishes the following contributions: definition and formalization of non-human transactors in e-commerce platforms, development of a testing-ground for capturing the behavioral essence of these transactors across a large variety of digital systems, construction of a discriminative model (to prove separability) as a strong learner for downstream mitigation of contamination by non-human entities, translation of such learned separability into existing dynamic pricing machine learning loops, and finally establishment of a high-level KPI-affecting causal effect and cost-saving framework for the future of internet commerce in the presence of such non-human learners.
|
In this paper we present an exploration and defense against the presence of new commercial entities in digitally powered platforms, preserving market equilibrium in the age of AI. This research establishes the following contributions: definition and formalization of non-human transactors in e-commerce platforms, development of a testing-ground for capturing the behavioral essence of these transactors across a large variety of digital systems, construction of a discriminative model (to prove distinguishability) as a strong learner for downstream mitigation of contamination by non-human entities, translation of such learned distinguishability into existing dynamic pricing machine learning loops, and finally establishment of a high-level KPI-affecting causal effect and cost-saving framework for the future of internet commerce in the presence of such non-human learners.
|
||||||
|
|
||||||
This research effort touches a large variety of domains, spanning behavioral economics for understanding the rationality of behavior as theorized by the concept of homo economicus, agent-based modeling to translate our learned separability into disjoint dynamic pricing systems, reinforcement learning which serves as the SOTA for price-learners, and dynamic pricing and market equilibrium theory to understand the risks of possible supra-competitive pricing phenomena in cases of adversarial pricing systems driving the market out of equilibrium. \footnote{Given the rapid evolution of the field we acknowledge all developments with a cutoff set at the date of March 1st 2026.}
|
This research effort touches a large variety of domains, spanning behavioral economics for understanding the rationality of behavior as theorized by the concept of homo economicus, agent-based modeling to translate our learned distinguishability into disjoint dynamic pricing systems, reinforcement learning which serves as the SOTA for price-learners, and dynamic pricing and market equilibrium theory to understand the risks of possible supra-competitive pricing phenomena in cases of adversarial pricing systems driving the market out of equilibrium. \footnote{Given the rapid evolution of the field we acknowledge all developments with a cutoff set at the date of March 1st 2026.}
|
||||||
|
|
||||||
\subsection{Motivation and Market Context}
|
\subsection{Motivation and Market Context}
|
||||||
|
|
||||||
@@ -30,7 +30,7 @@ We formally define interaction data as coming from some actor which can either b
|
|||||||
This dissertation is organized around one main research question and three supporting sub-questions:
|
This dissertation is organized around one main research question and three supporting sub-questions:
|
||||||
\begin{enumerate}
|
\begin{enumerate}
|
||||||
\item[\textbf{Main RQ}] How can dynamic pricing systems preserve margin integrity when transaction orchestration is increasingly mediated by non-human agents?
|
\item[\textbf{Main RQ}] How can dynamic pricing systems preserve margin integrity when transaction orchestration is increasingly mediated by non-human agents?
|
||||||
\item[\textbf{SQ1}] \textit{Separability}: Can agent and human sessions be reliably distinguished from behavioral interaction signals alone, without relying on network-level or device fingerprinting?
|
\item[\textbf{SQ1}] \textit{Distinguishability}: Can agent and human sessions be reliably distinguished from behavioral interaction signals alone, without relying on network-level or device fingerprinting?
|
||||||
\item[\textbf{SQ2}] \textit{Theoretical Impact}: What is the formal relationship between agent contamination levels and the erosion of pricing power in dynamic pricing systems?
|
\item[\textbf{SQ2}] \textit{Theoretical Impact}: What is the formal relationship between agent contamination levels and the erosion of pricing power in dynamic pricing systems?
|
||||||
\item[\textbf{SQ3}] \textit{Robust Mitigation}: How can pricing policies be constructed to maintain margin integrity under unknown and non-stationary levels of agent contamination?
|
\item[\textbf{SQ3}] \textit{Robust Mitigation}: How can pricing policies be constructed to maintain margin integrity under unknown and non-stationary levels of agent contamination?
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
@@ -64,4 +64,4 @@ Extract final result $r$ from terminal state\;
|
|||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
|
|
||||||
The previously described goal of separability allows us to formulate a task which entails taking raw interaction data for either actor and creating a composite demand estimate $\hat{q}$. We propose a robust optimization objective defined in our methodology, transforming the pricing problem into a form of Distributionally Robust Optimization \parencite{kuhn_distributionally_2025} where the learner must guard against adversarial contamination in observed demand distributions. In this setting we must learn to make decisions that perform under the assumption of not having a single estimated probability distribution but under an ambiguity set of any distribution, of which we have limited information. In our case, as stated, this is a mixture of distributions with a parameter which is unknown and non-stationary.
|
The previously described goal of distinguishability allows us to formulate a task which entails taking raw interaction data for either actor and creating a composite demand estimate $\hat{q}$. We propose a robust optimization objective defined in our methodology, transforming the pricing problem into a form of Distributionally Robust Optimization \parencite{kuhn_distributionally_2025} where the learner must guard against adversarial contamination in observed demand distributions. In this setting we must learn to make decisions that perform under the assumption of not having a single estimated probability distribution but under an ambiguity set of any distribution, of which we have limited information. In our case, as stated, this is a mixture of distributions with a parameter which is unknown and non-stationary.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
\section{Literature Review}
|
\section{Literature Review}
|
||||||
|
|
||||||
To better understand all wedges of the current works, we must start by exploring the nature of agents, agentic computer use and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. This latter case of agents shopping is currently pending legal action in the case of \textcite{noauthor_amazoncom_2026} which is currently being treated as a violation of the Computer Fraud and Abuse Act. The introduction of these mediating actor entities into economic systems is further creating a threat of false-name bidding \parencite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems demonstrates whitewashing in AI agents which can ignore defensive mechanisms by re-entry with different identities \parencite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for separating non-human reconnaissance from genuine human demand expression and integrating that separation into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap is what our contribution aims to address, particularly for the aforementioned stakeholder groups.
|
To better understand all wedges of the current works, we must start by exploring the nature of agents, agentic computer use and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. This latter case of agents shopping is currently pending legal action in the case of \textcite{noauthor_amazoncom_2026} which is currently being treated as a violation of the Computer Fraud and Abuse Act. The introduction of these mediating actor entities into economic systems is further creating a threat of false-name bidding \parencite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems demonstrates whitewashing in AI agents which can ignore defensive mechanisms by re-entry with different identities \parencite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for distinguishing non-human reconnaissance from genuine human demand expression and integrating that distinguishability into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap is what our contribution aims to address, particularly for the aforementioned stakeholder groups.
|
||||||
|
|
||||||
\subsection{Agent Taxonomy and Definitions}
|
\subsection{Agent Taxonomy and Definitions}
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
% Extra notes and clarifications: we observed some humans and get their transition probabilities between event types
|
% Extra notes and clarifications: we observed some humans and get their transition probabilities between event types
|
||||||
% We modify behavioral profiles of transition matrices with price elasticity matrices generated by sample valuations of a distribution.
|
% We modify behavioral profiles of transition matrices with price elasticity matrices generated by sample valuations of a distribution.
|
||||||
|
|
||||||
This section details the theoretical and practical framework developed to address dynamic pricing under the influence of non-human actors. We begin by formalizing the problem environment and the nature of the actors. We then derive the \textit{Cost of Information} (COI) theorem, proving the erosion of pricing power in the limit of agent saturation. Following this, we outline our generative contamination strategy using GOFAI-driven separability and transition probability learning. Finally, we formulate the robust control problem as a Stackelberg game solved via Distributionally Robust Reinforcement Learning (DR-RL) with constructed ambiguity sets.
|
This section details the theoretical and practical framework developed to address dynamic pricing under the influence of non-human actors. We begin by formalizing the problem environment and the nature of the actors. We then derive the \textit{Cost of Information} (COI) theorem, proving the erosion of pricing power in the limit of agent saturation. Following this, we outline our generative contamination strategy using GOFAI-driven distinguishability and transition probability learning. Finally, we formulate the robust control problem as a Stackelberg game solved via Distributionally Robust Reinforcement Learning (DR-RL) with constructed ambiguity sets.
|
||||||
|
|
||||||
\subsection{Problem Formalization}
|
\subsection{Problem Formalization}
|
||||||
|
|
||||||
@@ -40,6 +40,7 @@ We formalize the heterogeneity of actors by introducing a type space $\Theta$. A
|
|||||||
Q(p) = (1-\alpha) \cdot \mathbb{E}_{\theta \sim \mathcal{D}_H}[d(p; \theta)] + \alpha \cdot \mathbb{E}_{\theta \sim \mathcal{D}_A}[d(p; \theta)] + \epsilon_t
|
Q(p) = (1-\alpha) \cdot \mathbb{E}_{\theta \sim \mathcal{D}_H}[d(p; \theta)] + \alpha \cdot \mathbb{E}_{\theta \sim \mathcal{D}_A}[d(p; \theta)] + \epsilon_t
|
||||||
\end{equation}
|
\end{equation}
|
||||||
where $\alpha \in [0, 1]$ represents the contamination parameter (proportion of agents) and $\epsilon_t$ is non-stationary market noise.
|
where $\alpha \in [0, 1]$ represents the contamination parameter (proportion of agents) and $\epsilon_t$ is non-stationary market noise.
|
||||||
|
Accounting for behavioral and market variation, we also treat $\epsilon_t$ as absorbing serving-path variability from LLM infrastructure (e.g., batch-size-dependent inference behavior under changing load), which appears stochastic at the request level even under greedy decoding \parencite{horace_he_and_thinking_machines_lab_defeating_2025}.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -140,6 +141,8 @@ The architecture of this platform begins with the deployed web-apps posting inte
|
|||||||
|
|
||||||
\paragraph{Public Web Artifact} We transition the Kappa-like architecture of the data collection to a Lambda architecture for actual learning in a surrogate environment. This allows us to move faster on data which is provided and helps us create a feedback loop for production deployment. To support further research in this intersection of fields we release P4P \footnote{\url{https://github.com/velocitatem/p4p}} as a public repository providing the interaction layer of the PHANTOM framework. This provides a configurable storefront which can be tailored to any commercial setting with standardized session-level event tracking. We document the API adapters or what the framework expects in terms of schemas for pricing providers and log ingestion services. The repository is intended for controlled experimentation and method replication rather than production commerce deployment.
|
\paragraph{Public Web Artifact} We transition the Kappa-like architecture of the data collection to a Lambda architecture for actual learning in a surrogate environment. This allows us to move faster on data which is provided and helps us create a feedback loop for production deployment. To support further research in this intersection of fields we release P4P \footnote{\url{https://github.com/velocitatem/p4p}} as a public repository providing the interaction layer of the PHANTOM framework. This provides a configurable storefront which can be tailored to any commercial setting with standardized session-level event tracking. We document the API adapters or what the framework expects in terms of schemas for pricing providers and log ingestion services. The repository is intended for controlled experimentation and method replication rather than production commerce deployment.
|
||||||
|
|
||||||
|
\paragraph{Public Dataset} For reproducibility of the behavioral analysis and distinguishability experiments, we also release the interaction dataset used in this thesis as \textit{WhoClickedIt}. The dataset is hosted on Hugging Face \footnote{\url{https://huggingface.co/datasets/velocitatem/whoclickedit}} and is distributed as one flattened event sheet (\texttt{whoclicked.csv}) with explicit labels (\texttt{actor\_type}, \texttt{is\_agent}, and \texttt{record\_type}). The associated dataset card specifies the schema, collection process, and known limitations; a full copy is included in Appendix~\ref{app:whoclicked_card}.
|
||||||
|
|
||||||
|
|
||||||
\subsubsection{DevOps Principles}
|
\subsubsection{DevOps Principles}
|
||||||
|
|
||||||
@@ -182,13 +185,24 @@ Since users act with motivations, we define a pool of tasks (jobs to be done) an
|
|||||||
The task pool is stored as a structured table with fields \texttt{id}, \texttt{created\_at}, \texttt{task\_name}, \texttt{task\_description}, and \texttt{task\_def\_of\_done}. We formulate the tasks as compact jobs-to-be-done rather than as strict click scripts, because the target is to elicit realistic browsing and comparison behavior that can capture the nuances of different people. In hotel mode the assigned tasks include \textit{Cheapest Room}, \textit{Cheapest Room w/ View}, \textit{MultiStep Cheapest Room}, \textit{The Digital Nomad (Executive)}, and \textit{The 3-Way Tradeoff (Desk + Quiet + Flexible)}. These prompts deliberately require critical thought in search, inspection of room details, comparison of amenities or images, return visits to the listing page, and a final booking decision, which together create a degree of cognitive load. In airline mode we use \textit{Last-Minute One-Way Flight}, where the actor must urgently travel to LAX from either SEA or JFK within the next 1--3 days, inspect at least a small set of candidate itineraries, and then book a reasonable earliest departure.
|
The task pool is stored as a structured table with fields \texttt{id}, \texttt{created\_at}, \texttt{task\_name}, \texttt{task\_description}, and \texttt{task\_def\_of\_done}. We formulate the tasks as compact jobs-to-be-done rather than as strict click scripts, because the target is to elicit realistic browsing and comparison behavior that can capture the nuances of different people. In hotel mode the assigned tasks include \textit{Cheapest Room}, \textit{Cheapest Room w/ View}, \textit{MultiStep Cheapest Room}, \textit{The Digital Nomad (Executive)}, and \textit{The 3-Way Tradeoff (Desk + Quiet + Flexible)}. These prompts deliberately require critical thought in search, inspection of room details, comparison of amenities or images, return visits to the listing page, and a final booking decision, which together create a degree of cognitive load. In airline mode we use \textit{Last-Minute One-Way Flight}, where the actor must urgently travel to LAX from either SEA or JFK within the next 1--3 days, inspect at least a small set of candidate itineraries, and then book a reasonable earliest departure.
|
||||||
A representative task is to find the cheapest feasible catalog item under explicit constraints while removing strict financial limits so we avoid trivial optimization behavior. Participants are also randomly assigned to one experimental platform mode (hotel or airline). Once assigned, they are dropped into the experiment with an actor ID. Under each experiment ID, we can observe multiple sessions across time and gather long interaction traces for the same actor.
|
A representative task is to find the cheapest feasible catalog item under explicit constraints while removing strict financial limits so we avoid trivial optimization behavior. Participants are also randomly assigned to one experimental platform mode (hotel or airline). Once assigned, they are dropped into the experiment with an actor ID. Under each experiment ID, we can observe multiple sessions across time and gather long interaction traces for the same actor.
|
||||||
|
|
||||||
The human data collection involved 18 participants, all of whom provided explicit informed consent prior to their session. Participants had an average age of 21 years and were recruited from a university population. Alongside the 18 human sessions we ran 18 agent sessions of equivalent task scope, giving a balanced dataset of 36 labeled trajectories. Each participant was assigned a single platform mode and a single task drawn from the pool, and completed the session independently without guidance on navigation or pricing strategy.
|
The human data collection involved 13 participants, all of whom provided explicit informed consent prior to their session. Participants had an average age of 21 years and were recruited from a university population. Alongside the 13 human sessions we ran 16 agent sessions of equivalent task scope, yielding 29 labeled trajectories in total (45\% human, 55\% agent). Each participant was assigned a single platform mode and a single task drawn from the pool, and completed the session independently without guidance on navigation or pricing strategy.
|
||||||
|
|
||||||
To evaluate quality and realism of the setup, we store both structured event logs and full interaction transcripts. This lets us combine quantitative analysis with transcript-level qualitative findings. The result is an isolated system where we can control the interaction process while preserving realistic behavior.
|
To evaluate quality and realism of the setup, we store both structured event logs and full interaction transcripts. This lets us combine quantitative analysis with transcript-level qualitative findings. The result is an isolated system where we can control the interaction process while preserving realistic behavior.
|
||||||
|
|
||||||
Operationally, goals and experiment runs are tracked in PostgreSQL (goal table, run table, and assignment mapping). This data-acquisition phase is the first half of the methodology and is intentionally a disconnected component that feeds the later contributions. The second half uses collected behavioral traces to separate classes $\theta \in \{A,H\}$ with session-conditioned probability estimates, then injects those estimates into the pricing learner.
|
Operationally, goals and experiment runs are tracked in PostgreSQL (goal table, run table, and assignment mapping). This data-acquisition phase is the first half of the methodology and is intentionally a disconnected component that feeds the later contributions. The second half uses collected behavioral traces to distinguish classes $\theta \in \{A,H\}$ with session-conditioned probability estimates, then injects those estimates into the pricing learner.
|
||||||
|
|
||||||
Our process follows three stages: (1) observe and \textit{vectorize} behavioral interactions, (2) learn separability to characterize human versus agent patterns, and (3) use the learned signal to train a defensive policy in a controlled dynamic-pricing simulator.
|
Our process follows three stages: (1) observe and \textit{vectorize} behavioral interactions, (2) learn distinguishability to characterize human versus agent patterns, and (3) use the learned signal to train a defensive policy in a controlled dynamic-pricing simulator.
|
||||||
|
|
||||||
|
Figure~\ref{fig:phantom_unified_architecture} summarizes the full mechanism from online interaction capture to divergence-based contamination scoring and robust control of pricing decisions.
|
||||||
|
|
||||||
|
\begin{figure}[ht]
|
||||||
|
\centering
|
||||||
|
\resizebox{\textwidth}{!}{%
|
||||||
|
\input{chapters/hero_architecture_figure.tex}
|
||||||
|
}
|
||||||
|
\caption{Unified PHANTOM defense architecture. (a) Online serving and logging with behavioral and price-query streams. (b) Distinguishability layer that estimates KL divergence to human/agent prototypes and derives session-level contamination scores. (c) Distributionally robust pricing control that optimizes under an ambiguity set while penalizing COI leakage and tracking UX cost.}
|
||||||
|
\label{fig:phantom_unified_architecture}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
\begin{figure}[ht]
|
\begin{figure}[ht]
|
||||||
\resizebox{\columnwidth}{!}{%
|
\resizebox{\columnwidth}{!}{%
|
||||||
@@ -206,8 +220,8 @@ The dynamic pricing mechanism elicited immediate behavioral adjustments. Partici
|
|||||||
|
|
||||||
\subsubsection{Design of Training Factorial Study}
|
\subsubsection{Design of Training Factorial Study}
|
||||||
|
|
||||||
The simulator has multiple configurable factors. We design a multi-factor study across five axes derived from the sweep configurations: (1) RL algorithm (\texttt{ppo}, \texttt{a2c}, \texttt{dqn}, \texttt{qtable}; 4 levels), (2) contamination ratio $\alpha$ sampled from $[0.1, 0.6]$ at four representative levels, (3) robustness radius $\epsilon_\alpha \in \{0.0, 0.15, 0.3\}$ (3 levels), (4) COI penalty weight $\lambda_\text{coi}$ at two reference levels, and (5) pricing action granularity (two discretization settings for \texttt{action\_levels}); giving a grid of $4\times4\times3\times2\times2 = 192$ configurations. Statistical power for the behavioral comparisons is determined by a two-sample test over per-session KL divergence scores; a formal power analysis with minimum detectable effect size at $n=18+18$ is reported in the results.
|
The simulator has multiple configurable factors. We design a multi-factor study across five axes derived from the sweep configurations: (1) RL algorithm (\texttt{ppo}, \texttt{a2c}, \texttt{dqn}, \texttt{qtable}; 4 levels), (2) contamination ratio $\alpha$ sampled from $[0.1, 0.6]$ at four representative levels, (3) robustness radius $\epsilon_\alpha \in \{0.0, 0.15, 0.3\}$ (3 levels), (4) COI penalty weight $\lambda_\text{coi}$ at two reference levels, and (5) pricing action granularity (two discretization settings for \texttt{action\_levels}); giving a grid of $4\times4\times3\times2\times2 = 192$ configurations. Statistical power for the behavioral comparisons is determined by a two-sample test over per-session KL divergence scores; a formal power analysis with minimum detectable effect size at $n_H=13$, $n_A=16$ is reported in the results.
|
||||||
% Power analysis plan: apply a two-sample Mann-Whitney U (or permutation test) on per-session (delta_H - delta_A) divergence scores comparing the human and agent groups. Compute minimum detectable effect size at alpha=0.05, power=0.8, given n=18 per group. Bootstrap confidence intervals on mean KL are a cleaner complement given the non-normality of divergence distributions.
|
% Power analysis plan: apply a two-sample Mann-Whitney U (or permutation test) on per-session (delta_H - delta_A) divergence scores comparing the human and agent groups. Compute minimum detectable effect size at alpha=0.05, power=0.8, given n_H=13 and n_A=16. Bootstrap confidence intervals on mean KL are a cleaner complement given the non-normality of divergence distributions.
|
||||||
While this scale is generally expensive for reinforcement learning, we execute it on a large TPU cluster to make the sweep tractable.
|
While this scale is generally expensive for reinforcement learning, we execute it on a large TPU cluster to make the sweep tractable.
|
||||||
|
|
||||||
Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160\,PFLOPS of aggregate compute (derivation in Appendix~\ref{app:compute_budget}), which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration where throughput-per-dollar is favorable, and reserve on-demand v4 capacity for runs that should not be interrupted.
|
Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160\,PFLOPS of aggregate compute (derivation in Appendix~\ref{app:compute_budget}), which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration where throughput-per-dollar is favorable, and reserve on-demand v4 capacity for runs that should not be interrupted.
|
||||||
@@ -245,7 +259,8 @@ v4 & 64 (32 + 32) & us-central2-b & 32 Spot + 32 On-demand \\
|
|||||||
\end{tabular}
|
\end{tabular}
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
For connections from Madrid, we prioritize the europe-west4 allocation for latency-sensitive runs with the benefit of having the most grouped chips within a single region. This regional grouping is important for the deployment of our Kubernetes cluster which cannot span multiple regions. All sweep metadata, model checkpoints, and reward traces are logged in Weights \& Biases. Hardware specifications are from the official Google Cloud TPU documentation \parencite{noauthor_tpu_2026,noauthor_tpu_2025-1,noauthor_tpu_2025}.
|
For connections from Madrid, we prioritize the europe-west4 allocation for latency-sensitive runs with the benefit of having the most grouped chips within a single region. This regional grouping is important for the deployment of our Kubernetes cluster which cannot span multiple regions. All sweep metadata, model checkpoints, and reward traces are logged in Weights \& Biases. % TODO: cite this (from bib)
|
||||||
|
Hardware specifications are from the official Google Cloud TPU documentation \parencite{noauthor_tpu_2026,noauthor_tpu_2025-1,noauthor_tpu_2025}.
|
||||||
|
|
||||||
Design of training processes: we build the Docker image with layer caching in mind in order to speed up rebuilds as much as possible, and as such we place the most volatile steps towards the end of the image build. What this means in practice is that dependency installations are isolated, so edits to source code do not trigger a rebuild of those layers. Only if we update the entry point for training a sweep will Docker also rebuild the source-code copy stage.
|
Design of training processes: we build the Docker image with layer caching in mind in order to speed up rebuilds as much as possible, and as such we place the most volatile steps towards the end of the image build. What this means in practice is that dependency installations are isolated, so edits to source code do not trigger a rebuild of those layers. Only if we update the entry point for training a sweep will Docker also rebuild the source-code copy stage.
|
||||||
|
|
||||||
@@ -294,15 +309,15 @@ In addition to behavioral events, the platform logs price observations to a sepa
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{Generative Contamination and Separability}
|
\subsection{Generative Contamination and Distinguishability}
|
||||||
|
|
||||||
To train a robust pricing learner, we need a simulator that can generate realistic interaction data under controlled contamination. We build this from Phantom data using a two-stage approach.
|
To train a robust pricing learner, we need a simulator that can generate realistic interaction data under controlled contamination. We build this from Phantom data using a two-stage approach.
|
||||||
|
|
||||||
|
|
||||||
\subsubsection{Ground-Truth Separability}
|
\subsubsection{Ground-Truth Distinguishability}
|
||||||
Because sessions are collected under controlled experimental conditions where each actor is assigned a known type at the start of the trial, labels $\theta_s \in \{H, A\}$ are available as ground truth rather than as the output of a heuristic classifier. We therefore estimate separate transition kernels directly from each labeled partition $\mathcal{D}_H$ and $\mathcal{D}_A$, treating the resulting $\hat{\mathcal{T}}_H$ and $\hat{\mathcal{T}}_A$ as the ground-truth behavioral profiles for each class. We then ask a direct methodological question: are the kernels separable enough to justify downstream pricing control that depends on that separability?
|
Because sessions are collected under controlled experimental conditions where each actor is assigned a known type at the start of the trial, labels $\theta_s \in \{H, A\}$ are available as ground truth rather than as the output of a heuristic classifier. We therefore estimate separate transition kernels directly from each labeled partition $\mathcal{D}_H$ and $\mathcal{D}_A$, treating the resulting $\hat{\mathcal{T}}_H$ and $\hat{\mathcal{T}}_A$ as the ground-truth behavioral profiles for each class. We then ask a direct methodological question: are the kernels distinguishable enough to justify downstream pricing control that depends on that distinguishability?
|
||||||
|
|
||||||
To answer this, we compute per-session KL divergence scores against both class-level centroids. For each session $s$ in either partition, we fit a session-level event transition kernel $\hat{\mathcal{T}}_s$ from that session's trajectory alone, then compute its average KL divergence to the human centroid ($\Delta_{H,s}$) and to the agent centroid ($\Delta_{A,s}$). The per-session separability score is the gap $\Delta_{H,s} - \Delta_{A,s}$: a negative value indicates proximity to human behavior, a positive value indicates proximity to agent behavior.
|
To answer this, we compute per-session KL divergence scores against both class-level centroids. For each session $s$ in either partition, we fit a session-level event transition kernel $\hat{\mathcal{T}}_s$ from that session's trajectory alone, then compute its average KL divergence to the human centroid ($\Delta_{H,s}$) and to the agent centroid ($\Delta_{A,s}$). The per-session distinguishability score is the gap $\Delta_{H,s} - \Delta_{A,s}$: a negative value indicates proximity to human behavior, a positive value indicates proximity to agent behavior.
|
||||||
|
|
||||||
The normality assumption cannot be made for KL divergence distributions, which are right-skewed and bounded below by zero, so we do not use a Student's $t$-test. Instead we apply a Mann-Whitney $U$ test \parencite{mann_test_1947} on the per-session gap scores between the two groups. The Mann-Whitney test is a rank-based nonparametric test that compares the stochastic ordering of two independent samples without distributional assumptions, making it appropriate for small samples drawn from skewed populations. We report $U$, the exact two-sided $p$-value, and group-level descriptive statistics for the gap scores.
|
The normality assumption cannot be made for KL divergence distributions, which are right-skewed and bounded below by zero, so we do not use a Student's $t$-test. Instead we apply a Mann-Whitney $U$ test \parencite{mann_test_1947} on the per-session gap scores between the two groups. The Mann-Whitney test is a rank-based nonparametric test that compares the stochastic ordering of two independent samples without distributional assumptions, making it appropriate for small samples drawn from skewed populations. We report $U$, the exact two-sided $p$-value, and group-level descriptive statistics for the gap scores.
|
||||||
|
|
||||||
@@ -387,8 +402,10 @@ The complete pricing-demand-trajectory loop is illustrated in Figure~\ref{fig:or
|
|||||||
|
|
||||||
\begin{figure}[ht]
|
\begin{figure}[ht]
|
||||||
\centering
|
\centering
|
||||||
\[
|
{\setlength{\arraycolsep}{4pt}%
|
||||||
\text{Oracle}(\vec{p}_{t-1},\vec{\hat{q}})\to
|
\resizebox{0.85\linewidth}{!}{$
|
||||||
|
\begin{aligned}
|
||||||
|
&\text{Oracle}(\vec{p}_{t-1},\vec{\hat{q}})\to
|
||||||
\begin{pmatrix}
|
\begin{pmatrix}
|
||||||
p_0\\
|
p_0\\
|
||||||
p_1\\
|
p_1\\
|
||||||
@@ -397,14 +414,15 @@ p_N
|
|||||||
\end{pmatrix}
|
\end{pmatrix}
|
||||||
\underrightarrow{d_i \sim \mathcal{N}_{\vec{p}}}
|
\underrightarrow{d_i \sim \mathcal{N}_{\vec{p}}}
|
||||||
\begin{pmatrix}d_0\\ d_1\\ \cdots \\ d_N\end{pmatrix}
|
\begin{pmatrix}d_0\\ d_1\\ \cdots \\ d_N\end{pmatrix}
|
||||||
\underrightarrow{\vec{d}\times \tau_\theta \to \tau^\prime}
|
\underrightarrow{\vec{d}\otimes \tau_\theta}
|
||||||
\begin{bmatrix}
|
\begin{bmatrix}
|
||||||
0.01 & 0.02 & \cdots & 0.3 \\
|
0.01 & 0.02 & \cdots & 0.3 \\
|
||||||
0.41 & 0.24 & \cdots & 0.0 \\
|
0.41 & 0.24 & \cdots & 0.0 \\
|
||||||
\cdots & \cdots & \cdots & \cdots \\
|
\cdots & \cdots & \cdots & \cdots \\
|
||||||
0.51 & 0.09 & \cdots & 0.1 \\
|
0.51 & 0.09 & \cdots & 0.1 \\
|
||||||
\end{bmatrix}
|
\end{bmatrix}
|
||||||
\underrightarrow{\tau_k \sim \tau^\prime}
|
\\
|
||||||
|
&\underrightarrow{\tau_k \sim \tau^\prime}
|
||||||
\{\tau_k\}_{k=0}^K \to \hat{Q}(\tau_k)
|
\{\tau_k\}_{k=0}^K \to \hat{Q}(\tau_k)
|
||||||
\to \begin{pmatrix}
|
\to \begin{pmatrix}
|
||||||
\hat{q}_0 \\
|
\hat{q}_0 \\
|
||||||
@@ -413,8 +431,10 @@ p_N
|
|||||||
\hat{q}_N \\
|
\hat{q}_N \\
|
||||||
\end{pmatrix}
|
\end{pmatrix}
|
||||||
\to \text{Oracle}(\cdot)
|
\to \text{Oracle}(\cdot)
|
||||||
\]
|
\end{aligned}
|
||||||
\caption{Oracle-based pricing loop: historical price and demand state map to a new price vector; each product samples demand curves from $\mathcal{N}_{\vec{p}}$; trajectories are generated by mixing demand with behavioral kernels $\tau_\theta$ into transition matrix $\tau'$; sampled trajectories $\{\tau_k\}$ aggregate through proxy $Q(\cdot)$ to yield updated demand $\vec{\hat{q}}$, closing the feedback loop.}
|
$}%
|
||||||
|
}
|
||||||
|
\caption{Oracle-based pricing loop: historical price and demand state map to a new price vector; each product samples demand curves from $\mathcal{N}_{\vec{p}}$; trajectories are generated via the Kronecker product $\vec{d}\otimes\tau_\theta$ into transition matrix $\tau'$; sampled trajectories $\{\tau_k\}$ aggregate through proxy $Q(\cdot)$ to yield updated demand $\vec{\hat{q}}$, closing the feedback loop.}
|
||||||
\label{fig:oracle_flow}
|
\label{fig:oracle_flow}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
@@ -461,7 +481,7 @@ We also consider taxation-like overlays for agent traffic under strategy-proof m
|
|||||||
|
|
||||||
\subsubsection{Pricing Mechanism Summary}
|
\subsubsection{Pricing Mechanism Summary}
|
||||||
|
|
||||||
We now present the complete pricing mechanism that integrates the behavioral separability, contamination estimation, and robust optimization components developed in the preceding sections. Algorithm~\ref{alg:phantom_loop_clean} formalizes the defensive pricing loop as a Stackelberg game where the platform (leader) sets prices and the aggregate demand (follower) responds through observed session trajectories.
|
We now present the complete pricing mechanism that integrates the behavioral distinguishability, contamination estimation, and robust optimization components developed in the preceding sections. Algorithm~\ref{alg:phantom_loop_clean} formalizes the defensive pricing loop as a Stackelberg game where the platform (leader) sets prices and the aggregate demand (follower) responds through observed session trajectories.
|
||||||
|
|
||||||
\begin{algorithm}[t]
|
\begin{algorithm}[t]
|
||||||
\caption{PHANTOM defensive pricing loop}
|
\caption{PHANTOM defensive pricing loop}
|
||||||
@@ -494,3 +514,47 @@ We now present the complete pricing mechanism that integrates the behavioral sep
|
|||||||
The algorithm operates in discrete epochs indexed by $t$. At each epoch, the platform applies one discrete multiplicative price action, the environment samples a batch of sessions, and demand is recomputed from weighted events. Robustness is implemented as an inner minimization over a small local grid of contamination candidates around nominal $\alpha_0$, matching the current engine implementation. The history buffer $\mathcal{L}$ (``Limbo'' in our implementation) enforces the alternating Stackelberg structure by preserving the temporal sequence of price publications and demand observations.
|
The algorithm operates in discrete epochs indexed by $t$. At each epoch, the platform applies one discrete multiplicative price action, the environment samples a batch of sessions, and demand is recomputed from weighted events. Robustness is implemented as an inner minimization over a small local grid of contamination candidates around nominal $\alpha_0$, matching the current engine implementation. The history buffer $\mathcal{L}$ (``Limbo'' in our implementation) enforces the alternating Stackelberg structure by preserving the temporal sequence of price publications and demand observations.
|
||||||
|
|
||||||
%The defensive price update in Line 24 implements contamination-aware margin shrinkage: as estimated contamination $\hat{\alpha}_t$ rises, the margin $(p^{\mathrm{ref}} - c)$ is reduced by factor $\kappa\in[0,1]$, with projection $\Pi_{\mathcal{P}}$ ensuring feasibility. In subsequent experiments this heuristic rule is replaced by DR-RL policy $\pi^*$ from Eq.~\ref{eq:robust_policy}.
|
%The defensive price update in Line 24 implements contamination-aware margin shrinkage: as estimated contamination $\hat{\alpha}_t$ rises, the margin $(p^{\mathrm{ref}} - c)$ is reduced by factor $\kappa\in[0,1]$, with projection $\Pi_{\mathcal{P}}$ ensuring feasibility. In subsequent experiments this heuristic rule is replaced by DR-RL policy $\pi^*$ from Eq.~\ref{eq:robust_policy}.
|
||||||
|
|
||||||
|
\subsection{Parallelization Strategy}
|
||||||
|
|
||||||
|
To avoid preemption of compute mid-training, we settle on a 40-chip TPU v4 compute node with 5 parallel workers. The login node creates an orchestration node with Ray \parencite{moritz_ray_2018}, and we distribute Ray compute nodes across the remaining workers, one per worker.
|
||||||
|
|
||||||
|
\subsubsection{Computational Cost Analysis of the Simulation Step}
|
||||||
|
The per-step cost of Algorithm~\ref{alg:phantom_loop_clean} is not uniform across its components. To inform hardware provisioning and to identify where algorithmic improvements are most impactful, we profile the hot path of the engine using Python's \texttt{cProfile} instrumentation over 20 environment steps under two configurations: a baseline with the robustness inner loop disabled ($K=1$, $\epsilon_\alpha=0$) and a standard robust setting ($K=5$, $\epsilon_\alpha=0.2$). Both runs use $M=10$ sessions per market call and $N=3$ products.
|
||||||
|
|
||||||
|
The baseline achieves approximately 26 steps per second. Enabling the robustness inner loop with $K=5$ candidates drops throughput to 7.2 steps per second, a $3.6\times$ slowdown that grows with the number of candidates $K$, consistent with the $O(K)$ scaling of the adversarial alpha selection in the implementation.
|
||||||
|
|
||||||
|
\begin{table}[ht]
|
||||||
|
\centering
|
||||||
|
\caption{Per-step profiling results (20 steps, $M=10$ sessions, $N=3$ products). Self-time measures time spent inside the function excluding callees; cumulative time includes the full call subtree.}
|
||||||
|
\label{tab:profile_results}
|
||||||
|
\begingroup
|
||||||
|
\small
|
||||||
|
\setlength{\tabcolsep}{4pt}
|
||||||
|
\begin{tabular}{@{}lrrrr@{}}
|
||||||
|
\toprule
|
||||||
|
\textbf{Function} & \textbf{Calls} & \textbf{Self (ms)} & \textbf{Cum. (ms)} & \textbf{Cum. \%} \\
|
||||||
|
\midrule
|
||||||
|
\multicolumn{5}{l}{\textit{Baseline ($K=1$, 0.77\,s total, 26 steps/s)}} \\
|
||||||
|
\texttt{sample\_behavior\_from\_transitions} & 420 & 131 & 658 & 86\% \\
|
||||||
|
\texttt{DataFrame.xs} & 4,820 & 30 & 201 & 26\% \\
|
||||||
|
\texttt{numpy.nan\_to\_num} & 4,904 & 43 & 97 & 13\% \\
|
||||||
|
\texttt{adjust\_behavior\_to\_condition} & 84 & 3 & 54 & 7\% \\
|
||||||
|
\midrule
|
||||||
|
\multicolumn{5}{l}{\textit{Robust ($K=5$, 2.79\,s total, 7.2 steps/s)}} \\
|
||||||
|
\texttt{sample\_behavior\_from\_transitions} & 1,220 & 519 & 2,447 & 88\% \\
|
||||||
|
\texttt{DataFrame.xs} & 16,668 & 108 & 729 & 26\% \\
|
||||||
|
\texttt{numpy.nan\_to\_num} & 16,912 & 164 & 363 & 13\% \\
|
||||||
|
\texttt{adjust\_behavior\_to\_condition} & 244 & 11 & 108 & 4\% \\
|
||||||
|
\bottomrule
|
||||||
|
\end{tabular}
|
||||||
|
\endgroup
|
||||||
|
\end{table}
|
||||||
|
|
||||||
|
Across both configurations, \texttt{sample\_behavior\_from\_transitions} accounts for 86--88\% of total wall time. The function implements the Markov chain sampler described in Section~\ref{sec:tpe}: at each transition it retrieves the current-state row from the expanded transition \texttt{DataFrame} via label-based indexing, which internally dispatches through the pandas \texttt{xs} and \texttt{fast\_xs} code paths. For $M$ sessions each running up to $L_{\max}=40$ transitions, a single \texttt{market.act()} call issues up to $M \cdot L_{\max}$ individual row lookups. With $K=5$ robustness candidates per outer step this accumulates to $5 \times 10 \times 40 = 2{,}000$ row accesses per outer step, producing the 16k \texttt{xs} invocations observed in Table~\ref{tab:profile_results}.
|
||||||
|
|
||||||
|
The \texttt{numpy.nan\_to\_num} calls, accounting for 13\% of cumulative time, occur once per row lookup to sanitize sampled probability vectors before normalization; their call count therefore closely tracks the \texttt{xs} count.
|
||||||
|
|
||||||
|
\texttt{adjust\_behavior\_to\_condition} expands the base $E \times E$ event transition matrix to a $(E \cdot N) \times (E \cdot N)$ product-specific matrix via a Kronecker product. At $N=3$ this is inexpensive, but the cost scales as $O(E^2 N^2)$, so at the $N=10$ default it becomes a more significant contributor. The result is not cached across the $K$ robustness candidates inside a single outer step, meaning the Kronecker expansion is recomputed $2K$ times per step (once for the human kernel and once for the agent kernel at each candidate $\alpha_k$).
|
||||||
|
|
||||||
|
The dominant bottleneck therefore has a clear structural cause: the expanded transition matrix is a string-keyed \texttt{DataFrame}, and pandas object-level indexing carries substantial per-call overhead relative to the arithmetic being performed. Converting the expanded matrix to a \texttt{numpy} array with an accompanying integer state-to-index map, computed once per \texttt{market.act()} call and cached for the duration of the robustness inner loop, eliminates the entire pandas dispatch chain. We treat this identified bottleneck as an opportunity to close the gap left by the computational needs of the pricing learner. We make use of JAX to parallelize on the TPU and, surprisingly, obtain a large speedup even on CPU-only compute, improving throughput from 26 to 220 steps/s in the baseline configuration and from 7.2 to 136 steps/s under the full robust inner loop, corresponding to 8.5$\times$ and 19$\times$ speedups respectively.
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
\section{Results}
|
\section{Results}
|
||||||
\begin{figure}[ht]
|
\begin{figure}[ht]
|
||||||
\centering
|
\centering
|
||||||
\input{chapters/figures/supra.tex}
|
\input{chapters/figures/supra/supra.tex}
|
||||||
\caption{Evolution of price distributions over experiment steps. The heatmap illustrates the density of price offerings. This is an early baseline simulation which demonstrates supra-competitive price-setting in deep learning agents such as SAC as can be clearly seen by the high density at the highest available price.}
|
\caption{Evolution of price distributions over experiment steps. The heatmap illustrates the density of price offerings. This is an early baseline simulation which demonstrates supra-competitive price-setting in deep learning agents such as SAC as can be clearly seen by the high density at the highest available price.}
|
||||||
\label{fig:supra_heatmap}
|
\label{fig:supra_heatmap}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
@@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
\subsection{Behavioral Analysis}
|
\subsection{Behavioral Analysis}
|
||||||
|
|
||||||
Separability between human and agent sessions is evaluated by computing per-session divergence gap scores $\Delta_{H,s} - \Delta_{A,s}$ and comparing the two groups with a Mann-Whitney $U$ test. Table~\ref{tab:divergence_significance} reports the group-level descriptive statistics for the gap scores and the test result.
|
Distinguishability between human and agent sessions is evaluated by computing per-session divergence gap scores $\Delta_{H,s} - \Delta_{A,s}$ and comparing the two groups with a Mann-Whitney $U$ test. The full recorded cohort contains $n_H=13$ human sessions and $n_A=16$ agent sessions, and Table~\ref{tab:divergence_significance} reports the corresponding group-level statistics and test result.
|
||||||
|
|
||||||
\begin{table}[ht]
|
\begin{table}[ht]
|
||||||
\centering
|
\centering
|
||||||
@@ -20,48 +20,67 @@ Separability between human and agent sessions is evaluated by computing per-sess
|
|||||||
\toprule
|
\toprule
|
||||||
Group & $n$ & Mean gap & Std \\
|
Group & $n$ & Mean gap & Std \\
|
||||||
\midrule
|
\midrule
|
||||||
Human sessions & 11 & $-3.3522$ & $2.6748$ \\
|
Human sessions & 13 & $-3.35$ & $2.67$ \\
|
||||||
Agent sessions & 6 & $+1.6482$ & $2.8349$ \\
|
Agent sessions & 16 & $+1.65$ & $2.83$ \\
|
||||||
\midrule
|
\midrule
|
||||||
\multicolumn{4}{l}{Mann-Whitney $U = 2.0$, $p = 0.0006$ (two-sided)} \\
|
\multicolumn{4}{l}{Mann-Whitney two-sided test: $p<0.001$} \\
|
||||||
\bottomrule
|
\bottomrule
|
||||||
\end{tabular}
|
\end{tabular}
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
The sign structure is consistent with the theoretical expectation: human sessions produce negative gap scores (closer to the human centroid, far from the agent centroid) while agent sessions produce positive gap scores (closer to the agent centroid). The two-sided $p$-value of $0.0006$ indicates near-complete rank separation between the groups at $n_H=11$, $n_A=6$, providing strong evidence that the transition kernels are separable enough to justify their use as a control signal in downstream pricing.
|
The sign structure is consistent with the theoretical expectation: human sessions produce negative gap scores (closer to the human centroid, far from the agent centroid) while agent sessions produce positive gap scores (closer to the agent centroid). The two-sided test result ($p<0.001$) at $n_H=13$, $n_A=16$ indicates strong rank distinction between groups, providing evidence that the transition kernels are distinguishable enough to justify their use as a control signal in downstream pricing.
|
||||||
|
|
||||||
|
|
||||||
\subsection{Experimental Outcomes}
|
\subsection{Experimental Outcomes}
|
||||||
|
|
||||||
To evaluate robustness contributions, we compare two policies on the same environment family: (i) robust pricing with COI-aware reward and adversarial contamination step, and (ii) non-robust baseline with revenue-only reward (\texttt{--no-robust}).
|
To evaluate robustness contributions, we compare two policies on the same environment family: (i) robust pricing with COI-aware reward and adversarial contamination step, and (ii) a baseline policy with revenue-only reward.
|
||||||
|
|
||||||
We report two preliminary stages before the full factorial interpretation. First, we executed a short calibration run at $\alpha=0.3$ (2 evaluation episodes, 3000 training timesteps per tier) across \texttt{qtable}, \texttt{ppo}, \texttt{a2c}, and \texttt{dqn}. In that first run, \texttt{ppo} produced the highest objective score and revenue (objective $=3.76\mathrm{e}5$, revenue $=4.15\mathrm{e}5$), while the remaining tiers stayed lower in this small-budget regime. The corresponding price traces show a monotone escalation for \texttt{ppo} (mean price from $8.61\mathrm{e}1$ to $1.49\mathrm{e}2$), whereas \texttt{qtable}, \texttt{a2c}, and \texttt{dqn} remained nearly flat over the episode horizon. This confirms that the simulation loop is able to express policy-dependent pricing dynamics rather than collapsing into a single trajectory shape.
|
We report two preliminary stages before the full factorial interpretation. First, we executed a short calibration run at $\alpha=0.3$ (2 evaluation episodes, 3000 training timesteps per tier) across \texttt{qtable}, \texttt{ppo}, \texttt{a2c}, and \texttt{dqn}. In that first run, \texttt{ppo} produced the highest objective score and revenue (objective $=3.76\mathrm{e}5$, revenue $=4.15\mathrm{e}5$), while the remaining tiers stayed lower in this small-budget regime. The corresponding price traces show a monotone escalation for \texttt{ppo} (mean price from $8.61\mathrm{e}1$ to $1.49\mathrm{e}2$), whereas \texttt{qtable}, \texttt{a2c}, and \texttt{dqn} remained nearly flat over the episode horizon. This confirms that the simulation loop is able to express policy-dependent pricing dynamics rather than collapsing into a single trajectory shape.
|
||||||
|
|
||||||
Second, we launched an overnight paired benchmark over $\alpha \in \{0.00,0.15,0.30,0.45,0.60\}$ with 8 evaluation episodes and 8000 timesteps, comparing robust and non-robust settings at fixed seed/tier/contamination tuples. At the time of writing, two seeds (11 and 22) are complete and one additional seed is still running. We therefore frame the numbers below as an initial signal, not a final claim.
|
|
||||||
|
|
||||||
\begin{table}[ht]
|
\subsubsection{The Impact of Contamination on Revenue}
|
||||||
\centering
|
|
||||||
\caption{Early overnight aggregate over completed seeds ($n=2$; seeds 11 and 22).}
|
|
||||||
\label{tab:pricing_benchmark}
|
|
||||||
\begin{tabular}{lcccc}
|
|
||||||
\toprule
|
|
||||||
Mode & Mean objective score & Mean revenue & Mean COI level & Mean margin \\
|
|
||||||
\midrule
|
|
||||||
Robust & $3.41\mathrm{e}5$ & $3.80\mathrm{e}5$ & $1.08\mathrm{e}2$ & 0.901 \\
|
|
||||||
Non-robust (\texttt{--no-robust}) & $3.91\mathrm{e}5$ & $4.18\mathrm{e}5$ & $1.11\mathrm{e}2$ & 0.906 \\
|
|
||||||
\bottomrule
|
|
||||||
\end{tabular}
|
|
||||||
\end{table}
|
|
||||||
|
|
||||||
At pair level (same seed, tier, and contamination), robust exceeds non-robust in $13/40$ configurations on objective score and in $16/40$ configurations on revenue. The current early evidence therefore suggests a conditional robustness effect: the defense is active and measurable, but not yet uniformly beneficial without further calibration.
|
A linear fit test on run-level data ($n=95$) shows a strong negative association between contamination and mean revenue. The fitted model mapping $\alpha \to \text{revenue}$ yields $t(93)=-8.2148$, $p=1.20\times 10^{-12}$, $R^2=0.4205$, and a 95\% confidence interval for the slope of $[-75{,}288.76,\,-45{,}975.13]$. In practical terms, a $+0.1$ increase in $\alpha$ corresponds to an average decrease of about $6{,}063$ revenue units within our environment.
|
||||||
|
|
||||||
|
\subsubsection{Large Scale Factorial Training}
|
||||||
|
|
||||||
|
In our complete training runs we logged $\approx 180$ days of net compute time. The results we draw from this extensive training are:
|
||||||
|
\begin{enumerate*}[label=(\roman*)]
|
||||||
|
\item the ability to extract COI is greater in the presence of robustness within the training loop
|
||||||
|
\item short term revenue measurements suffer $\approx 3\%$ loss but COI margin compensates for this loss in the long run
|
||||||
|
\item a larger catalog size contributes positively to COI preservation under higher contamination ratios
|
||||||
|
\item supra-competitive pricing is a natural reward hacking tendency which is drastically reduced by a balanced UX penalty
|
||||||
|
\end{enumerate*}
|
||||||
|
|
||||||
|
\begin{figure}[ht]
|
||||||
|
\centering
|
||||||
|
\input{chapters/figures/results/includes/final/final_focus_revenue_by_alpha.tex}
|
||||||
|
\caption{Revenue curves by contamination for the final cohort. The baseline remains above the defended curve in most cells, but the gap narrows in the high-contamination region.}
|
||||||
|
\label{fig:final_focus_revenue_by_alpha}
|
||||||
|
\end{figure}
|
||||||
|
% TODO: we need a similar plot which shows the COI preserved (what we gain across the multiple contamination levels), showing that the robust method has better COI optimization.
|
||||||
|
|
||||||
|
\begin{figure}[ht]
|
||||||
|
\centering
|
||||||
|
\input{chapters/figures/results/includes/final/final_focus_revenue_delta.tex}
|
||||||
|
\caption{Defended-minus-baseline revenue delta over contamination for the final cohort. The strongest high-contamination deviation begins at $\alpha=0.7$, followed by recovery toward near parity by $\alpha=1.0$.}
|
||||||
|
\label{fig:final_focus_revenue_delta}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{figure}[ht]
|
||||||
|
\centering
|
||||||
|
\input{chapters/figures/results/includes/final/final_focus_risk_deltas.tex}
|
||||||
|
\caption{Defended-minus-baseline leakage and volatility deltas for the final cohort. Leakage remains lower for the defended policy across the full contamination range.}
|
||||||
|
\label{fig:final_focus_risk_deltas}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
\subsection{Interpretation and Insights}
|
\subsection{Interpretation and Insights}
|
||||||
The Mann-Whitney result ($U=2.0$, $p<0.001$) confirms that per-session divergence gaps separate the two actor classes with near-zero overlap in rank ordering. This is the condition required for separability to act as a useful control signal in the pricing loop rather than just an auxiliary classifier score.
|
The Mann-Whitney result ($p<0.001$) confirms that per-session divergence gaps distinguish the two actor classes with near-zero overlap in rank ordering. This is the condition required for distinguishability to act as a useful control signal in the pricing loop rather than just an auxiliary classifier score.
|
||||||
|
|
||||||
The first calibration and overnight runs additionally confirm three practical points aligned with the thesis mechanism. First, the control loop is reproducible end-to-end (training, evaluation, artifact generation) across algorithms and contamination levels. Second, policy class materially changes price trajectories and resulting COI/revenue profiles under identical environment settings. Third, objective improvements from robustness are regime-dependent in the current baseline, which is consistent with the thesis claim that contamination-aware pricing needs explicit calibration rather than a one-size-fits-all penalty.
|
The first calibration and paired benchmark runs additionally confirm three practical points aligned with the thesis. First, the control loop is reproducible end-to-end (training, evaluation, artifact generation) across algorithms and contamination levels. Second, policy class materially changes price trajectories and resulting COI/revenue profiles under identical environment settings. Third, objective improvements from robustness are regime-dependent in the current baseline, which is consistent with the thesis claim that contamination-aware pricing needs explicit calibration rather than a one-size-fits-all penalty.
|
||||||
|
|
||||||
We also note that maximizing revenue in isolation can favor aggressive high-price behavior; even in these early runs, the non-robust aggregate shows slightly higher mean COI and margin. For this reason, all subsequent reporting in this thesis is interpreted on a multi-metric basis (objective, revenue, COI, and stability), and not by revenue alone.
|
We also note that maximizing revenue in isolation can favor aggressive high-price behavior; even in these early runs, the non-robust aggregate shows slightly higher mean COI and margin. For this reason, all subsequent reporting in this thesis is interpreted on a multi-metric basis (objective, revenue, COI, and stability), and not by revenue alone.
|
||||||
|
|
||||||
|
|
||||||
\subsection{Anomalies}
|
\subsection{Anomalies}
|
||||||
In our initial runs, we observed an instability pocket in one completed run (A2C, robust, seed 11, $\alpha=0.30$) with a large performance drop relative to neighboring configurations. We retain this run in the preliminary summary to avoid survivorship bias and treat it as evidence that robustness sensitivity analysis is necessary before final conclusions.
|
In our initial runs, we observed an instability pocket in one completed run (A2C, robust, seed 11, $\alpha=0.30$) with a large performance drop relative to neighboring configurations. We retain this run in the preliminary summary to avoid survivorship bias and treat it as evidence that robustness sensitivity analysis is necessary before final conclusions.
|
||||||
|
|||||||
@@ -16,6 +16,4 @@ This technology does not come without a more bitter side, ethical concerns do ar
|
|||||||
|
|
||||||
With a system like this there is potential for strong drift given the rapid advance of agentic systems and user preference. Our intent behind adding the UX term into the reward shaping process was to further address the risk of degraded user experience. Looking deeper at the underlying methodology, reinforcement learning does not come without it's complications such as reward hacking and often the lack of intepretability which is quite critical in systems that have a strong impact on the revenue of a company.
|
With a system like this there is potential for strong drift given the rapid advance of agentic systems and user preference. Our intent behind adding the UX term into the reward shaping process was to further address the risk of degraded user experience. Looking deeper at the underlying methodology, reinforcement learning does not come without its complications, such as reward hacking and often the lack of interpretability, which is quite critical in systems that have a strong impact on the revenue of a company.
|
||||||
|
|
||||||
\subsection{Implications of Findings}
|
% \subsection{Implications of Findings} Interpretation of results and alternative scenarios with broader market implications.
|
||||||
|
|
||||||
Interpretation of results and altenrative scenarios with broader market implications.
|
|
||||||
|
|||||||
@@ -1,11 +1,24 @@
|
|||||||
\section{Conclusion}
|
\section{Conclusion}
|
||||||
|
|
||||||
For our troubles, we now conclude that...
|
Our research has explored how reinforcement learning works within pricing systems and environments which are substantially disrupted by an adversarial participant. Our findings include the optimization for our newly introduced metrics.
|
||||||
|
|
||||||
\subsection{Summary of contributions}
|
\subsection{Summary of contributions}
|
||||||
The authors contribution was not without the advice of many experienced experts in the field. We thank Marco Casalaina VP Products, Core AI and AI Futurist at Microsoft for the initial critical discussion on the topic of dynamic pricing systems and the spark which has lead to this work. Eugene Bykovets, PhD pointing out the parallels in blockchain systems and the complexity of anonymous interaction and understanding of intent. Importantly, the contributions of Alberto Martín Izquierdo, my academic advisor for the support over and for taking on the challenge of this ambitious work. Many breakthroughs were thanks to numerous discussions with my peers on the topics covered here.
|
The contribution was not without the advice of many experienced experts in the field. We thank Marco Casalaina, VP Products, Core AI and AI Futurist at Microsoft, for the initial critical discussion on the topic of dynamic pricing systems and the spark which has led to this work. We thank Eugene Bykovets, PhD, for pointing out the parallels in blockchain systems and the complexity of anonymous interaction and understanding of intent. Importantly, we acknowledge the contributions of Alberto Martín Izquierdo, my academic advisor, for the support throughout and for taking on the challenge of this ambitious work. Many breakthroughs were thanks to numerous discussions with my peers on the topics covered here.
|
||||||
A thanks to the head of innovation at Amadeus for insight into the industry split on the topic of collapsing margins. Finally we acknowledge the power and use of generative AI technologies for in depth research, rapid prototyping and surfacing of key topics and niches.
|
We also thank the head of innovation at Amadeus for insight into the industry split on the topic of collapsing margins. Finally, we acknowledge the power and use of generative AI technologies for in-depth research, rapid prototyping and surfacing of key topics and niches.
|
||||||
|
|
||||||
|
We now state explicitly what this work contributes:
|
||||||
|
\begin{itemize}
|
||||||
|
\item TPU-accelerated parallelization of the behavioral simulation and reinforcement learning pipeline, making large-scale factorial sweeps tractable.
|
||||||
|
\item Formalization of non-human transaction orchestration in e-commerce as a distinct source of contamination in dynamic pricing systems.
|
||||||
|
\item Definition of the Cost of Information (COI) as a mechanism-level quantity for pricing power, together with a theorem showing its erosion under increasing agent saturation.
|
||||||
|
\item Design and implementation of a controlled e-commerce research platform, built on a hybrid Kappa-Lambda architecture, for collecting and replaying high-fidelity interaction trajectories.
|
||||||
|
\item Construction and empirical validation of a behavioral distinguishability framework that distinguishes human and agent sessions from interaction signals alone using transition kernels and KL-based divergence.
|
||||||
|
\item Development of a generative contamination mechanism that injects learned agent behavior into the pricing environment for controlled robustness experiments.
|
||||||
|
\item Translation of behavioral distinguishability into a defensive pricing mechanism through a distributionally robust reinforcement learning formulation of pricing under non-stationary contamination.
|
||||||
|
\item Empirical evidence that agent contamination reduces revenue and that robustness is condition-dependent, requiring explicit calibration rather than a one-size-fits-all penalty.
|
||||||
|
\item Release of a reusable public experimental artifact for reproducing and extending research on dynamic pricing under agent-mediated traffic.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
\subsection{Future Works and Next Steps}
|
\subsection{Future Works and Next Steps}
|
||||||
|
|
||||||
During the eights months of research dedicated to this work, a plethora of opportunities and industry gaps was identified, sadly a majority of which could not be addressed directly.
|
During the eight months of research dedicated to this work, a plethora of opportunities and industry gaps were identified, the majority of which sadly could not be addressed directly.
|
||||||
|
|||||||
165
paper/src/chapters/auto/whoclicked_dataset_card.md
Normal file
165
paper/src/chapters/auto/whoclicked_dataset_card.md
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
---
|
||||||
|
pretty_name: whoclickedit
|
||||||
|
license: mit
|
||||||
|
language:
|
||||||
|
- en
|
||||||
|
task_categories:
|
||||||
|
- tabular-classification
|
||||||
|
task_ids:
|
||||||
|
- tabular-multi-class-classification
|
||||||
|
tags:
|
||||||
|
- e-commerce
|
||||||
|
- dynamic-pricing
|
||||||
|
- behavioral-telemetry
|
||||||
|
- human-vs-agent
|
||||||
|
- session-data
|
||||||
|
size_categories:
|
||||||
|
- 1K<n<10K
|
||||||
|
---
|
||||||
|
|
||||||
|
<img align="right" width="280" src="https://raw.githubusercontent.com/velocitatem/PHANTOM/main/docs/static/images/banner.svg" alt="PHANTOM research banner" />
|
||||||
|
|
||||||
|
# [whoclickedit](https://huggingface.co/datasets/velocitatem/whoclickedit)
|
||||||
|
|
||||||
|
[](https://huggingface.co/datasets/velocitatem/whoclickedit)
|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|
|
||||||
|
> **Event-level behavior data for dynamic pricing research.**
|
||||||
|
> This dataset captures how humans and automated agents browse, query prices, and move through the PHANTOM storefronts during controlled experiments.
|
||||||
|
|
||||||
|
## What this dataset gives you
|
||||||
|
|
||||||
|
- A single flat file (`whoclicked.csv`) with both interaction and price-log events.
|
||||||
|
- Explicit labels for actor origin: `actor_type` and `is_agent`.
|
||||||
|
- Provenance fields from Kafka envelopes when available.
|
||||||
|
- Metadata flattened into feature-ready `metadata_*` columns.
|
||||||
|
|
||||||
|
## Snapshot
|
||||||
|
|
||||||
|
| Metric | Value |
|
||||||
|
| --- | --- |
|
||||||
|
| Rows | `3874` |
|
||||||
|
| Columns | `42` |
|
||||||
|
| Time range (UTC) | `2025-12-05T09:43:31.301000+00:00` -> `2026-03-23T12:08:30.151000+00:00` |
|
||||||
|
| Unique sessions | `36` |
|
||||||
|
|
||||||
|
## Composition
|
||||||
|
|
||||||
|
### Rows by actor
|
||||||
|
| Actor | Rows | Share |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `human` | 798 | 20.6% |
|
||||||
|
| `agent` | 3076 | 79.4% |
|
||||||
|
|
||||||
|
### Rows by actor and record type
|
||||||
|
| Actor | Record type | Rows |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `agent` | `interaction` | 197 |
|
||||||
|
| `agent` | `price_log` | 2879 |
|
||||||
|
| `human` | `interaction` | 328 |
|
||||||
|
| `human` | `price_log` | 470 |
|
||||||
|
|
||||||
|
### Store mode coverage
|
||||||
|
| Store mode | Rows |
|
||||||
|
| --- | --- |
|
||||||
|
| `hotel` | 3628 |
|
||||||
|
| `airline` | 196 |
|
||||||
|
| `shop` | 50 |
|
||||||
|
|
||||||
|
### Top interaction events
|
||||||
|
| Interaction event | Count |
|
||||||
|
| --- | --- |
|
||||||
|
| `page_view` | 246 |
|
||||||
|
| `learn_more_about_item` | 91 |
|
||||||
|
| `view_item_page` | 88 |
|
||||||
|
| `add_item_to_cart` | 47 |
|
||||||
|
| `hover_over_title` | 23 |
|
||||||
|
| `checkout_start` | 20 |
|
||||||
|
| `hover_over_paragraph` | 6 |
|
||||||
|
| `remove_item` | 4 |
|
||||||
|
|
||||||
|
## Collection pipeline
|
||||||
|
|
||||||
|
Data is sourced from two roots inside PHANTOM:
|
||||||
|
|
||||||
|
- `experiments/collected_data` (human sessions)
|
||||||
|
- `experiments/agents/collected_data` (agent sessions)
|
||||||
|
|
||||||
|
Each session directory contains:
|
||||||
|
|
||||||
|
- `int.json`: user interaction events
|
||||||
|
- `price.json`: price quote observations
|
||||||
|
|
||||||
|
ETL behavior:
|
||||||
|
|
||||||
|
1. Accepts both Kafka-envelope records and flat payload records.
|
||||||
|
2. Flattens nested JSON to a tabular schema.
|
||||||
|
3. Preserves row-level provenance (`source_session_dir`, `source_row_index`, topic fields).
|
||||||
|
4. Adds modeling labels (`actor_type`, `is_agent`, `record_type`).
|
||||||
|
|
||||||
|
## Schema highlights
|
||||||
|
|
||||||
|
Core modeling fields:
|
||||||
|
|
||||||
|
- `actor_type`, `is_agent`, `record_type`
|
||||||
|
- `sessionId`, `experimentId`, `storeMode`, `ts`
|
||||||
|
- `eventName`, `page`, `productId`, `price`, `userAgent`
|
||||||
|
|
||||||
|
Kafka provenance fields:
|
||||||
|
|
||||||
|
- `kafka_partition_id`, `kafka_offset`, `kafka_timestamp_ms`, `kafka_compression`
|
||||||
|
- `kafka_is_transactional`, `kafka_headers`, `kafka_key_*`, `kafka_value_*`
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>Metadata columns in this release</summary>
|
||||||
|
|
||||||
|
- `metadata_cabinClass`
|
||||||
|
- `metadata_dateIndex`
|
||||||
|
- `metadata_dwellTime`
|
||||||
|
- `metadata_elementText`
|
||||||
|
- `metadata_fareRule`
|
||||||
|
- `metadata_flightType`
|
||||||
|
- `metadata_itemCount`
|
||||||
|
- `metadata_nights`
|
||||||
|
- `metadata_price`
|
||||||
|
- `metadata_referrer`
|
||||||
|
- `metadata_roomType`
|
||||||
|
- `metadata_total`
|
||||||
|
- `metadata_type`
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
```python
|
||||||
|
from datasets import load_dataset
|
||||||
|
|
||||||
|
ds = load_dataset("velocitatem/whoclickedit")
|
||||||
|
```
|
||||||
|
|
||||||
|
Recommended split strategy:
|
||||||
|
|
||||||
|
- Prefer session-aware or time-aware splits.
|
||||||
|
- Do not split rows from the same `sessionId` across train and test.
|
||||||
|
|
||||||
|
## Intended use
|
||||||
|
|
||||||
|
- Human-vs-agent behavior classification.
|
||||||
|
- Session-level telemetry modeling for dynamic pricing defenses.
|
||||||
|
- Robustness experiments under agent-mediated reconnaissance.
|
||||||
|
|
||||||
|
## Safety and limitations
|
||||||
|
|
||||||
|
- `userAgent` and referrer metadata can be quasi-identifying in very small samples.
|
||||||
|
- Data comes from a controlled research platform, not a full production marketplace.
|
||||||
|
- Current release has stronger coverage for `hotel` flows than `airline` flows.
|
||||||
|
|
||||||
|
## Citation
|
||||||
|
|
||||||
|
If you use this dataset, cite the PHANTOM thesis project and link this page:
|
||||||
|
`https://huggingface.co/datasets/velocitatem/whoclickedit`
|
||||||
3
paper/src/chapters/figures/.gitignore
vendored
Normal file
3
paper/src/chapters/figures/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
.pdf-view-restore
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
alpha,revenue_delta,revenue_delta_pct,reward_delta,reward_delta_pct,volatility_delta,supra_delta,coi_leakage_delta
|
||||||
|
0.0,-17982.383542886935,-5.11072862876989,-17145.799161982606,-5.235033672101227,0.001232973729699119,0.0,-0.0030412479577408003
|
||||||
|
0.1,-14962.041501283413,-4.410637208586118,-14303.760282736213,-4.531344436782669,0.0011858665298920962,0.0,-0.004133727080174038
|
||||||
|
0.2,-16153.416666167905,-4.826514761457546,-15398.621298776357,-4.9418165571901715,0.00200624274016295,0.0,-0.0033201883450373615
|
||||||
|
0.3,-17294.9275360335,-5.382423616385397,-16544.91845114401,-5.533399709364953,-0.0011022484400295268,0.0,-0.0029151149203366505
|
||||||
|
0.4,-19661.294346174283,-6.250307313590199,-18728.35578200908,-6.3953153560217535,3.582812967113658e-05,0.0,-0.0038123361988749577
|
||||||
|
0.5,-16411.03168918495,-5.3630681206030015,-15638.77510066732,-5.4888928630525315,0.00015428950526953644,0.0,-0.00439661338956944
|
||||||
|
0.6,-14729.668247641937,-5.069964928178309,-13912.22417824401,-5.148827377884945,-0.002735776807082743,0.0,-0.004310129386364658
|
||||||
|
0.7,-21160.81910514756,-7.351404104505076,-20171.762105623755,-7.525169314210056,-0.0008903632602569461,0.0,-0.0026198461183787186
|
||||||
|
0.8,-16404.76825612632,-5.9342582959227075,-15645.025250480074,-6.078699946285722,0.0010338614665691137,0.0,-0.002542765270289696
|
||||||
|
0.9,-8674.090655496111,-3.2592966246269577,-8371.30734891587,-3.378943339994106,-0.0005579187914590139,0.0,-0.0013720835439427759
|
||||||
|
1.0,768.8099906174757,0.2991618705853567,399.7394696234842,0.16706914330070038,0.0014659834822295797,0.0,-0.0007600066499474645
|
||||||
|
@@ -0,0 +1,23 @@
|
|||||||
|
alpha,mode,runs,revenue_mean,reward_mean,supra_mean,volatility_mean,coi_leakage_mean,coi_level_mean
|
||||||
|
0.0,baseline,36,351855.57381502265,327520.32242613373,0.0,0.06922494093544151,0.11931704468268205,136.80105514058158
|
||||||
|
0.0,defended,35,333873.1902721357,310374.5232641511,0.0,0.07045791466514063,0.11627579672494125,136.81832905386602
|
||||||
|
0.1,baseline,32,339226.3020897988,315662.6136522988,0.0,0.06952778671756812,0.11924519238669087,136.47864859317326
|
||||||
|
0.1,defended,33,324264.2605885154,301358.8533695626,0.0,0.07071365324746022,0.11511146530651684,136.7200845824852
|
||||||
|
0.2,baseline,31,334680.76789409376,311598.399506997,0.0,0.06848006194428993,0.11597869134898402,136.83684469591932
|
||||||
|
0.2,defended,35,318527.35122792586,296199.77820822067,0.0,0.07048630468445288,0.11265850300394666,137.2758153292305
|
||||||
|
0.3,baseline,30,321322.30327214615,299000.9636054795,0.0,0.07085669473747759,0.11527347603412934,136.4452630715689
|
||||||
|
0.3,defended,44,304027.37573611265,282456.0451543355,0.0,0.06975444629744806,0.11235836111379269,136.4704115371568
|
||||||
|
0.4,baseline,33,314565.2423109539,292844.914432166,0.0,0.07031811881503117,0.11300307992768284,136.72547178046122
|
||||||
|
0.4,defended,38,294903.9479647796,274116.55865015695,0.0,0.0703539469447023,0.10919074372880788,136.75671002806396
|
||||||
|
0.5,baseline,33,306000.80625751516,284916.7489847879,0.0,0.06938663916591635,0.11118137138243217,136.9528780620641
|
||||||
|
0.5,defended,35,289589.7745683302,269277.9738841206,0.0,0.06954092867118589,0.10678475799286273,136.65018588845163
|
||||||
|
0.6,baseline,28,290528.0106727377,270201.7985298805,0.0,0.07139577980623227,0.11081647254398667,135.258395468266
|
||||||
|
0.6,defended,41,275798.3424250958,256289.57435163652,0.0,0.06866000299914952,0.10650634315762202,136.3194947785247
|
||||||
|
0.7,baseline,40,287847.3119465684,268057.25244656845,0.0,0.07132313199532896,0.10746267580456732,137.0170522633547
|
||||||
|
0.7,defended,40,266686.49284142087,247885.4903409447,0.0,0.07043276873507201,0.1048428296861886,136.56834095392904
|
||||||
|
0.8,baseline,26,276441.76303208206,257374.52726285128,0.0,0.06945655282263205,0.1063246766773884,136.66765260798618
|
||||||
|
0.8,defended,39,260036.99477595574,241729.5020123712,0.0,0.07049041428920116,0.1037819114070987,136.61222667078658
|
||||||
|
0.9,baseline,35,266133.8213268301,247749.2667554015,0.0,0.0709569180547784,0.10455882265976374,136.5370653814206
|
||||||
|
0.9,defended,39,257459.73067133396,239377.95940648564,0.0,0.07039899926331938,0.10318673911582096,136.7368893225831
|
||||||
|
1.0,baseline,35,256987.96076959255,239265.888198164,0.0,0.06888231148034313,0.10369761394735275,136.68691718467974
|
||||||
|
1.0,defended,30,257756.77076021003,239665.62766778748,0.0,0.07034829496257271,0.10293760729740528,136.65287739235566
|
||||||
|
@@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
"bundle": "engine/studies/results/wandb_sweep_bundles/bundle_20260317_093826",
|
||||||
|
"focus_cohort": "max_alpha_coverage",
|
||||||
|
"alpha_cells": 11,
|
||||||
|
"alpha_min": 0.0,
|
||||||
|
"alpha_max": 1.0,
|
||||||
|
"mean_revenue_delta_pct": -4.787221975639986,
|
||||||
|
"mean_reward_delta_pct": -4.91730667541704,
|
||||||
|
"zone_summary": [
|
||||||
|
{
|
||||||
|
"zone": "high_alpha_0_7_plus",
|
||||||
|
"alpha_cells": 4,
|
||||||
|
"revenue_delta_pct_mean": -4.0614492886173466,
|
||||||
|
"reward_delta_pct_mean": -4.2039358642972955,
|
||||||
|
"coi_leakage_delta_mean": -0.0018236753956396637,
|
||||||
|
"volatility_delta_mean": 0.00026289072427068336
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"zone": "low_alpha_below_0_7",
|
||||||
|
"alpha_cells": 7,
|
||||||
|
"revenue_delta_pct_mean": -5.201949225367208,
|
||||||
|
"reward_delta_pct_mean": -5.324947138914036,
|
||||||
|
"coi_leakage_delta_mean": -0.0037041938968711296,
|
||||||
|
"volatility_delta_mean": 0.00011102505536893832
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
zone,alpha_cells,revenue_delta_pct_mean,reward_delta_pct_mean,coi_leakage_delta_mean,volatility_delta_mean
|
||||||
|
high_alpha_0_7_plus,4,-4.0614492886173466,-4.2039358642972955,-0.0018236753956396637,0.00026289072427068336
|
||||||
|
low_alpha_below_0_7,7,-5.201949225367208,-5.324947138914036,-0.0037041938968711296,0.00011102505536893832
|
||||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"runs": 340,
|
||||||
|
"tiers": 5,
|
||||||
|
"alphas": 6,
|
||||||
|
"status": "ok",
|
||||||
|
"mean_tier_revenue_robust": 190714.62212212436,
|
||||||
|
"mean_tier_revenue_no_robust": 197371.17216609977,
|
||||||
|
"mean_tier_revenue_delta": -6656.5500439754105,
|
||||||
|
"mean_tier_revenue_delta_pct": -3.3726050116242514
|
||||||
|
}
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
tier,alpha,runs_robust,runs_no_robust,eval_revenue_mean_delta,eval_revenue_mean_delta_pct,eval_reward_mean_delta,eval_reward_mean_delta_pct,eval_coi_level_mean_delta,eval_coi_level_mean_delta_pct,eval_margin_mean_delta,eval_margin_mean_delta_pct,objective_score_delta,objective_score_delta_pct,train_alpha_adv_delta,train_alpha_adv_delta_pct
|
||||||
|
dqn,0.0,5.0,2.0,-31308.987414117495,-8.73651226889534,-1909.7427407095092,-0.5742991901121623,-2.8982436567700063,-2.1108702433020436,-0.001972064237093285,-0.2116777198290971,-1909.7427407095092,-0.5742991901121623,,
|
||||||
|
dqn,0.1,8.0,4.0,-7723.542755668925,-2.2789188721535494,-74239.37371836061,-21.063854618469847,1.7435833801418141,1.2859365583872486,0.0011891962142838164,0.1278074871971924,-74239.37371836061,-21.063854618469847,0.17619791666666657,176.19791666666694
|
||||||
|
dqn,0.25,7.0,3.0,-12344.82818986749,-3.7035466052614323,93154.03627578515,36.06691230407512,0.03214544949867104,0.023426184113378143,1.763733457238459e-05,0.001893256490383175,93154.03627578515,36.06691230407512,0.14530952380952394,58.12380952380958
|
||||||
|
dqn,0.4,5.0,10.0,-7816.300706216833,-2.4694340725162824,-42362.74668471434,-13.411888482380219,0.6251272343707797,0.4579446603861758,0.0002750615520492605,0.02953644634355915,-42362.74668471434,-13.411888482380219,0.09856666666666747,24.64166666666691
|
||||||
|
dqn,0.6,5.0,4.0,-16150.011887742497,-5.347485987139731,-28508.74710866122,-10.151356300001888,-0.63306323164079,-0.46056970247177387,-0.00034537433455417155,-0.0370668515552649,-28508.74710866122,-10.151356300001888,0.1361999999999981,22.699999999999644
|
||||||
|
dqn,0.8,7.0,6.0,-18191.8826663699,-6.440527544692988,-55296.94441124235,-20.19273590083627,-0.796733634735034,-0.579832425016392,-0.0006423984775592029,-0.0689476165584585,-55296.94441124235,-20.19273590083627,0.1532857142857158,19.160714285714512
|
||||||
|
linear,0.0,9.0,8.0,-14967.67388588126,-4.273413942959129,-20107.23171681742,-6.60039931288617,-0.06127790826209889,-0.04564810574240612,-7.607744079518586e-05,-0.008177885913528719,-20107.23171681742,-6.60039931288617,,
|
||||||
|
linear,0.1,3.0,5.0,-24531.399901538738,-7.171831328305365,-96669.7835552101,-26.44920711447249,-0.3680976907859872,-0.2733723058172187,-0.0002515287835096469,-0.02702956778346356,-96669.7835552101,-26.44920711447249,,
|
||||||
|
linear,0.25,6.0,9.0,-14840.859479571285,-4.520682292638562,-26510.179456423968,-8.033117756667396,-0.13734776448131925,-0.10212641096230607,-9.41162442338328e-05,-0.010115001392981545,-26510.179456423968,-8.033117756667396,,
|
||||||
|
linear,0.4,4.0,11.0,-17196.7642560167,-5.486915251242723,-74520.10209817477,-25.042311510043184,0.12217076984330788,0.09098828726103136,0.00010713887099822461,0.011516865671259795,-74520.10209817477,-25.042311510043184,,
|
||||||
|
linear,0.6,5.0,3.0,-14284.06615788641,-4.854766876637072,38417.71856593515,14.088596762512362,0.24251461234271687,0.1806530855220358,0.0002606811969937395,0.028024824619509187,38417.71856593515,14.088596762512362,,
|
||||||
|
linear,0.8,4.0,11.0,-10840.488575784548,-3.933600919557566,15749.581078662042,6.447651726824251,0.028051260535562506,0.020876236575910773,5.361882659971062e-05,0.005763158099097226,15749.581078662042,6.447651726824251,,
|
||||||
|
qtable,0.0,9.0,8.0,-18644.457288398524,-8.15323701554329,32993.42568058451,20.675688115613053,10.369779227648095,10.682768960780463,0.018566897519637582,2.0803084179092814,32993.42568058451,20.675688115613053,0.11839814814814797,
|
||||||
|
qtable,0.1,6.0,5.0,-12549.400855549495,-4.616991193742389,-37207.79701261924,-15.336047254435487,0.0884057957559321,0.07703761042583206,-0.01127789819771663,-1.2272540823820444,-37207.79701261924,-15.336047254435487,0.07577777777777787,75.77777777777803
|
||||||
|
qtable,0.25,6.0,5.0,-1534.3527429780224,-0.5456640130847226,18433.43663451099,7.304472653867784,-0.5776125938941306,-0.45734160960552755,-0.003316338490628068,-0.3584028328803385,18433.43663451099,7.304472653867784,0.1181458333333334,47.258333333333354
|
||||||
|
qtable,0.4,8.0,6.0,-15146.258176090778,-5.274860187729517,-37364.22587794208,-13.005651205148677,0.4611471727478005,0.3629050099230144,0.0071046453227539,0.7751478467862876,-37364.22587794208,-13.005651205148677,0.11010416666666772,27.52604166666698
|
||||||
|
qtable,0.6,6.0,6.0,-9577.578548656049,-3.9322693501816666,-19088.152339068736,-9.571307395166029,0.9081750157567683,0.7495917946306662,0.0015520804425310786,0.16838348372043557,-19088.152339068736,-9.571307395166029,0.16983333333333228,28.305555555555333
|
||||||
|
qtable,0.8,5.0,2.0,-52751.680936846446,-19.699089872409548,-16508.209313987172,-7.589601869470744,-15.022454081083623,-11.215398490282094,-0.007791824761087751,-0.8384414846099099,-16508.209313987172,-7.589601869470744,0.11120000000000174,13.900000000000245
|
||||||
|
static,0.0,5.0,6.0,-4782.871053113384,-5.233544525848519,14411.4689779756,25.538141347978577,1.307060701942973,1.8731997380823568,0.002537468952847566,0.2911381045328444,14411.4689779756,25.538141347978577,,
|
||||||
|
static,0.1,8.0,5.0,1629.4524528499896,1.880088900553112,-5347.078589385725,-8.14812684380662,0.3600324838305795,0.5019134064795009,-4.6492644957929485e-05,-0.005316014641356001,-5347.078589385725,-8.14812684380662,,
|
||||||
|
static,0.25,5.0,6.0,-9938.662276761897,-10.398087633377964,-23616.087243780566,-27.701108621456626,-3.0513860773271233,-4.099238223547561,-0.003519771479853273,-0.40113716461596144,-23616.087243780566,-27.701108621456626,,
|
||||||
|
static,0.4,3.0,4.0,1850.8400595222774,2.1912497828943436,15058.659457798465,23.67199439061036,3.669612467486587,5.430169778169349,0.006763447803564415,0.7804393835882188,15058.659457798465,23.67199439061036,,
|
||||||
|
static,0.6,6.0,5.0,1038.893948415236,1.2765037688226162,-6062.864079504681,-9.363144945348399,-1.712609061865976,-2.3996341009364213,-0.0042285583442709385,-0.48362088973179423,-6062.864079504681,-9.363144945348399,,
|
||||||
|
static,0.8,3.0,7.0,2696.6340631967323,3.6826150812750567,149.22406835677975,0.27280281303997084,0.8491716126507072,1.2427748744725668,0.0032786525965587954,0.3777595573932637,149.22406835677975,0.27280281303997084,,
|
||||||
|
surge,0.0,6.0,6.0,-606.73760243367,-5.066579306500225,-244.17585425326251,-5.525800641331023,0.014874931199557295,0.09186560988877175,0.0019308940532419272,0.4471794260021321,-244.17585425326251,-5.525800641331023,,
|
||||||
|
surge,0.1,2.0,5.0,169.78743573408792,1.446343107913299,-1012.7706974660168,-20.02053666691211,-0.14459518037699226,-0.864651254901582,-0.0018650458785858248,-0.4260349899970559,-1012.7706974660168,-20.02053666691211,,
|
||||||
|
surge,0.25,10.0,7.0,-128.20993816584632,-1.1276930411162496,-81.21373487263281,-1.7081453033360994,0.3008506477195141,1.839047728806548,0.0030750148302954305,0.7102446987902812,-81.21373487263281,-1.7081453033360994,,
|
||||||
|
surge,0.4,6.0,6.0,-473.03722764431404,-4.297928307550563,28.557452243338048,0.6755106104955642,-0.5027452173053764,-3.072002360121898,-0.005581380442163164,-1.288152985482699,28.557452243338048,0.6755106104955642,,
|
||||||
|
surge,0.6,2.0,5.0,307.79436325796996,3.0356727142643067,2060.57396030564,63.382050333909866,0.2339650444065704,1.438519400758399,0.001302270025389629,0.30077697380833807,2060.57396030564,63.382050333909866,,
|
||||||
|
surge,0.8,3.0,3.0,423.15386247993047,4.372210191290083,1117.0942083304312,34.86182570616373,0.8971464536957541,5.327339899805159,0.007068630716831503,1.6094191039618562,1117.0942083304312,34.86182570616373,,
|
||||||
|
@@ -0,0 +1,61 @@
|
|||||||
|
tier,alpha,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_margin_mean_mean,eval_margin_mean_std,objective_score_mean,objective_score_std,train_alpha_adv_mean,train_alpha_adv_std
|
||||||
|
dqn,0.0,no_robust,2,358369.40933039243,3531.782519351935,332534.46523867303,114183.5587841961,137.30089123035202,0.8184776440325546,0.9316352418598786,0.0006839003676302996,332534.46523867303,114183.5587841961,,
|
||||||
|
dqn,0.0,robust,5,327060.42191627494,24311.17412598574,330624.7224979635,62834.39223547943,134.40264757358202,6.160000643680792,0.9296631776227853,0.004262039730140749,330624.7224979635,62834.39223547943,0.17835000000000004,0.08829347371125472
|
||||||
|
dqn,0.1,no_robust,4,338912.58043645386,19584.736810155388,352449.13650924934,34076.74819101191,135.58860029055563,3.4055508991301524,0.9304589585186211,0.0023438665484978773,352449.13650924934,34076.74819101191,0.0999999999999998,0.0
|
||||||
|
dqn,0.1,robust,8,331189.03768078494,8060.912085646968,278209.7627908887,57861.69545853692,137.33218367069745,0.43113256118808096,0.931648154732905,0.000296560958972609,278209.7627908887,57861.69545853692,0.2761979166666664,0.09826648189130198
|
||||||
|
dqn,0.25,no_robust,3,333324.4996115304,6101.717861804452,258281.15112936878,46772.05216097596,137.2201692904545,0.9866477887862672,0.9315871706751672,0.0006356053229300815,258281.15112936878,46772.05216097596,0.25,0.0
|
||||||
|
dqn,0.25,robust,7,320979.6714216629,7345.8761269427705,351435.18740515393,40320.63699261721,137.25231473995316,0.3527287960309152,0.9316048080097395,0.0002575240668471541,351435.18740515393,40320.63699261721,0.39530952380952394,0.073021206240698
|
||||||
|
dqn,0.4,no_robust,10,316521.94295076875,3631.1820920182718,315859.66987697606,59129.03566963754,136.50715652926755,0.5085743959240285,0.931261495881483,0.00031280530251053175,315859.66987697606,59129.03566963754,0.3999999999999993,0.0
|
||||||
|
dqn,0.4,robust,5,308705.6422445519,10654.571556448245,273496.9231922617,68868.59270778317,137.13228376363833,0.9543108715306617,0.9315365574335323,0.0006302636717132419,273496.9231922617,68868.59270778317,0.49856666666666677,0.05745573175159429
|
||||||
|
dqn,0.6,no_robust,4,302011.2988903938,2354.1141598720183,280836.828756133,58683.00124997926,137.4522093492651,0.4692723362517602,0.9317606434396914,0.0003317518021682495,280836.828756133,58683.00124997926,0.600000000000001,0.0
|
||||||
|
dqn,0.6,robust,5,285861.2870026513,10386.571631344234,252328.08164747176,59388.56063758225,136.8191461176243,1.0629203361893034,0.9314152691051373,0.0005692783702932289,252328.08164747176,59388.56063758225,0.7361999999999991,0.07108625433623189
|
||||||
|
dqn,0.8,no_robust,6,282459.51189759385,2625.018247527438,273845.72691287595,66378.16690732416,137.4075681801531,0.29728950101826707,0.9317196295169007,0.00022799290978965786,273845.72691287595,66378.16690732416,0.7999999999999985,0.0
|
||||||
|
dqn,0.8,robust,7,264267.62923122395,6771.288971321149,218548.7825016336,50043.2009443344,136.61083454541807,1.2319662937254596,0.9310772310393415,0.0010118564779437284,218548.7825016336,50043.2009443344,0.9532857142857143,0.04709817507333055
|
||||||
|
linear,0.0,no_robust,8,350250.9723061577,3156.286820918861,304636.59490360576,71682.88027353655,134.2397614654424,0.32611787466946035,0.9302824910938235,0.00024020749661685483,304636.59490360576,71682.88027353655,,
|
||||||
|
linear,0.0,robust,9,335283.29842027643,7707.594869976611,284529.36318678834,55524.58819004573,134.1784835571803,0.4477314164684001,0.9302064136530284,0.00034781034181738526,284529.36318678834,55524.58819004573,,
|
||||||
|
linear,0.1,no_robust,5,342052.1032713031,2576.546352056584,365492.17954557994,44890.93522299766,134.65068807375954,0.2181027640393531,0.930569018064469,0.00014058935916940913,365492.17954557994,44890.93522299766,,
|
||||||
|
linear,0.1,robust,3,317520.7033697644,4796.580459456527,268822.39599036984,39256.421140635124,134.28259038297355,0.24570499109363475,0.9303174892809594,0.00018817899183709092,268822.39599036984,39256.421140635124,,
|
||||||
|
linear,0.25,no_robust,9,328288.0441241802,2178.525494145428,330011.0898339667,38591.36053388808,134.48799697074742,0.2199303973026469,0.9304619997297959,0.00015341642413402035,330011.0898339667,38591.36053388808,,
|
||||||
|
linear,0.25,robust,6,313447.18464460893,11811.426711620714,303500.9103775427,63358.917144214036,134.3506492062661,0.2947034403278951,0.9303678834855621,0.00021446628431268986,303500.9103775427,63358.917144214036,,
|
||||||
|
linear,0.4,no_robust,11,313414.0672597746,1982.9537556159262,297576.7714904776,69396.90446617964,134.2708754290745,0.3062093691351849,0.9302780292522507,0.00023067974755288992,297576.7714904776,69396.90446617964,,
|
||||||
|
linear,0.4,robust,4,296217.3030037579,5109.898340355844,223056.66939230284,38293.73688466607,134.3930461989178,0.12347753686382154,0.9303851681232489,7.324605809708878e-05,223056.66939230284,38293.73688466607,,
|
||||||
|
linear,0.6,no_robust,3,294227.64307441004,2081.9176570448135,272686.62176604365,66672.50905805513,134.24327165069943,0.30764332256042104,0.9301795837547151,0.00020453921786790446,272686.62176604365,66672.50905805513,,
|
||||||
|
linear,0.6,robust,5,279943.5769165236,9866.031719660255,311104.3403319788,28363.930707781863,134.48578626304214,0.21280262186464388,0.9304402649517088,0.00020533894868120649,311104.3403319788,28363.930707781863,,
|
||||||
|
linear,0.8,no_robust,11,275586.89347174135,1618.038877505867,244268.4832547461,56201.44465269986,134.36933631960773,0.2845660213184439,0.9303723007028001,0.00017640716421186918,244268.4832547461,56201.44465269986,,
|
||||||
|
linear,0.8,robust,4,264746.4048959568,7976.6279174956235,260018.06433340814,57942.49882730146,134.3973875801433,0.31511916357643405,0.9304259195293998,0.00023606570471334208,260018.06433340814,57942.49882730146,,
|
||||||
|
qtable,0.0,no_robust,8,228675.52179404112,103199.70453252994,159575.94976328663,95848.81008103945,97.07014413321637,33.0637115678536,0.8925069648229078,0.04890522141482132,159575.94976328663,95848.81008103945,0.0,0.0
|
||||||
|
qtable,0.0,robust,9,210031.0645056426,84361.3834579348,192569.37544387113,116824.7880426837,107.43992336086447,21.41128645838254,0.9110738623425454,0.019188350719133364,192569.37544387113,116824.7880426837,0.11839814814814797,0.061909456985161225
|
||||||
|
qtable,0.1,no_robust,5,271809.0706466638,14898.209045050968,242616.60384397948,49181.45526408063,114.75666919996793,3.461383158930426,0.9189538140159812,0.002294693249439748,242616.60384397948,49181.45526408063,0.0999999999999998,0.0
|
||||||
|
qtable,0.1,robust,6,259259.66979111428,102995.29934229614,205408.80683136024,94155.1845420674,114.84507499572386,36.206421837506966,0.9076759158182646,0.048591979839360346,205408.80683136024,94155.1845420674,0.17577777777777767,0.06720562696899951
|
||||||
|
qtable,0.25,no_robust,5,281190.01916657295,70274.10208723843,252358.2126733039,129868.46825082717,126.29784427276161,15.368804047323954,0.9253103453385114,0.009044883517550522,252358.2126733039,129868.46825082717,0.25,0.0
|
||||||
|
qtable,0.25,robust,6,279655.6664235949,93056.2549557545,270791.6493078149,116021.46257259768,125.72023167886748,26.760714047253796,0.9219940068478834,0.022785695882060884,270791.6493078149,116021.46257259768,0.3681458333333334,0.08845114686619042
|
||||||
|
qtable,0.4,no_robust,6,287140.4669895195,32698.16434426399,287292.23388022534,83855.95000252876,127.07104066863859,9.200301166154173,0.9165535777734913,0.01306001923887748,287292.23388022534,83855.95000252876,0.3999999999999993,0.0
|
||||||
|
qtable,0.4,robust,8,271994.2088134287,79259.3185780895,249928.00800228326,88265.30801790548,127.53218784138639,23.406428094683015,0.9236582230962452,0.020073747007871224,249928.00800228326,88265.30801790548,0.510104166666667,0.09294655989347765
|
||||||
|
qtable,0.6,no_robust,6,243563.64469828535,67006.60707045678,199430.98211127534,79119.52886604435,121.15594411011905,17.91243944823949,0.9217533740470492,0.011558797825966702,199430.98211127534,79119.52886604435,0.600000000000001,0.0
|
||||||
|
qtable,0.6,robust,6,233986.0661496293,43155.478617087436,180342.8297722066,48117.79957836251,122.06411912587582,12.160951090203252,0.9233054544895802,0.006840854872863436,180342.8297722066,48117.79957836251,0.7698333333333333,0.09107066853090896
|
||||||
|
qtable,0.8,no_robust,2,267787.4017455507,1552.038101264713,217510.87340156303,45358.788584678456,133.9448981157492,0.47346860040111405,0.9293224278749692,0.0002998116010539045,217510.87340156303,45358.788584678456,0.7999999999999985,0.0
|
||||||
|
qtable,0.8,robust,5,215035.72080870424,32869.73253165852,201002.66408757586,63247.67956376057,118.92244403466557,8.586916805142152,0.9215306031138815,0.004644709320891907,201002.66408757586,63247.67956376057,0.9112000000000002,0.07381653307732307
|
||||||
|
static,0.0,no_robust,6,91388.75248869567,13415.65534300268,56431.15832748852,8525.098185703384,69.77689967440658,3.670744870085874,0.8715688236409825,0.005831496806767582,56431.15832748852,8525.098185703384,,
|
||||||
|
static,0.0,robust,5,86605.88143558228,7614.909395960895,70842.62730546412,8033.737230392738,71.08396037634955,3.6802889678420283,0.8741062925938301,0.005083911544334936,70842.62730546412,8033.737230392738,,
|
||||||
|
static,0.1,no_robust,5,86668.90445290186,8037.955688932984,65623.40881389238,19329.448262530004,71.73199185012882,4.199046495412734,0.874577067494122,0.006610505646022198,65623.40881389238,19329.448262530004,,
|
||||||
|
static,0.1,robust,8,88298.35690575185,9576.838833058617,60276.33022450666,13359.490452744656,72.0920243339594,6.7706096714767865,0.8745305748491641,0.010083585815241344,60276.33022450666,13359.490452744656,,
|
||||||
|
static,0.25,no_robust,6,95581.63603909909,8345.698435455577,85253.22060752509,13111.526873622026,74.43788116042678,2.1078820386097368,0.8774483618896327,0.0037254791853004897,85253.22060752509,13111.526873622026,,
|
||||||
|
static,0.25,robust,5,85642.97376233719,9472.880627242153,61637.13336374452,15937.429780623212,71.38649508309966,4.0264905454627264,0.8739285904097794,0.005323853359397925,61637.13336374452,15937.429780623212,,
|
||||||
|
static,0.4,no_robust,4,84465.04245981346,12101.831388745604,63613.81812329075,7778.361846092061,67.5782271530322,3.9088888968092,0.8666205147756862,0.007149121199217965,63613.81812329075,7778.361846092061,,
|
||||||
|
static,0.4,robust,3,86315.88251933573,8642.748496122398,78672.47758108922,17823.74997200773,71.24783962051879,2.790416943786253,0.8733839625792507,0.005990544453538607,78672.47758108922,17823.74997200773,,
|
||||||
|
static,0.6,no_robust,5,81385.88962988024,12343.523894997037,64752.43216774836,23486.779472906223,71.36959177224794,5.100226704959064,0.874353948320141,0.007787250295491337,64752.43216774836,23486.779472906223,,
|
||||||
|
static,0.6,robust,6,82424.78357829548,9831.886701625144,58689.56808824368,12672.506035553573,69.65698271038197,3.484982360048201,0.8701253899758701,0.005917711231889304,58689.56808824368,12672.506035553573,,
|
||||||
|
static,0.8,no_robust,7,73226.06364450825,4447.877985963851,54700.340767716196,14406.881298569717,68.32867561883204,3.68262917356943,0.8679204886788817,0.007467501164611224,54700.340767716196,14406.881298569717,,
|
||||||
|
static,0.8,robust,3,75922.69770770498,5046.089536162847,54849.564836072976,22780.98012221352,69.17784723148274,1.5268167784698885,0.8711991412754405,0.0033278715575433297,54849.564836072976,22780.98012221352,,
|
||||||
|
surge,0.0,no_robust,6,11975.290738176132,411.4052900076416,4418.832131346071,896.5828048394391,16.192056219479124,0.8040364003224534,0.4317940274006973,0.008271862690929055,4418.832131346071,896.5828048394391,,
|
||||||
|
surge,0.0,robust,6,11368.553135742462,623.8217438159004,4174.6562770928085,639.9963040241264,16.20693115067868,0.9853827520149101,0.4337249214539392,0.010371668289035135,4174.6562770928085,639.9963040241264,,
|
||||||
|
surge,0.1,no_robust,5,11739.084232858655,332.778792718381,5058.659087494994,1110.8409258976824,16.722948073839394,0.6578121995950104,0.4377682402562083,0.005683401047550787,5058.659087494994,1110.8409258976824,,
|
||||||
|
surge,0.1,robust,2,11908.871668592743,81.41250285550258,4045.8883900289775,784.7169500268457,16.5783528934624,0.4088194924856508,0.4359031943776225,0.004531137621699143,4045.8883900289775,784.7169500268457,,
|
||||||
|
surge,0.25,no_robust,7,11369.223138855004,236.1121240061105,4754.4980344481255,1038.0550037539617,16.359045119223275,0.3945156775653057,0.4329514652531622,0.0038762110261952457,4754.4980344481255,1038.0550037539617,,
|
||||||
|
surge,0.25,robust,10,11241.013200689158,684.503587066406,4673.284299575493,1187.78635131025,16.65989576694279,1.0515950311117155,0.4360264800834576,0.009701952962125513,4673.284299575493,1187.78635131025,,
|
||||||
|
surge,0.4,no_robust,6,11006.168409400554,364.6584583108646,4227.535704048808,1414.7964077877168,16.365391636138824,0.9138430058543858,0.4332855262584901,0.008024003783434592,4227.535704048808,1414.7964077877168,,
|
||||||
|
surge,0.4,robust,6,10533.13118175624,526.0758051960169,4256.093156292146,783.7965507386594,15.862646418833448,0.7732699435426456,0.42770414581632693,0.008967505611725135,4256.093156292146,783.7965507386594,,
|
||||||
|
surge,0.6,no_robust,5,10139.2472848498,97.448078425168,3251.037082975553,742.2100315641153,16.26429537781848,0.4432465691073604,0.4329686574409998,0.004121820888165019,3251.037082975553,742.2100315641153,,
|
||||||
|
surge,0.6,robust,2,10447.04164810777,524.0029334247373,5311.611043281193,1808.6200710093085,16.49826042222505,0.6088756908260344,0.43427092746638946,0.007817511630542989,5311.611043281193,1808.6200710093085,,
|
||||||
|
surge,0.8,no_robust,3,9678.259826640971,272.83530913170915,3204.3479815026553,556.8799617962688,16.840420745981802,0.4589959822922529,0.43920385308157944,0.004953937449529005,3204.3479815026553,556.8799617962688,,
|
||||||
|
surge,0.8,robust,3,10101.413689120902,526.8318040489241,4321.442189833087,1284.166148011517,17.737567199677557,0.6586775330563983,0.44627248379841095,0.004644261847052545,4321.442189833087,1284.166148011517,,
|
||||||
|
@@ -0,0 +1,11 @@
|
|||||||
|
tier,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_margin_mean_mean,eval_margin_mean_std,objective_score_mean,objective_score_std,train_alpha_adv_mean,train_alpha_adv_std
|
||||||
|
dqn,no_robust,29,315185.66674813855,23538.781000060844,302576.8036266896,62951.88633145167,136.82560356086017,1.3692652218935986,0.9313739013618878,0.0009314135057224836,302576.8036266896,62951.88633145167,0.45740740740740693,0.2368477698794438
|
||||||
|
dqn,robust,37,306875.13950902375,27585.74444520695,283724.7169827867,69843.05611741856,136.68837571992978,2.3797541654948753,0.9312171495138941,0.0016512408492580111,283724.7169827867,69843.05611741856,0.5058198198198196,0.28324483129860284
|
||||||
|
linear,no_robust,47,315501.15296155965,27105.014861872147,298149.1730416604,67664.7308344108,134.36884359609928,0.29743647613433244,0.9303607531364,0.0002152647006739543,298149.1730416604,67664.7308344108,,
|
||||||
|
linear,robust,31,306269.9232239004,26399.875293394463,279872.824370329,54401.104602086416,134.32737693008372,0.31909212993628877,0.9303375215162144,0.00025000448833182963,279872.824370329,54401.104602086416,,
|
||||||
|
qtable,no_robust,32,259818.72178238883,67188.58622318009,222088.83510765125,94450.12569617687,116.84641954166946,22.42810298937963,0.9140582213134033,0.02778864370791322,222088.83510765125,94450.12569617687,0.29218749999999993,0.2559326319498438
|
||||||
|
qtable,robust,40,244470.50673219413,78666.30912808319,216920.53697298188,93983.50987622296,118.94013969887506,23.1428303249914,0.9178608956089163,0.023827311253270544,216920.53697298188,93983.50987622296,0.4396239583333334,0.29521865862482416
|
||||||
|
static,no_robust,33,85228.452028227,12041.415672002751,64828.579890468536,17681.280330831738,70.58818912317687,4.204964531595236,0.8721419294578765,0.007107262779462876,64828.579890468536,17681.280330831738,,
|
||||||
|
static,robust,30,84963.18577955024,8926.291379160475,63243.76603076817,14880.924342692271,70.94358095957392,4.363134562111469,0.8730306888410219,0.006660289247744752,63243.76603076817,14880.924342692271,,
|
||||||
|
surge,no_robust,32,11121.867310184698,809.9895800277001,4260.038064073964,1160.4282377968032,16.416108827015794,0.641203520341943,0.43413855082681374,0.006214799767130059,4260.038064073964,1160.4282377968032,,
|
||||||
|
surge,robust,29,10994.355365953365,750.5115890942825,4448.160863178768,1000.7519971246122,16.495943148858906,0.9823026347466668,0.4347587896392907,0.009698591291108968,4448.160863178768,1000.7519971246122,,
|
||||||
|
@@ -0,0 +1,26 @@
|
|||||||
|
Name,tier,alpha,mode,objective/score,eval/revenue_mean,eval/reward_mean,eval/coi_level_mean,lambda_coi,robust_radius,learning_rate,batch_size,n_steps,total_timesteps
|
||||||
|
eager-sweep-244,dqn,0.0,no_robust,413274.4339549909,355872.06196128257,413274.4339549909,136.722140138007,0.2,0.1,0.0003,256,4096,15000
|
||||||
|
efficient-sweep-319,linear,0.0,no_robust,410094.0151741567,353309.5198146561,410094.0151741567,134.55152038805429,0.4,0.1,0.001,128,4096,15000
|
||||||
|
swept-sweep-422,linear,0.0,no_robust,403130.32747386186,347611.2815474988,403130.32747386186,133.8559785775022,0.4,0.3,0.0001,512,1024,15000
|
||||||
|
decent-sweep-478,linear,0.1,no_robust,400452.36418713134,345284.5750647792,400452.36418713134,134.73082941975588,0.1,0.2,0.001,128,1024,50000
|
||||||
|
eternal-sweep-339,linear,0.1,no_robust,399628.4231731644,344154.38525771734,399628.4231731644,134.89479277649667,0.4,0.1,0.0001,256,1024,50000
|
||||||
|
ethereal-sweep-21,dqn,0.1,no_robust,398492.807245857,343580.6802427996,398492.807245857,136.67160732585188,0.1,0.2,0.001,512,2048,50000
|
||||||
|
dark-sweep-418,linear,0.1,no_robust,394615.3720658343,339749.76272695075,394615.3720658343,134.39233246711,0.2,0.1,0.0003,256,1024,50000
|
||||||
|
wandering-sweep-122,dqn,0.0,robust,394061.3617726404,339512.43434806296,394061.3617726404,137.6864755964331,0.1,0.3,0.0001,256,2048,30000
|
||||||
|
laced-sweep-132,dqn,0.1,robust,389274.54998495104,335600.5979215904,389274.54998495104,137.36888574027677,0.4,0.2,0.001,256,2048,30000
|
||||||
|
rich-sweep-53,qtable,0.0,robust,388601.2626147048,335630.6853337664,388601.2626147048,133.4414069888203,0.2,0.1,0.0001,512,1024,50000
|
||||||
|
faithful-sweep-430,qtable,0.25,no_robust,387035.6970938766,333255.5771210341,387035.6970938766,137.4906091183188,0.1,0.2,0.0003,128,1024,15000
|
||||||
|
dark-sweep-280,qtable,0.25,no_robust,386318.8845004527,332220.0316564078,386318.8845004527,137.26992450099925,0.4,0.1,0.0001,256,1024,50000
|
||||||
|
chocolate-sweep-383,linear,0.25,no_robust,383989.49015403807,331071.7003244704,383989.49015403807,134.60590742050857,0.1,0.2,0.001,512,1024,30000
|
||||||
|
dry-sweep-263,dqn,0.0,robust,383372.6880637367,330436.0312615148,383372.6880637367,137.40558130223476,0.1,0.3,0.001,128,1024,50000
|
||||||
|
different-sweep-143,qtable,0.0,robust,383278.4198015018,330546.16800945485,383278.4198015018,135.9021538079678,0.1,0.3,0.001,256,2048,30000
|
||||||
|
woven-sweep-139,dqn,0.25,robust,382788.1296637251,329427.735752473,382788.1296637251,136.8968339394894,0.1,0.1,0.001,512,1024,15000
|
||||||
|
dark-sweep-215,dqn,0.25,robust,382358.2401374872,329330.0097603144,382358.2401374872,137.64528612332785,0.2,0.1,0.0001,512,4096,30000
|
||||||
|
charmed-sweep-136,linear,0.25,no_robust,382249.5728044314,329646.2053260979,382249.5728044314,134.46825608007862,0.4,0.1,0.0001,256,2048,15000
|
||||||
|
light-sweep-308,linear,0.0,robust,381939.1275250679,329628.9436641051,381939.1275250679,133.6209821974879,0.2,0.2,0.001,128,4096,30000
|
||||||
|
treasured-sweep-325,linear,0.25,robust,381322.0104772589,328353.58675398555,381322.0104772589,134.8950293943581,0.1,0.1,0.0001,512,2048,15000
|
||||||
|
fine-sweep-202,dqn,0.25,robust,378751.33572275366,326518.9068184018,378751.33572275366,137.2900973301052,0.1,0.2,0.0001,512,2048,30000
|
||||||
|
treasured-sweep-380,linear,0.25,no_robust,377898.0979419424,325869.1953595453,377898.0979419424,134.54118723889738,0.4,0.3,0.001,128,1024,50000
|
||||||
|
pretty-sweep-49,qtable,0.25,robust,377318.4766808995,325282.0152823859,377318.4766808995,137.19609012644068,0.4,0.1,0.0001,128,4096,50000
|
||||||
|
desert-sweep-253,linear,0.25,robust,376808.6335063269,325146.3478714648,376808.6335063269,134.48396340732663,0.2,0.1,0.0003,256,1024,30000
|
||||||
|
jolly-sweep-133,qtable,0.4,no_robust,376419.57394710975,323709.24588324485,376419.57394710975,137.8349363778071,0.1,0.3,0.0001,128,2048,50000
|
||||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,7 @@
|
|||||||
|
alpha,runs_robust,runs_no_robust,eval_revenue_mean_robust,eval_revenue_mean_no_robust,eval_revenue_mean_delta,eval_revenue_mean_delta_pct,eval_reward_mean_robust,eval_reward_mean_no_robust,eval_reward_mean_delta,eval_reward_mean_delta_pct,eval_coi_level_mean_robust,eval_coi_level_mean_no_robust,eval_coi_level_mean_delta,eval_coi_level_mean_delta_pct,eval_coi_leakage_mean_robust,eval_coi_leakage_mean_no_robust,eval_coi_leakage_mean_delta,eval_coi_leakage_mean_delta_pct,eval_volatility_mean_robust,eval_volatility_mean_no_robust,eval_volatility_mean_delta,eval_volatility_mean_delta_pct,eval_margin_mean_robust,eval_margin_mean_no_robust,eval_margin_mean_delta,eval_margin_mean_delta_pct,train_alpha_adv_robust,train_alpha_adv_no_robust,train_alpha_adv_delta,train_alpha_adv_delta_pct,train_coi_penalty_robust,train_coi_penalty_no_robust,train_coi_penalty_delta,train_coi_penalty_delta_pct,train_ux_penalty_robust,train_ux_penalty_no_robust,train_ux_penalty_delta,train_ux_penalty_delta_pct,train_agent_prob_robust,train_agent_prob_no_robust,train_agent_prob_delta,train_agent_prob_delta_pct
|
||||||
|
0.0,4.0,4.0,3379.9042994670963,3565.2912010160844,-185.38690154898813,-5.199768857482219,313527.4707462,331300.229069,-17772.758322799986,-5.364547550342456,137.08358925982625,137.28764358955686,-0.2040543297306101,-0.14863269875959326,0.1146626165658294,0.11861133504329742,-0.003948718477468013,-3.3291240470622716,0.06687153537785637,0.06445662162531288,0.0024149137525434905,3.746572022625408,0.9315273502623671,0.9317078361627993,-0.00018048590043218127,-0.019371512552207898,0.18958333333333333,,,,5.553200113221484,,,,61.35134238638615,66.58479574844135,-5.233453362055201,-7.859832418540847,0.12778212146468534,0.11615891320235115,0.011623208262334192,10.00629907933654
|
||||||
|
0.1,4.0,4.0,3307.028238366196,3458.002436284769,-150.97419791857283,-4.365936713473732,306772.49146475,321215.477968,-14442.986503249966,-4.4963544704059375,137.1182041122497,136.82757579763506,0.29062831461465066,0.21240478238427865,0.1128546052304944,0.11704917861668755,-0.004194573386193154,-3.5835991638433753,0.0685405649303561,0.06737596899527175,0.0011645959350843477,1.728503430007924,0.9315331673960889,0.9313276818191593,0.00020548557692967595,0.0220637248243606,0.2818749999999999,0.1,0.18187499999999987,181.87499999999986,5.079528726095333,,,,52.44772950699336,53.288869747139515,-0.841140240146153,-1.578453895039319,0.11644381911386253,0.11765277436070229,-0.0012089552468397546,-1.0275620387270383
|
||||||
|
0.25,4.0,4.0,3134.3438215278165,3300.5539051855053,-166.21008365768876,-5.035823938416998,290691.4771835,306522.90003785,-15831.422854350007,-5.16484179563586,136.89990884669214,136.71752459667877,0.18238425001337077,0.1334022471160229,0.11113957413522965,0.1139905600539111,-0.0028509859186814507,-2.50107194607439,0.06427159998376095,0.06846858821082077,-0.004196988227059828,-6.12980103246314,0.9314501501825461,0.9313053225630614,0.0001448276194846443,0.015551035302371268,0.44833333333333336,0.25,0.19833333333333336,79.33333333333334,4.7183804755060255,,,,49.04307009982127,55.2030005738411,-6.159930474019831,-11.158687770568074,0.10998505830218755,0.11684259343269415,-0.0068575351305066035,-5.869037077182653
|
||||||
|
0.4,4.0,4.0,2983.852437569374,3180.7872854626567,-196.9348478932825,-6.191386918369099,276545.26309355,295433.5405797,-18888.277486150037,-6.393409986248494,136.19210761854086,136.5783021470118,-0.38619452847095204,-0.2827641890402586,0.10875560547061063,0.11189234314151972,-0.0031367376709090927,-2.8033532794480807,0.07452230347799255,0.07104688223410768,0.003475421243884863,4.891729425132195,0.9307282962514367,0.9310542820602117,-0.0003259858087749645,-0.03501254599824534,0.5999999999999999,0.4000000000000001,0.1999999999999998,49.999999999999936,4.174996403604185,,,,47.99794119802058,50.794260008988424,-2.796318810967847,-5.505186630286606,0.10222958892923095,0.11161526349272373,-0.009385674563492777,-8.408952565976458
|
||||||
|
0.6,4.0,4.0,2789.0434220430398,2982.2460998252786,-193.20267778223888,-6.4784283830083,258688.11700405,277051.95613675,-18363.8391327,-6.628301560749781,136.86774320500828,136.81931587629953,0.04842732870875466,0.035395096371142916,0.10501047827147733,0.10802266412956946,-0.0030121858580921257,-2.788475809557069,0.06914180963767007,0.06698591531512615,0.0021558943225439137,3.2184292957732996,0.9314130089130337,0.9313849217310588,2.8087181974889575e-05,0.003015636319588161,0.7733333333333334,0.5999999999999999,0.17333333333333356,28.888888888888935,4.178300996512875,,,,39.928062615509425,47.86860429278531,-7.940541677275881,-16.588203885594947,0.11297979438696983,0.1162670925925253,-0.0032872982055554695,-2.827367686122743
|
||||||
|
0.8,4.0,4.0,2586.098242115281,2841.1305915063504,-255.03234939106915,-8.97643882169642,239765.24959855,264140.55002745,-24375.300428900024,-9.228155399224729,136.5038826686135,137.28163778418497,-0.7777551155714661,-0.5665397995864124,0.10253056902792507,0.1031498585902154,-0.0006192895622903344,-0.6003784888844036,0.07325665736408164,0.06592454978099352,0.007332107583088124,11.1219683827132,0.9311235469993302,0.9316596013994161,-0.0005360544000858614,-0.05753758124541101,1.0,0.8000000000000002,0.19999999999999984,24.99999999999998,3.5384100686094007,,,,37.14414699970415,37.43809775029793,-0.29395075059377973,-0.7851647606519765,0.09990322635678014,0.10432800196112454,-0.0044247756043444,-4.241215705437541
|
||||||
|
@@ -0,0 +1,13 @@
|
|||||||
|
alpha,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_coi_leakage_mean_mean,eval_coi_leakage_mean_std,eval_volatility_mean_mean,eval_volatility_mean_std,eval_margin_mean_mean,eval_margin_mean_std,train_alpha_adv_mean,train_alpha_adv_std,train_coi_penalty_mean,train_coi_penalty_std,train_ux_penalty_mean,train_ux_penalty_std,train_agent_prob_mean,train_agent_prob_std
|
||||||
|
0.0,no_robust,4,3565.2912010160844,52.219179508209216,331300.229069,5038.96659004527,137.28764358955686,0.6434240315013728,0.11861133504329742,0.004019332768284657,0.06445662162531288,0.004080405219050139,0.9317078361627993,0.00038018051704976865,,,,,66.58479574844135,32.282270089830455,0.11615891320235115,0.016558627227281013
|
||||||
|
0.0,robust,4,3379.9042994670963,54.727408939657735,313527.4707462,5408.058196552377,137.08358925982625,1.047386315387148,0.1146626165658294,0.0025627354157035497,0.06687153537785637,0.008577061675868377,0.9315273502623671,0.0007274203134899985,0.18958333333333333,0.02083333333333336,5.553200113221484,0.45981481828856186,61.35134238638615,30.27964905193963,0.12778212146468534,0.027929667978205217
|
||||||
|
0.1,no_robust,4,3458.002436284769,60.75923217871363,321215.477968,6016.373193216596,136.82757579763506,1.1899102161551907,0.11704917861668755,0.0021220259908233973,0.06737596899527175,0.006801136773079149,0.9313276818191593,0.0008352263172197586,0.1,0.0,,,53.288869747139515,18.480340945815023,0.11765277436070229,0.017544197575138736
|
||||||
|
0.1,robust,4,3307.028238366196,35.58495715224888,306772.49146475,3488.2690530060245,137.1182041122497,0.8582218376452346,0.1128546052304944,0.0005963155492967403,0.0685405649303561,0.0050673362512629015,0.9315331673960889,0.0005217376436765336,0.2818749999999999,0.03624999999999999,5.079528726095333,0.6109585102054891,52.44772950699336,29.0263361696475,0.11644381911386253,0.021152545180088765
|
||||||
|
0.25,no_robust,4,3300.5539051855053,50.460978662647115,306522.90003785,4860.668937531515,136.71752459667877,0.7410676951244369,0.1139905600539111,0.003319948537321803,0.06846858821082077,0.008614994548315848,0.9313053225630614,0.0004919872662680591,0.25,0.0,,,55.2030005738411,26.88247558235345,0.11684259343269415,0.013462146346772591
|
||||||
|
0.25,robust,4,3134.3438215278165,64.06834403659167,290691.4771835,6331.196493752059,136.89990884669214,1.3796663751798552,0.11113957413522965,0.0015044942041406348,0.06427159998376095,0.0042331619171274894,0.9314501501825461,0.0008939739741734515,0.44833333333333336,0.0033333333333333518,4.7183804755060255,0.4538389380858333,49.04307009982127,28.20484665432831,0.10998505830218755,0.010731404693185651
|
||||||
|
0.4,no_robust,4,3180.7872854626567,71.87564776824694,295433.5405797,7035.374110540269,136.5783021470118,1.7095219574599192,0.11189234314151972,0.0013821115134030936,0.07104688223410768,0.005766138692685495,0.9310542820602117,0.0013989725050689828,0.4000000000000001,0.0,,,50.794260008988424,24.836708377642946,0.11161526349272373,0.005787749200301594
|
||||||
|
0.4,robust,4,2983.852437569374,45.51290575912758,276545.26309355,4555.1725323898245,136.19210761854086,1.5546063667946701,0.10875560547061063,0.001118798290958954,0.07452230347799255,0.0040446395928049874,0.9307282962514367,0.0013558080014763189,0.5999999999999999,0.0,4.174996403604185,0.12189448324552496,47.99794119802058,33.51782503281748,0.10222958892923095,0.0031686467591609474
|
||||||
|
0.6,no_robust,4,2982.2460998252786,39.93674476199945,277051.95613675,3931.02017169463,136.81931587629953,1.1995405806950865,0.10802266412956946,0.000405835985606262,0.06698591531512615,0.002805894772223563,0.9313849217310588,0.0008100530228792662,0.5999999999999999,0.0,,,47.86860429278531,23.830502772642472,0.1162670925925253,0.028676813474186293
|
||||||
|
0.6,robust,4,2789.0434220430398,35.297482315631626,258688.11700405,3420.6735023624556,136.86774320500828,0.7097303238857778,0.10501047827147733,0.0008273121554488608,0.06914180963767007,0.009066158371268139,0.9314130089130337,0.0005024421703994162,0.7733333333333334,0.053333333333333385,4.178300996512875,0.5865970573865015,39.928062615509425,30.25078643153115,0.11297979438696983,0.0274101056520461
|
||||||
|
0.8,no_robust,4,2841.1305915063504,21.84043179776092,264140.55002745,2073.353315114627,137.28163778418497,0.6288968799501957,0.1031498585902154,0.0012877581835795701,0.06592454978099352,0.00340700896766341,0.9316596013994161,0.00038430108058413553,0.8000000000000002,0.0,,,37.43809775029793,32.01740090550489,0.10432800196112454,0.018337841526911584
|
||||||
|
0.8,robust,4,2586.098242115281,48.05539265296157,239765.24959855,4681.6472175597555,136.5038826686135,1.0611320896043694,0.10253056902792507,0.002587472569909977,0.07325665736408164,0.0015359324114246234,0.9311235469993302,0.0006145440308596868,1.0,0.0,3.5384100686094007,0.391972726035734,37.14414699970415,25.614063825315505,0.09990322635678014,0.010269342031085898
|
||||||
|
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"status": "ok",
|
||||||
|
"revenue_delta": -191.29017636530716,
|
||||||
|
"revenue_delta_pct": -5.938226273545598,
|
||||||
|
"coi_leakage_delta": -0.002960415145605702,
|
||||||
|
"coi_leakage_delta_pct": -2.6404147469510946
|
||||||
|
}
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_coi_leakage_mean_mean,eval_coi_leakage_mean_std,eval_volatility_mean_mean,eval_volatility_mean_std,eval_margin_mean_mean,eval_margin_mean_std,train_alpha_adv_mean,train_alpha_adv_std,train_coi_penalty_mean,train_coi_penalty_std,train_ux_penalty_mean,train_ux_penalty_std,train_agent_prob_mean,train_agent_prob_std
|
||||||
|
no_robust,24,3221.335253213441,262.46595166337727,299277.442303125,24382.561944761477,136.9186666318945,1.0038463876967063,0.11211932326253345,0.005805494533542669,0.06737642102693879,0.005402738047823369,0.9314066076226178,0.0007436370959663933,0.43,0.2546411303445653,,,51.86293802024894,25.340287421525442,0.11381077317368686,0.016664235359362907
|
||||||
|
robust,24,3030.0450768481337,288.262657026656,280998.34484843333,26820.020161880373,136.77757261848845,1.06224696086916,0.10915890811692774,0.004616462637659704,0.06943407846195294,0.006435789449278624,0.9312959200008004,0.0007858424519830652,0.5488541666666666,0.2860373751485706,4.540469463924883,0.7906156355346259,47.985382134405825,27.407657819442747,0.11155393475895271,0.01943348418653492
|
||||||
|
@@ -0,0 +1,25 @@
|
|||||||
|
alpha,metric,direction,wins,ties,total_pairs,win_probability
|
||||||
|
0.0,eval/revenue_mean,higher,0,0,16,0.0
|
||||||
|
0.0,eval/reward_mean,higher,0,0,16,0.0
|
||||||
|
0.0,eval/coi_leakage_mean,lower,14,0,16,0.875
|
||||||
|
0.0,eval/volatility_mean,lower,8,0,16,0.5
|
||||||
|
0.1,eval/revenue_mean,higher,0,0,16,0.0
|
||||||
|
0.1,eval/reward_mean,higher,0,0,16,0.0
|
||||||
|
0.1,eval/coi_leakage_mean,lower,16,0,16,1.0
|
||||||
|
0.1,eval/volatility_mean,lower,8,0,16,0.5
|
||||||
|
0.25,eval/revenue_mean,higher,0,0,16,0.0
|
||||||
|
0.25,eval/reward_mean,higher,0,0,16,0.0
|
||||||
|
0.25,eval/coi_leakage_mean,lower,12,0,16,0.75
|
||||||
|
0.25,eval/volatility_mean,lower,11,0,16,0.6875
|
||||||
|
0.4,eval/revenue_mean,higher,0,0,16,0.0
|
||||||
|
0.4,eval/reward_mean,higher,0,0,16,0.0
|
||||||
|
0.4,eval/coi_leakage_mean,lower,16,0,16,1.0
|
||||||
|
0.4,eval/volatility_mean,lower,6,0,16,0.375
|
||||||
|
0.6,eval/revenue_mean,higher,0,0,16,0.0
|
||||||
|
0.6,eval/reward_mean,higher,0,0,16,0.0
|
||||||
|
0.6,eval/coi_leakage_mean,lower,16,0,16,1.0
|
||||||
|
0.6,eval/volatility_mean,lower,7,0,16,0.4375
|
||||||
|
0.8,eval/revenue_mean,higher,0,0,16,0.0
|
||||||
|
0.8,eval/reward_mean,higher,0,0,16,0.0
|
||||||
|
0.8,eval/coi_leakage_mean,lower,11,0,16,0.6875
|
||||||
|
0.8,eval/volatility_mean,lower,0,0,16,0.0
|
||||||
|
@@ -0,0 +1 @@
|
|||||||
|
\includegraphics[width=0.98\linewidth]{chapters/figures/results/generated/final/plots/final_focus_revenue_by_alpha.pdf}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
\includegraphics[width=0.95\linewidth]{chapters/figures/results/generated/final/plots/final_focus_revenue_delta.pdf}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
\includegraphics[width=0.95\linewidth]{chapters/figures/results/generated/final/plots/final_focus_risk_deltas.pdf}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
\includegraphics[width=0.99\linewidth]{chapters/figures/results/generated/legacy/plots/first_sweep_tier_revenue.pdf}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
\includegraphics[width=0.98\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_alpha_curves.pdf}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
\includegraphics[width=0.98\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_delta_curves.pdf}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
\includegraphics[width=0.88\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_tradeoff_scatter.pdf}
|
||||||
313
paper/src/chapters/figures/results/plot_results.py
Normal file
313
paper/src/chapters/figures/results/plot_results.py
Normal file
@@ -0,0 +1,313 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
|
||||||
|
matplotlib.use("Agg")
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib.ticker import FuncFormatter
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from process_first_sweep import run as run_first_sweep
|
||||||
|
from process_ppo_benchmark import run as run_ppo_benchmark
|
||||||
|
|
||||||
|
|
||||||
|
def _output_dir() -> Path:
|
||||||
|
return Path(__file__).resolve().parent / "generated" / "legacy"
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_dir() -> Path:
    """Return the ``plots`` sub-directory of :func:`_output_dir`."""
    base = _output_dir()
    return base / "plots"
|
||||||
|
|
||||||
|
|
||||||
|
def _configure_style() -> None:
    """Install the rcParams shared by every figure this script produces."""
    style = {
        # Typography
        "font.family": "serif",
        "font.size": 10,
        "axes.titlesize": 10,
        "axes.labelsize": 9,
        "legend.fontsize": 8,
        "xtick.labelsize": 8,
        "ytick.labelsize": 8,
        # Resolution
        "figure.dpi": 220,
        "savefig.dpi": 320,
        # Axes decoration
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.grid": True,
        "grid.alpha": 0.22,
    }
    plt.rcParams.update(style)
|
||||||
|
|
||||||
|
|
||||||
|
def _fmt_thousands(value: float, _: int) -> str:
|
||||||
|
return f"{int(value):,}"
|
||||||
|
|
||||||
|
|
||||||
|
def _load_csv(path: Path) -> pd.DataFrame:
|
||||||
|
if not path.exists():
|
||||||
|
raise FileNotFoundError(f"Missing required input: {path}")
|
||||||
|
return pd.read_csv(path)
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_ppo_alpha_curves(alpha_mode: pd.DataFrame, out_dir: Path) -> Path:
    """Draw a 2x2 grid of per-alpha metric curves, one line per mode.

    Args:
        alpha_mode: Summary frame with ``alpha`` and ``mode`` columns plus a
            ``<metric>_mean`` column (and optionally ``<metric>_std``) for each
            of the four plotted metrics.
        out_dir: Directory that receives ``ppo_alpha_curves.pdf``.

    Returns:
        Path of the written PDF.
    """
    fig, axes = plt.subplots(2, 2, figsize=(9.3, 6.4), constrained_layout=True)

    # mode -> (line colour, legend label); insertion order is the draw order.
    mode_style = {
        "no_robust": ("#4C72B0", "Non-robust"),
        "robust": ("#C44E52", "Robust"),
    }
    panels = [
        ("eval_revenue_mean", "Mean Episode Revenue", "Revenue"),
        ("eval_reward_mean", "Mean Episode Reward", "Reward"),
        ("eval_coi_leakage_mean", "Mean COI Leakage", "COI Leakage"),
        ("eval_volatility_mean", "Mean Price Volatility", "Volatility"),
    ]
    # Only these panels get the thousands-separated y tick labels.
    thousands_metrics = {"eval_revenue_mean", "eval_reward_mean"}

    for ax, (metric_prefix, title, ylabel) in zip(axes.flat, panels):
        mean_col = f"{metric_prefix}_mean"
        std_col = f"{metric_prefix}_std"

        for mode, (color, label) in mode_style.items():
            rows = alpha_mode[alpha_mode["mode"] == mode].sort_values("alpha")
            if rows.empty:
                continue

            xs = rows["alpha"].to_numpy(dtype=float)
            means = rows[mean_col].to_numpy(dtype=float)
            ax.plot(
                xs,
                means,
                marker="o",
                linewidth=1.8,
                markersize=4,
                color=color,
                label=label,
            )

            # The +/- one-std band is only drawn when the std column exists.
            if std_col not in rows.columns:
                continue
            spread = rows[std_col].fillna(0.0).to_numpy(dtype=float)
            ax.fill_between(
                xs,
                means - spread,
                means + spread,
                color=color,
                alpha=0.14,
                linewidth=0,
            )

        ax.set_title(title)
        ax.set_xlabel(r"Contamination $\alpha$")
        ax.set_ylabel(ylabel)
        ax.set_xticks(sorted(alpha_mode["alpha"].unique()))
        if metric_prefix in thousands_metrics:
            ax.yaxis.set_major_formatter(FuncFormatter(_fmt_thousands))

    # One shared legend, built from the handles of the first panel.
    handles, labels = axes.flat[0].get_legend_handles_labels()
    fig.legend(handles, labels, ncol=2, loc="upper center", bbox_to_anchor=(0.5, 1.02))

    out_path = out_dir / "ppo_alpha_curves.pdf"
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)
    return out_path
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_ppo_delta_curves(deltas: pd.DataFrame, out_dir: Path) -> Path:
    """Plot percentage deltas against alpha in two stacked panels.

    The top panel shows the revenue and reward delta columns, the bottom
    panel the COI-leakage and volatility delta columns.

    Args:
        deltas: Frame with an ``alpha`` column and the four ``*_delta_pct``
            columns referenced below.
        out_dir: Directory that receives ``ppo_delta_curves.pdf``.

    Returns:
        Path of the written PDF.
    """
    fig, axes = plt.subplots(2, 1, figsize=(8.6, 6.0), constrained_layout=True)
    deltas = deltas.sort_values("alpha")
    x = deltas["alpha"].to_numpy(dtype=float)

    # (axis, optional title, [(column, legend label, colour), ...])
    panel_specs = [
        (
            axes[0],
            "Robust Minus Non-robust Delta by Contamination",
            [
                ("eval_revenue_mean_delta_pct", "Revenue", "#4C72B0"),
                ("eval_reward_mean_delta_pct", "Reward", "#8172B3"),
            ],
        ),
        (
            axes[1],
            None,
            [
                ("eval_coi_leakage_mean_delta_pct", "COI Leakage", "#55A868"),
                ("eval_volatility_mean_delta_pct", "Volatility", "#DD8452"),
            ],
        ),
    ]

    for ax, title, series in panel_specs:
        for col, label, color in series:
            ax.plot(
                x,
                deltas[col].to_numpy(dtype=float),
                marker="o",
                linewidth=1.8,
                markersize=4,
                color=color,
                label=label,
            )
        # Dashed zero line marks "no difference between the two modes".
        ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
        if title is not None:
            ax.set_title(title)
        ax.set_ylabel("Delta (%)")
        ax.set_xlabel(r"Contamination $\alpha$")
        ax.set_xticks(x)
        ax.legend(loc="lower left")

    out_path = out_dir / "ppo_delta_curves.pdf"
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)
    return out_path
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_ppo_tradeoff_scatter(deltas: pd.DataFrame, out_dir: Path) -> Path:
    """Scatter revenue delta (%) against COI-leakage delta (%), coloured by alpha.

    Args:
        deltas: Frame with ``alpha``, ``eval_coi_leakage_mean_delta_pct`` and
            ``eval_revenue_mean_delta_pct`` columns.
        out_dir: Directory that receives ``ppo_tradeoff_scatter.pdf``.

    Returns:
        Path of the written PDF.
    """
    fig, ax = plt.subplots(figsize=(6.4, 5.2), constrained_layout=True)

    ordered = deltas.sort_values("alpha")
    leakage_pct = ordered["eval_coi_leakage_mean_delta_pct"].to_numpy(dtype=float)
    revenue_pct = ordered["eval_revenue_mean_delta_pct"].to_numpy(dtype=float)
    alpha_values = ordered["alpha"].to_numpy(dtype=float)

    points = ax.scatter(
        leakage_pct,
        revenue_pct,
        c=alpha_values,
        cmap="viridis",
        s=72,
        edgecolor="#222222",
        linewidth=0.5,
    )

    # Label every point with its alpha value, nudged off the marker.
    for px, py, a in zip(leakage_pct, revenue_pct, alpha_values):
        ax.annotate(
            rf"$\alpha={a:.2f}$",
            (px, py),
            textcoords="offset points",
            xytext=(5, 4),
            fontsize=8,
        )

    # Dashed cross-hair through the origin.
    crosshair = dict(color="#555555", linewidth=1.0, linestyle="--")
    ax.axhline(0.0, **crosshair)
    ax.axvline(0.0, **crosshair)

    ax.set_xlabel("COI Leakage Delta (%)")
    ax.set_ylabel("Revenue Delta (%)")
    ax.set_title("PPO Robust Tradeoff Frontier")

    colorbar = fig.colorbar(points, ax=ax)
    colorbar.set_label(r"Contamination $\alpha$")

    out_path = out_dir / "ppo_tradeoff_scatter.pdf"
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)
    return out_path
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_first_sweep_tier_revenue(tier_mode: pd.DataFrame, out_dir: Path) -> Path:
    """Plot per-tier revenue for both modes plus their relative difference.

    Left panel: grouped bars (log y-axis) of ``eval_revenue_mean_mean`` for the
    ``no_robust`` and ``robust`` modes. Right panel: the percentage delta of
    robust relative to non-robust for each tier.

    Args:
        tier_mode: Frame with ``tier``, ``mode`` and ``eval_revenue_mean_mean``.
        out_dir: Directory that receives ``first_sweep_tier_revenue.pdf``.

    Returns:
        Path of the written PDF.

    Raises:
        ValueError: if no tier has a value for both modes.
    """
    wide = tier_mode.pivot(index="tier", columns="mode", values="eval_revenue_mean_mean")
    # Keep only tiers for which both modes have a value.
    wide = wide.dropna(subset=["robust", "no_robust"], how="any").copy()
    if wide.empty:
        raise ValueError("First sweep tier summary missing robust/non-robust pairs")

    order = sorted(wide.index.tolist())
    wide = wide.loc[order]
    baseline_rev = wide["no_robust"]
    robust_rev = wide["robust"]
    delta_pct = 100.0 * (robust_rev - baseline_rev) / baseline_rev

    fig, axes = plt.subplots(1, 2, figsize=(10.2, 4.3), constrained_layout=True)
    left, right = axes[0], axes[1]
    x = np.arange(len(order))
    width = 0.36

    # (bar centre offset, pivot column, legend label, colour)
    bar_specs = [
        (-width / 2, "no_robust", "Non-robust", "#4C72B0"),
        (+width / 2, "robust", "Robust", "#C44E52"),
    ]
    for offset, column, label, color in bar_specs:
        left.bar(
            x + offset,
            wide[column].to_numpy(dtype=float),
            width=width,
            label=label,
            color=color,
        )
    left.set_xticks(x)
    left.set_xticklabels(order, rotation=20)
    left.set_ylabel("Mean Revenue")
    left.set_yscale("log")
    left.yaxis.set_major_formatter(FuncFormatter(_fmt_thousands))
    left.set_title("First Sweep Tier Revenue (log scale)")
    left.legend()

    right.bar(x, delta_pct.to_numpy(dtype=float), color="#55A868", width=0.55)
    right.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
    right.set_xticks(x)
    right.set_xticklabels(order, rotation=20)
    right.set_ylabel("Revenue Delta (%)")
    right.set_title("Robust Minus Non-robust by Tier")

    out_path = out_dir / "first_sweep_tier_revenue.pdf"
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)
    return out_path
|
||||||
|
|
||||||
|
|
||||||
|
def build_plots(data_dir: Path, out_dir: Path) -> list[Path]:
    """Load the processed CSVs from ``data_dir`` and render all four figures.

    All three inputs are loaded before ``out_dir`` is created, so a missing
    CSV raises ``FileNotFoundError`` without touching the output directory.

    Returns:
        Paths of the written PDFs, in render order.
    """
    alpha_mode = _load_csv(data_dir / "ppo_alpha_mode_summary.csv")
    deltas = _load_csv(data_dir / "ppo_alpha_deltas.csv")
    tier_mode = _load_csv(data_dir / "first_sweep_tier_mode_summary.csv")

    out_dir.mkdir(parents=True, exist_ok=True)

    return [
        _plot_ppo_alpha_curves(alpha_mode, out_dir),
        _plot_ppo_delta_curves(deltas, out_dir),
        _plot_ppo_tradeoff_scatter(deltas, out_dir),
        _plot_first_sweep_tier_revenue(tier_mode, out_dir),
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: optionally refresh the CSVs, then plot.

    With ``--refresh-data`` the processed CSVs are regenerated from
    ``tpu_orchestration/results/*.csv``, resolved relative to the directory
    five parents above this file's directory (presumably the repository
    root; confirm against the checkout layout). Every written plot path is
    printed on its own line.
    """
    parser = argparse.ArgumentParser(
        description="Create paper-ready plots from result CSVs"
    )
    parser.add_argument("--data-dir", type=Path, default=_output_dir())
    parser.add_argument("--plot-dir", type=Path, default=_plot_dir())
    parser.add_argument(
        "--refresh-data",
        action="store_true",
        help="Regenerate processed CSVs before plotting",
    )
    args = parser.parse_args()

    _configure_style()

    if args.refresh_data:
        raw_results = (
            Path(__file__).resolve().parents[5] / "tpu_orchestration" / "results"
        )
        run_ppo_benchmark(
            input_path=raw_results / "ppo_benchmark.csv",
            output_dir=args.data_dir,
            include_non_finished=False,
        )
        run_first_sweep(
            input_path=raw_results / "first_sweep.csv",
            output_dir=args.data_dir,
            include_non_finished=False,
            top_n=25,
        )

    for written in build_plots(data_dir=args.data_dir, out_dir=args.plot_dir):
        print(written)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
658
paper/src/chapters/figures/results/plot_wandb_export.py
Normal file
658
paper/src/chapters/figures/results/plot_wandb_export.py
Normal file
@@ -0,0 +1,658 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
|
||||||
|
matplotlib.use("Agg")
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib.ticker import FuncFormatter
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def _load_tikzplotlib():
|
||||||
|
def _patch_webcolors() -> None:
|
||||||
|
try:
|
||||||
|
import webcolors
|
||||||
|
|
||||||
|
if hasattr(webcolors, "CSS3_HEX_TO_NAMES"):
|
||||||
|
return
|
||||||
|
css3 = getattr(webcolors, "CSS3", "css3")
|
||||||
|
webcolors.CSS3_HEX_TO_NAMES = {
|
||||||
|
webcolors.name_to_hex(name, spec=css3): name
|
||||||
|
for name in webcolors.names(spec=css3)
|
||||||
|
}
|
||||||
|
except Exception:
|
||||||
|
return
|
||||||
|
|
||||||
|
_patch_webcolors()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from matplotlib.legend import Legend
|
||||||
|
|
||||||
|
if not hasattr(Legend, "_ncol") and hasattr(Legend, "_ncols"):
|
||||||
|
Legend._ncol = property(lambda self: self._ncols)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
import tikzplotlib as module
|
||||||
|
|
||||||
|
return module, None
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
from matplotlib.backends import backend_pgf
|
||||||
|
|
||||||
|
if not hasattr(backend_pgf, "common_texification") and hasattr(
|
||||||
|
backend_pgf, "_tex_escape"
|
||||||
|
):
|
||||||
|
backend_pgf.common_texification = backend_pgf._tex_escape
|
||||||
|
|
||||||
|
_patch_webcolors()
|
||||||
|
|
||||||
|
import tikzplotlib as module
|
||||||
|
|
||||||
|
return module, None
|
||||||
|
except Exception as exc:
|
||||||
|
return None, exc
|
||||||
|
|
||||||
|
|
||||||
|
TIKZPLOTLIB, TIKZPLOTLIB_IMPORT_ERROR = _load_tikzplotlib()
|
||||||
|
|
||||||
|
|
||||||
|
def _default_output_dir() -> Path:
|
||||||
|
return Path(__file__).resolve().parent / "generated" / "wandb"
|
||||||
|
|
||||||
|
|
||||||
|
def _default_plot_dir(output_dir: Path) -> Path:
|
||||||
|
return output_dir / "plots"
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize(key: str) -> str:
|
||||||
|
return key.replace("/", "_").replace("-", "_")
|
||||||
|
|
||||||
|
|
||||||
|
def _configure_style() -> None:
    """Install the rcParams shared by every figure this script produces."""
    style = {
        # Typography
        "font.family": "serif",
        "font.size": 10,
        "axes.titlesize": 10,
        "axes.labelsize": 9,
        "legend.fontsize": 8,
        "xtick.labelsize": 8,
        "ytick.labelsize": 8,
        # Resolution
        "figure.dpi": 220,
        "savefig.dpi": 320,
        # Axes decoration
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.grid": True,
        "grid.alpha": 0.22,
    }
    plt.rcParams.update(style)
|
||||||
|
|
||||||
|
|
||||||
|
def _fmt_thousands(value: float, _: int) -> str:
|
||||||
|
return f"{int(value):,}"
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_numeric(frame: pd.DataFrame, columns: Iterable[str]) -> None:
|
||||||
|
for column in columns:
|
||||||
|
if column in frame.columns:
|
||||||
|
frame[column] = pd.to_numeric(frame[column], errors="coerce")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_alpha(frame: pd.DataFrame) -> pd.Series:
|
||||||
|
if "study/alpha" in frame.columns:
|
||||||
|
return pd.to_numeric(frame["study/alpha"], errors="coerce")
|
||||||
|
if "alpha" in frame.columns:
|
||||||
|
return pd.to_numeric(frame["alpha"], errors="coerce")
|
||||||
|
return pd.Series(np.nan, index=frame.index, dtype=float)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_mode(frame: pd.DataFrame) -> pd.Series:
|
||||||
|
if "study/mode" in frame.columns:
|
||||||
|
mode = frame["study/mode"].astype(str).str.strip().str.lower()
|
||||||
|
mapping = {
|
||||||
|
"baseline": "baseline",
|
||||||
|
"no_robust": "baseline",
|
||||||
|
"defended": "defended",
|
||||||
|
"robust": "defended",
|
||||||
|
}
|
||||||
|
return mode.map(mapping).fillna("")
|
||||||
|
|
||||||
|
if "study/no_robust" in frame.columns:
|
||||||
|
no_robust = pd.to_numeric(frame["study/no_robust"], errors="coerce").fillna(0.0)
|
||||||
|
return pd.Series(
|
||||||
|
np.where(no_robust > 0.5, "baseline", "defended"),
|
||||||
|
index=frame.index,
|
||||||
|
dtype="object",
|
||||||
|
)
|
||||||
|
|
||||||
|
if "no_robust" in frame.columns:
|
||||||
|
no_robust = (
|
||||||
|
frame["no_robust"].astype(str).str.lower().isin({"1", "true", "yes"})
|
||||||
|
)
|
||||||
|
return pd.Series(
|
||||||
|
np.where(no_robust, "baseline", "defended"),
|
||||||
|
index=frame.index,
|
||||||
|
dtype="object",
|
||||||
|
)
|
||||||
|
|
||||||
|
return pd.Series("", index=frame.index, dtype="object")
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_frame(frame: pd.DataFrame, include_non_finished: bool) -> pd.DataFrame:
    """Return a cleaned copy of ``frame`` ready for aggregation.

    Steps:
      * unless ``include_non_finished`` is set, keep only rows whose
        ``State`` column (when present) equals ``finished`` ignoring case;
      * add derived ``alpha`` and ``mode`` columns;
      * drop rows whose mode is not baseline/defended or whose alpha is NaN;
      * coerce the known metric and parameter columns to numeric;
      * sort by ``(alpha, mode)`` and reset the index.

    The input frame is not modified.
    """
    numeric_columns = [
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/volatility_mean",
        "eval/revenue_std",
        "eval/reward_std",
        "eval/margin_mean",
        "train/agent_prob",
        "train/alpha_adv",
        "lambda_coi",
        "ambiguity_radius",
        "n_products",
    ]

    data = frame.copy()
    if "State" in data.columns and not include_non_finished:
        is_finished = data["State"].astype(str).str.lower() == "finished"
        data = data[is_finished].copy()

    data["alpha"] = _extract_alpha(data)
    data["mode"] = _extract_mode(data)

    known_mode = data["mode"].isin({"baseline", "defended"})
    data = data[known_mode]
    has_alpha = data["alpha"].notna()
    data = data[has_alpha]

    _coerce_numeric(data, numeric_columns)

    ordered = data.sort_values(["alpha", "mode"])
    return ordered.reset_index(drop=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _summary_by_alpha_mode(frame: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Aggregate ``metrics`` per ``(alpha, mode)`` group.

    The result has one row per group with a ``runs`` column (group size) and,
    for each metric, ``<sanitized>_mean`` and ``<sanitized>_std`` columns.
    Rows are sorted by ``(alpha, mode)``.
    """
    # Named-aggregation spec: output column -> (source column, reducer).
    agg_spec: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        prefix = _sanitize(metric)
        agg_spec.update(
            {
                f"{prefix}_mean": (metric, "mean"),
                f"{prefix}_std": (metric, "std"),
            }
        )

    group_keys = ["alpha", "mode"]
    grouped = frame.groupby(group_keys, as_index=False).agg(**agg_spec)
    return grouped.sort_values(group_keys).reset_index(drop=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _delta_by_alpha(summary: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Compute defended-minus-baseline deltas for every alpha.

    Args:
        summary: Frame with ``alpha``, ``mode``, ``runs`` and a
            ``<sanitized>_mean`` column per metric.
        metrics: Metric names; each is passed through ``_sanitize`` to find
            its column.

    Returns:
        One row per alpha that has both a defended and a baseline row. Each
        row holds the alpha, the run counts and, per metric, the defended
        value, baseline value, absolute delta and percentage delta (NaN when
        the baseline value is exactly zero). Only the first row of each mode
        is used.
    """

    def _first(rows: pd.DataFrame, column: str) -> float:
        # Value of ``column`` in the first row, as a plain float.
        return float(rows[column].iloc[0])

    records: list[dict[str, float]] = []
    for alpha, group in summary.groupby("alpha", sort=True):
        defended = group[group["mode"] == "defended"]
        baseline = group[group["mode"] == "baseline"]
        # An alpha lacking either mode cannot produce a delta.
        if defended.empty or baseline.empty:
            continue

        record: dict[str, float] = {
            "alpha": float(alpha),
            "runs_defended": _first(defended, "runs"),
            "runs_baseline": _first(baseline, "runs"),
        }
        for metric in metrics:
            prefix = _sanitize(metric)
            defended_value = _first(defended, f"{prefix}_mean")
            baseline_value = _first(baseline, f"{prefix}_mean")
            delta = defended_value - baseline_value

            record[f"{prefix}_defended"] = defended_value
            record[f"{prefix}_baseline"] = baseline_value
            record[f"{prefix}_delta"] = delta
            if baseline_value == 0:
                record[f"{prefix}_delta_pct"] = np.nan
            else:
                record[f"{prefix}_delta_pct"] = 100.0 * delta / baseline_value
        records.append(record)

    return pd.DataFrame(records)
|
||||||
|
|
||||||
|
|
||||||
|
def _summary_by_parameter(
    frame: pd.DataFrame, parameter: str, metrics: list[str]
) -> pd.DataFrame:
    """Aggregate ``metrics`` of defended runs per ``(alpha, parameter)`` group.

    Only rows with ``mode == "defended"`` and a non-null ``parameter`` value
    are used. The result has a ``runs`` column (group size) and, for each
    metric, ``<sanitized>_mean`` and ``<sanitized>_std`` columns, sorted by
    ``(alpha, parameter)``.
    """
    subset = frame.loc[frame["mode"] == "defended"].copy()
    subset = subset.loc[subset[parameter].notna()].copy()

    # Named-aggregation spec: output column -> (source column, reducer).
    agg_spec: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        prefix = _sanitize(metric)
        agg_spec.update(
            {
                f"{prefix}_mean": (metric, "mean"),
                f"{prefix}_std": (metric, "std"),
            }
        )

    group_keys = ["alpha", parameter]
    grouped = subset.groupby(group_keys, as_index=False).agg(**agg_spec)
    return grouped.sort_values(group_keys).reset_index(drop=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _save_table(frame: pd.DataFrame, path: Path) -> Path:
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
frame.to_csv(path, index=False)
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def _save_figure(fig: plt.Figure, pdf_path: Path, export_tikz: bool) -> list[Path]:
    """Save ``fig`` as a PDF and, optionally, as a TikZ ``.tikz.tex`` file.

    Args:
        fig: Figure to write. It is always closed before this function
            returns or raises.
        pdf_path: Destination of the PDF; parent directories are created.
        export_tikz: When true, also write ``pdf_path`` with its suffix
            replaced by ``.tikz.tex`` via tikzplotlib.

    Returns:
        The paths written, PDF first.

    Raises:
        RuntimeError: if ``export_tikz`` is true but tikzplotlib could not be
            imported. The PDF has already been written at that point.
    """
    # try/finally guarantees the figure is released on every exit path; the
    # previous straight-line code leaked it whenever saving or the TikZ
    # export raised.
    try:
        pdf_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(pdf_path, bbox_inches="tight")
        written = [pdf_path]

        if export_tikz:
            if TIKZPLOTLIB is None:
                raise RuntimeError(
                    "tikzplotlib import failed. Install/upgrade tikzplotlib and matplotlib-compatible dependencies. "
                    f"Original error: {TIKZPLOTLIB_IMPORT_ERROR}"
                )

            # Best-effort compatibility shims: add attribute aliases on the
            # figure's legends and lines when only the alternative spelling
            # exists. Failures here are deliberately ignored so that the
            # export itself is still attempted.
            try:
                from matplotlib.legend import Legend
                from matplotlib.lines import Line2D

                for legend in fig.findobj(Legend):
                    if not hasattr(legend, "_ncol") and hasattr(legend, "_ncols"):
                        setattr(legend, "_ncol", legend._ncols)
                    if not hasattr(legend, "legendHandles") and hasattr(
                        legend, "legend_handles"
                    ):
                        setattr(legend, "legendHandles", legend.legend_handles)

                for line in fig.findobj(Line2D):
                    if hasattr(line, "_us_dashSeq"):
                        continue
                    if not hasattr(line, "_dash_pattern"):
                        continue
                    dash_pattern = getattr(line, "_dash_pattern")
                    # Only a 2-tuple is split into offset and sequence.
                    if not isinstance(dash_pattern, tuple) or len(dash_pattern) != 2:
                        continue
                    setattr(line, "_us_dashOffset", dash_pattern[0])
                    setattr(line, "_us_dashSeq", dash_pattern[1])
            except Exception:
                pass

            tikz_path = pdf_path.with_suffix(".tikz.tex")
            TIKZPLOTLIB.save(str(tikz_path), figure=fig)
            written.append(tikz_path)

        return written
    finally:
        plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_alpha_curves(
    alpha_mode: pd.DataFrame, out_dir: Path, export_tikz: bool
) -> list[Path]:
    """Draw a 2x2 grid of per-mode metric curves against contamination alpha.

    Each panel plots the ``<metric>_mean`` column for the baseline and defended
    modes and shades one ``<metric>_std`` either side of the curve (missing std
    values count as zero). Modes with no rows are skipped. The figure is saved
    as ``wandb_alpha_curves.pdf`` in ``out_dir`` through ``_save_figure``.

    Returns the list of paths reported by ``_save_figure``.
    """
    fig, axes = plt.subplots(2, 2, figsize=(9.3, 6.4), constrained_layout=True)

    # (mode key, line colour, legend label)
    mode_styles = (
        ("baseline", "#4C72B0", "Baseline"),
        ("defended", "#C44E52", "Defended"),
    )
    # (metric prefix, panel title, y-axis label)
    panel_specs = (
        ("eval_revenue_mean", "Mean Episode Revenue", "Revenue"),
        ("eval_reward_mean", "Mean Episode Reward", "Reward"),
        ("eval_coi_leakage_mean", "Mean COI Leakage", "COI Leakage"),
        ("eval_volatility_mean", "Mean Price Volatility", "Volatility"),
    )
    # Panels whose y axis uses the thousands formatter.
    thousands_panels = {"eval_revenue_mean", "eval_reward_mean"}

    for ax, (metric_prefix, title, ylabel) in zip(axes.flat, panel_specs):
        mean_col = metric_prefix + "_mean"
        std_col = metric_prefix + "_std"
        for mode, colour, legend_label in mode_styles:
            rows = alpha_mode[alpha_mode["mode"] == mode].sort_values("alpha")
            if rows.empty:
                continue
            xs = rows["alpha"].to_numpy(dtype=float)
            ys = rows[mean_col].to_numpy(dtype=float)
            ax.plot(
                xs,
                ys,
                marker="o",
                linewidth=1.8,
                markersize=4,
                color=colour,
                label=legend_label,
            )
            band = rows[std_col].fillna(0.0).to_numpy(dtype=float)
            ax.fill_between(
                xs, ys - band, ys + band, color=colour, alpha=0.14, linewidth=0
            )

        ax.set_title(title)
        ax.set_xlabel(r"Contamination $\alpha$")
        ax.set_ylabel(ylabel)
        ax.set_xticks(sorted(alpha_mode["alpha"].unique()))
        if metric_prefix in thousands_panels:
            ax.yaxis.set_major_formatter(FuncFormatter(_fmt_thousands))

    # One shared legend, built from the first panel's handles.
    handles, labels = axes.flat[0].get_legend_handles_labels()
    fig.legend(handles, labels, ncol=2, loc="upper center", bbox_to_anchor=(0.5, 1.02))
    return _save_figure(fig, out_dir / "wandb_alpha_curves.pdf", export_tikz)
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_delta_curves(
    deltas: pd.DataFrame, out_dir: Path, export_tikz: bool
) -> list[Path]:
    """Plot percentage deltas against contamination alpha in two stacked panels.

    The top panel shows the revenue and reward delta columns, the bottom panel
    the COI-leakage and volatility delta columns. Both panels get a dashed zero
    line. The figure is saved as ``wandb_delta_curves.pdf`` in ``out_dir``.

    Returns the list of paths reported by ``_save_figure``.
    """
    fig, axes = plt.subplots(2, 1, figsize=(8.6, 6.0), constrained_layout=True)
    deltas = deltas.sort_values("alpha")
    x = deltas["alpha"].to_numpy(dtype=float)

    # (axis, ((column, legend label, colour), ...), optional panel title)
    panel_specs = (
        (
            axes[0],
            (
                ("eval_revenue_mean_delta_pct", "Revenue", "#4C72B0"),
                ("eval_reward_mean_delta_pct", "Reward", "#8172B3"),
            ),
            "Defended Minus Baseline Delta by Contamination",
        ),
        (
            axes[1],
            (
                ("eval_coi_leakage_mean_delta_pct", "COI Leakage", "#55A868"),
                ("eval_volatility_mean_delta_pct", "Volatility", "#DD8452"),
            ),
            None,
        ),
    )

    for ax, series, title in panel_specs:
        for column, legend_label, colour in series:
            ax.plot(
                x,
                deltas[column].to_numpy(dtype=float),
                marker="o",
                linewidth=1.8,
                markersize=4,
                color=colour,
                label=legend_label,
            )
        ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
        if title is not None:
            ax.set_title(title)
        ax.set_ylabel("Delta (%)")
        ax.set_xlabel(r"Contamination $\alpha$")
        ax.set_xticks(x)
        ax.legend(loc="lower left")

    return _save_figure(fig, out_dir / "wandb_delta_curves.pdf", export_tikz)
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_tradeoff_scatter(
    deltas: pd.DataFrame, out_dir: Path, export_tikz: bool
) -> list[Path]:
    """Scatter revenue delta (%) against COI-leakage delta (%), coloured by alpha.

    Every point is annotated with its alpha value, dashed lines mark zero on
    both axes, and a colour bar shows the alpha scale. The figure is saved as
    ``wandb_tradeoff_scatter.pdf`` in ``out_dir``.

    Returns the list of paths reported by ``_save_figure``.
    """
    fig, ax = plt.subplots(figsize=(6.4, 5.2), constrained_layout=True)
    ordered = deltas.sort_values("alpha")
    leakage = ordered["eval_coi_leakage_mean_delta_pct"].to_numpy(dtype=float)
    revenue = ordered["eval_revenue_mean_delta_pct"].to_numpy(dtype=float)
    alphas = ordered["alpha"].to_numpy(dtype=float)

    points = ax.scatter(
        leakage,
        revenue,
        c=alphas,
        cmap="viridis",
        s=72,
        edgecolor="#222222",
        linewidth=0.5,
    )
    for point_x, point_y, alpha in zip(leakage, revenue, alphas):
        ax.annotate(
            rf"$\alpha={alpha:.2f}$",
            (point_x, point_y),
            textcoords="offset points",
            xytext=(5, 4),
            fontsize=8,
        )

    zero_line = {"color": "#555555", "linewidth": 1.0, "linestyle": "--"}
    ax.axhline(0.0, **zero_line)
    ax.axvline(0.0, **zero_line)
    ax.set_xlabel("COI Leakage Delta (%)")
    ax.set_ylabel("Revenue Delta (%)")
    ax.set_title("Defended Tradeoff Frontier")
    colour_bar = fig.colorbar(points, ax=ax)
    colour_bar.set_label(r"Contamination $\alpha$")

    return _save_figure(fig, out_dir / "wandb_tradeoff_scatter.pdf", export_tikz)
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_reward_robustness(
    alpha_mode: pd.DataFrame, out_dir: Path, export_tikz: bool
) -> list[Path]:
    """Plot the ``eval_reward_mean_std`` column per mode against alpha.

    One line is drawn for each of the baseline and defended modes; missing std
    values are plotted as zero. A mode with no rows is skipped, matching
    ``_plot_alpha_curves``, so the legend never lists a mode that has no data.
    The figure is saved as ``wandb_reward_robustness.pdf`` in ``out_dir``.

    Returns the list of paths reported by ``_save_figure``.
    """
    fig, ax = plt.subplots(figsize=(7.6, 4.5), constrained_layout=True)
    mode_colors = {"baseline": "#4C72B0", "defended": "#C44E52"}
    mode_labels = {"baseline": "Baseline", "defended": "Defended"}

    for mode in ("baseline", "defended"):
        sub = alpha_mode[alpha_mode["mode"] == mode].sort_values("alpha")
        if sub.empty:
            # Nothing to draw; avoid an empty line with a legend entry.
            continue
        x = sub["alpha"].to_numpy(dtype=float)
        y = sub["eval_reward_mean_std"].fillna(0.0).to_numpy(dtype=float)
        ax.plot(
            x,
            y,
            marker="o",
            linewidth=1.8,
            markersize=4,
            color=mode_colors[mode],
            label=mode_labels[mode],
        )

    ax.set_title("Reward Robustness Across Contamination")
    ax.set_xlabel(r"Contamination $\alpha$")
    ax.set_ylabel("Reward Std Across Runs")
    ax.set_xticks(sorted(alpha_mode["alpha"].unique()))
    ax.yaxis.set_major_formatter(FuncFormatter(_fmt_thousands))
    ax.legend(loc="upper left")
    return _save_figure(fig, out_dir / "wandb_reward_robustness.pdf", export_tikz)
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_parameter_sensitivity(
    summary: pd.DataFrame,
    parameter: str,
    out_name: str,
    out_dir: Path,
    export_tikz: bool,
) -> list[Path]:
    """Plot revenue and COI leakage against alpha, one curve per ``parameter`` value.

    Args:
        summary: frame holding ``alpha``, the ``parameter`` column and the
            ``eval_revenue_mean_*`` / ``eval_coi_leakage_mean_*`` mean and std
            columns.
        parameter: column whose distinct non-null values each get one curve.
        out_name: file stem of the PDF written into ``out_dir``.
        out_dir: directory receiving the figure.
        export_tikz: forwarded to ``_save_figure``.

    Returns the list of paths reported by ``_save_figure``.
    """
    fig, axes = plt.subplots(1, 2, figsize=(10.0, 4.2), constrained_layout=True)
    values = sorted(summary[parameter].dropna().unique())
    palette = plt.get_cmap("viridis")
    # Evenly spaced colours from the 0.1-0.9 range of the colormap.
    colors = [palette(position) for position in np.linspace(0.1, 0.9, len(values))]

    # (metric prefix, y-axis label)
    panel_specs = (
        ("eval_revenue_mean", "Revenue"),
        ("eval_coi_leakage_mean", "COI Leakage"),
    )
    for ax, (metric_prefix, ylabel) in zip(axes, panel_specs):
        mean_col = metric_prefix + "_mean"
        std_col = metric_prefix + "_std"
        for value, colour in zip(values, colors):
            rows = summary[summary[parameter] == value].sort_values("alpha")
            if rows.empty:
                continue
            xs = rows["alpha"].to_numpy(dtype=float)
            ys = rows[mean_col].to_numpy(dtype=float)
            band = rows[std_col].fillna(0.0).to_numpy(dtype=float)
            ax.plot(
                xs,
                ys,
                marker="o",
                linewidth=1.6,
                markersize=3.6,
                color=colour,
                label=f"{parameter}={value:.2f}",
            )
            ax.fill_between(
                xs, ys - band, ys + band, color=colour, alpha=0.10, linewidth=0
            )

        ax.set_xlabel(r"Contamination $\alpha$")
        ax.set_ylabel(ylabel)
        ax.set_xticks(sorted(summary["alpha"].unique()))
        if metric_prefix == "eval_revenue_mean":
            ax.yaxis.set_major_formatter(FuncFormatter(_fmt_thousands))

    axes[0].set_title(f"{parameter} Sensitivity (Defended)")
    axes[1].set_title("Leakage Side-Effect")

    # Shared legend taken from the first panel.
    handles, labels = axes[0].get_legend_handles_labels()
    legend_columns = max(1, len(values) // 2)
    fig.legend(
        handles,
        labels,
        ncol=legend_columns,
        loc="upper center",
        bbox_to_anchor=(0.5, 1.06),
    )

    return _save_figure(fig, out_dir / f"{out_name}.pdf", export_tikz)
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_delta_summary(
    deltas: pd.DataFrame, out_dir: Path, export_tikz: bool
) -> list[Path]:
    """Draw three bar charts of percentage deltas, one bar per alpha value.

    The panels show the revenue, reward and COI-leakage delta columns. Bars are
    placed at integer positions and labelled with alpha to one decimal place.
    The figure is saved as ``wandb_delta_summary.pdf`` in ``out_dir``.

    Returns the list of paths reported by ``_save_figure``.
    """
    ordered = deltas.sort_values("alpha")
    positions = np.arange(len(ordered))
    tick_labels = [
        f"{alpha:.1f}" for alpha in ordered["alpha"].to_numpy(dtype=float)
    ]

    fig, axes = plt.subplots(1, 3, figsize=(11.0, 3.8), constrained_layout=True)
    # (column, panel title, bar colour)
    panel_specs = (
        ("eval_revenue_mean_delta_pct", "Revenue Delta (%)", "#4C72B0"),
        ("eval_reward_mean_delta_pct", "Reward Delta (%)", "#8172B3"),
        ("eval_coi_leakage_mean_delta_pct", "COI Leakage Delta (%)", "#55A868"),
    )
    for ax, (column, title, colour) in zip(axes, panel_specs):
        heights = ordered[column].to_numpy(dtype=float)
        ax.bar(positions, heights, color=colour, alpha=0.85)
        ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
        ax.set_xticks(positions)
        ax.set_xticklabels(tick_labels)
        ax.set_xlabel(r"$\alpha$")
        ax.set_title(title)

    return _save_figure(fig, out_dir / "wandb_delta_summary.pdf", export_tikz)
|
||||||
|
|
||||||
|
|
||||||
|
def build_artifacts(
    input_path: Path,
    output_dir: Path,
    plot_dir: Path,
    include_non_finished: bool,
    export_tikz: bool,
) -> list[Path]:
    """Read a W&B export CSV and write every summary table and figure.

    Args:
        input_path: CSV file to load.
        output_dir: directory receiving the summary CSV tables.
        plot_dir: directory receiving the figures.
        include_non_finished: forwarded to ``_prepare_frame``.
        export_tikz: forwarded to every plotting helper.

    Returns:
        All written paths: the four tables first, then the figure files in the
        order the plots are produced.
    """
    raw = pd.read_csv(input_path)
    frame = _prepare_frame(raw, include_non_finished=include_non_finished)

    # Only metrics that are actually present in the prepared frame are summarised.
    candidate_metrics = (
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/volatility_mean",
        "eval/margin_mean",
        "train/agent_prob",
        "train/alpha_adv",
    )
    metrics = [name for name in candidate_metrics if name in frame.columns]

    alpha_mode = _summary_by_alpha_mode(frame, metrics)
    deltas = _delta_by_alpha(alpha_mode, metrics)
    lambda_summary = _summary_by_parameter(frame, "lambda_coi", metrics)
    radius_summary = _summary_by_parameter(frame, "ambiguity_radius", metrics)

    output_dir.mkdir(parents=True, exist_ok=True)
    plot_dir.mkdir(parents=True, exist_ok=True)

    tables = (
        (alpha_mode, "wandb_alpha_mode_summary.csv"),
        (deltas, "wandb_alpha_deltas.csv"),
        (lambda_summary, "wandb_lambda_alpha_summary.csv"),
        (radius_summary, "wandb_radius_alpha_summary.csv"),
    )
    written: list[Path] = [
        _save_table(table, output_dir / filename) for table, filename in tables
    ]

    written += _plot_alpha_curves(alpha_mode, plot_dir, export_tikz)
    written += _plot_delta_curves(deltas, plot_dir, export_tikz)
    written += _plot_tradeoff_scatter(deltas, plot_dir, export_tikz)
    written += _plot_reward_robustness(alpha_mode, plot_dir, export_tikz)

    # (summary frame, parameter column, output file stem)
    sensitivity_jobs = (
        (lambda_summary, "lambda_coi", "wandb_lambda_sensitivity"),
        (radius_summary, "ambiguity_radius", "wandb_radius_sensitivity"),
    )
    for summary, parameter, out_name in sensitivity_jobs:
        written += _plot_parameter_sensitivity(
            summary=summary,
            parameter=parameter,
            out_name=out_name,
            out_dir=plot_dir,
            export_tikz=export_tikz,
        )

    written += _plot_delta_summary(deltas, plot_dir, export_tikz)
    return written
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Parse the command line, build all artifacts and print each written path."""
    parser = argparse.ArgumentParser(
        description="Generate W&B sweep visualizations for PHANTOM results"
    )
    parser.add_argument(
        "--input", type=Path, required=True, help="Path to W&B export CSV"
    )
    parser.add_argument("--output-dir", type=Path, default=_default_output_dir())
    parser.add_argument("--plot-dir", type=Path, default=None)
    parser.add_argument("--include-non-finished", action="store_true")
    parser.add_argument(
        "--export-tikz",
        action="store_true",
        help="Export matplotlib figures to TikZ via tikzplotlib",
    )
    args = parser.parse_args()

    _configure_style()

    # Without --plot-dir the plot directory is derived from the output directory.
    plot_dir = args.plot_dir
    if plot_dir is None:
        plot_dir = _default_plot_dir(args.output_dir)

    outputs = build_artifacts(
        input_path=args.input,
        output_dir=args.output_dir,
        plot_dir=plot_dir,
        include_non_finished=bool(args.include_non_finished),
        export_tikz=bool(args.export_tikz),
    )
    for written_path in outputs:
        print(written_path)


if __name__ == "__main__":
    main()
|
||||||
51
paper/src/chapters/figures/results/process_all_results.py
Normal file
51
paper/src/chapters/figures/results/process_all_results.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from process_first_sweep import run as run_first_sweep
|
||||||
|
from process_ppo_benchmark import run as run_ppo_benchmark
|
||||||
|
|
||||||
|
|
||||||
|
def _default_output_dir() -> Path:
|
||||||
|
return Path(__file__).resolve().parent / "generated" / "legacy"
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Process the PPO benchmark and first-sweep CSV exports and print the outputs.

    Both inputs are read from ``tpu_orchestration/results`` under the directory
    five levels above this script.
    """
    parser = argparse.ArgumentParser(
        description="Process all result CSV exports for paper figures"
    )
    parser.add_argument("--output-dir", type=Path, default=_default_output_dir())
    parser.add_argument("--include-non-finished", action="store_true")
    parser.add_argument("--top-n", type=int, default=25)
    args = parser.parse_args()

    results_dir = Path(__file__).resolve().parents[5] / "tpu_orchestration" / "results"
    include_non_finished = bool(args.include_non_finished)

    written: list[Path] = []
    written += run_ppo_benchmark(
        input_path=results_dir / "ppo_benchmark.csv",
        output_dir=args.output_dir,
        include_non_finished=include_non_finished,
    )
    written += run_first_sweep(
        input_path=results_dir / "first_sweep.csv",
        output_dir=args.output_dir,
        include_non_finished=include_non_finished,
        top_n=int(args.top_n),
    )

    for written_path in written:
        print(written_path)


if __name__ == "__main__":
    main()
|
||||||
409
paper/src/chapters/figures/results/process_final_sweeps.py
Normal file
409
paper/src/chapters/figures/results/process_final_sweeps.py
Normal file
@@ -0,0 +1,409 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
|
||||||
|
matplotlib.use("Agg")
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def _project_root() -> Path:
|
||||||
|
return Path(__file__).resolve().parents[5]
|
||||||
|
|
||||||
|
|
||||||
|
def _default_bundle_dir() -> Path:
    """Return the most recently modified ``bundle_*`` directory.

    Candidates are looked up under
    ``engine/studies/results/wandb_sweep_bundles`` in the project root.

    Raises:
        FileNotFoundError: if no ``bundle_*`` directory exists there.
    """
    base = _project_root() / "engine" / "studies" / "results" / "wandb_sweep_bundles"
    candidates = [entry for entry in base.glob("bundle_*") if entry.is_dir()]
    if not candidates:
        raise FileNotFoundError(f"No sweep bundle directories found in {base}")
    # Newest modification time wins; on ties the first one found is kept.
    return max(candidates, key=lambda entry: entry.stat().st_mtime)
|
||||||
|
|
||||||
|
|
||||||
|
def _default_output_dir() -> Path:
|
||||||
|
return Path(__file__).resolve().parent / "generated" / "final"
|
||||||
|
|
||||||
|
|
||||||
|
def _default_plot_dir(output_dir: Path) -> Path:
|
||||||
|
return output_dir / "plots"
|
||||||
|
|
||||||
|
|
||||||
|
def _truthy(value: Any) -> bool:
|
||||||
|
if isinstance(value, bool):
|
||||||
|
return value
|
||||||
|
if value is None:
|
||||||
|
return False
|
||||||
|
return str(value).strip().lower() in {"1", "true", "yes", "on"}
|
||||||
|
|
||||||
|
|
||||||
|
def _mode_of(row: pd.Series) -> str:
|
||||||
|
mode_hint = str(row.get("study_mode", "")).strip().lower()
|
||||||
|
if mode_hint in {"baseline", "no_robust"}:
|
||||||
|
return "baseline"
|
||||||
|
if mode_hint in {"defended", "robust"}:
|
||||||
|
return "defended"
|
||||||
|
if _truthy(row.get("baseline_mode")) or _truthy(row.get("no_robust")):
|
||||||
|
return "baseline"
|
||||||
|
return "defended"
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_numeric(frame: pd.DataFrame, columns: list[str]) -> None:
|
||||||
|
for column in columns:
|
||||||
|
if column in frame.columns:
|
||||||
|
frame[column] = pd.to_numeric(frame[column], errors="coerce")
|
||||||
|
|
||||||
|
|
||||||
|
def _configure_style() -> None:
    """Apply the shared figure style by updating matplotlib's ``rcParams``."""
    style = {
        # Fonts and text sizes
        "font.family": "serif",
        "font.size": 10,
        "axes.titlesize": 10,
        "axes.labelsize": 9,
        "legend.fontsize": 8,
        "xtick.labelsize": 8,
        "ytick.labelsize": 8,
        # Resolution
        "figure.dpi": 220,
        "savefig.dpi": 320,
        # Axes decoration
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.grid": True,
        "grid.alpha": 0.22,
    }
    plt.rcParams.update(style)
|
||||||
|
|
||||||
|
|
||||||
|
def _load_runs(bundle_dir: Path) -> pd.DataFrame:
    """Load ``runs_finished.csv`` from ``bundle_dir`` and prepare it for analysis.

    A ``mode`` column is added by applying ``_mode_of`` to every row, and the
    alpha, product-count, evaluation-metric and objective columns are coerced
    to numeric where they are present.

    Raises:
        FileNotFoundError: if ``runs_finished.csv`` does not exist.
    """
    path = bundle_dir / "runs_finished.csv"
    if not path.exists():
        raise FileNotFoundError(f"Missing required file: {path}")

    numeric_columns = [
        "alpha",
        "n_products",
        "eval_revenue_mean",
        "eval_reward_mean",
        "eval_supra_share_mean",
        "eval_volatility_mean",
        "eval_coi_level_mean",
        "eval_coi_leakage_mean",
        "objective_score",
    ]
    frame = pd.read_csv(path)
    # The mode is derived before coercion, i.e. from the values as read.
    frame["mode"] = frame.apply(_mode_of, axis=1)
    _coerce_numeric(frame, numeric_columns)
    return frame
|
||||||
|
|
||||||
|
|
||||||
|
def _focus_sweep(runs: pd.DataFrame) -> str:
|
||||||
|
coverage = (
|
||||||
|
runs.groupby("sweep_id", as_index=False)
|
||||||
|
.agg(
|
||||||
|
n_alpha=("alpha", lambda s: int(pd.Series(s).dropna().nunique())),
|
||||||
|
max_alpha=("alpha", "max"),
|
||||||
|
run_count=("run_id", "size"),
|
||||||
|
)
|
||||||
|
.sort_values(
|
||||||
|
["n_alpha", "max_alpha", "run_count"], ascending=[False, False, False]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if coverage.empty:
|
||||||
|
raise ValueError("No sweep rows available in runs_finished.csv")
|
||||||
|
return str(coverage.iloc[0]["sweep_id"])
|
||||||
|
|
||||||
|
|
||||||
|
def _alpha_mode_summary(runs: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
return (
|
||||||
|
runs.groupby(["alpha", "mode"], as_index=False)
|
||||||
|
.agg(
|
||||||
|
runs=("run_id", "size"),
|
||||||
|
revenue_mean=("eval_revenue_mean", "mean"),
|
||||||
|
reward_mean=("eval_reward_mean", "mean"),
|
||||||
|
supra_mean=("eval_supra_share_mean", "mean"),
|
||||||
|
volatility_mean=("eval_volatility_mean", "mean"),
|
||||||
|
coi_leakage_mean=("eval_coi_leakage_mean", "mean"),
|
||||||
|
coi_level_mean=("eval_coi_level_mean", "mean"),
|
||||||
|
)
|
||||||
|
.sort_values(["alpha", "mode"])
|
||||||
|
.reset_index(drop=True)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _alpha_deltas(alpha_mode: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
rows: list[dict[str, float]] = []
|
||||||
|
for alpha, group in alpha_mode.groupby("alpha", sort=True):
|
||||||
|
defended = group[group["mode"] == "defended"]
|
||||||
|
baseline = group[group["mode"] == "baseline"]
|
||||||
|
if defended.empty or baseline.empty:
|
||||||
|
continue
|
||||||
|
d_rev = float(defended["revenue_mean"].iloc[0])
|
||||||
|
b_rev = float(baseline["revenue_mean"].iloc[0])
|
||||||
|
d_reward = float(defended["reward_mean"].iloc[0])
|
||||||
|
b_reward = float(baseline["reward_mean"].iloc[0])
|
||||||
|
d_vol = float(defended["volatility_mean"].iloc[0])
|
||||||
|
b_vol = float(baseline["volatility_mean"].iloc[0])
|
||||||
|
d_supra = float(defended["supra_mean"].iloc[0])
|
||||||
|
b_supra = float(baseline["supra_mean"].iloc[0])
|
||||||
|
d_coi_leak = float(defended["coi_leakage_mean"].iloc[0])
|
||||||
|
b_coi_leak = float(baseline["coi_leakage_mean"].iloc[0])
|
||||||
|
rows.append(
|
||||||
|
{
|
||||||
|
"alpha": float(alpha),
|
||||||
|
"revenue_delta": d_rev - b_rev,
|
||||||
|
"revenue_delta_pct": 0.0
|
||||||
|
if b_rev == 0.0
|
||||||
|
else 100.0 * (d_rev - b_rev) / b_rev,
|
||||||
|
"reward_delta": d_reward - b_reward,
|
||||||
|
"reward_delta_pct": 0.0
|
||||||
|
if b_reward == 0.0
|
||||||
|
else 100.0 * (d_reward - b_reward) / b_reward,
|
||||||
|
"volatility_delta": d_vol - b_vol,
|
||||||
|
"supra_delta": d_supra - b_supra,
|
||||||
|
"coi_leakage_delta": d_coi_leak - b_coi_leak,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return pd.DataFrame(rows).sort_values("alpha").reset_index(drop=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _zone_summary(alpha_deltas: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
if alpha_deltas.empty:
|
||||||
|
return pd.DataFrame()
|
||||||
|
data = alpha_deltas.copy()
|
||||||
|
data["zone"] = np.where(
|
||||||
|
data["alpha"] >= 0.7, "high_alpha_0_7_plus", "low_alpha_below_0_7"
|
||||||
|
)
|
||||||
|
return (
|
||||||
|
data.groupby("zone", as_index=False)
|
||||||
|
.agg(
|
||||||
|
alpha_cells=("alpha", "size"),
|
||||||
|
revenue_delta_pct_mean=("revenue_delta_pct", "mean"),
|
||||||
|
reward_delta_pct_mean=("reward_delta_pct", "mean"),
|
||||||
|
coi_leakage_delta_mean=("coi_leakage_delta", "mean"),
|
||||||
|
volatility_delta_mean=("volatility_delta", "mean"),
|
||||||
|
)
|
||||||
|
.sort_values("zone")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _save_plot(fig: plt.Figure, path: Path) -> Path:
    """Save ``fig`` to ``path`` with a tight bounding box, close it, return ``path``.

    Parent directories are created as needed. The figure is closed even when
    saving raises, so a failed write does not leave the figure open.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    try:
        fig.savefig(path, bbox_inches="tight")
    finally:
        # Close on every exit path to avoid leaking the figure.
        plt.close(fig)
    return path
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_focus_revenue_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Path:
    """Plot mean revenue against alpha for the baseline and defended modes.

    Modes with no rows are skipped and a dashed vertical line marks alpha = 0.7.
    The figure is written to ``out_path`` through ``_save_plot``, whose return
    value is passed back.
    """
    fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
    # (mode key, line colour, legend label)
    mode_styles = (
        ("baseline", "#4C72B0", "Baseline"),
        ("defended", "#C44E52", "Defended"),
    )
    for mode, colour, legend_label in mode_styles:
        rows = alpha_mode[alpha_mode["mode"] == mode].sort_values("alpha")
        if rows.empty:
            continue
        ax.plot(
            rows["alpha"],
            rows["revenue_mean"],
            marker="o",
            linewidth=1.9,
            markersize=4,
            color=colour,
            label=legend_label,
        )
    ax.axvline(0.7, color="#666666", linewidth=1.0, linestyle="--")
    ax.set_xlabel(r"Contamination $\alpha$")
    ax.set_ylabel("Mean episode revenue")
    ax.set_title("Final Cohort Revenue Curves")
    ax.legend(loc="lower left")
    return _save_plot(fig, out_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_focus_revenue_delta(alpha_deltas: pd.DataFrame, out_path: Path) -> Path:
    """Plot the revenue percentage delta against alpha with a shaded area to zero.

    Dashed lines mark zero delta and alpha = 0.7. Among the rows with alpha of
    0.7 or more, the one with the largest absolute delta is highlighted and
    annotated. The figure is written to ``out_path`` through ``_save_plot``.
    """
    fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
    alphas = alpha_deltas["alpha"].to_numpy(dtype=float)
    pct = alpha_deltas["revenue_delta_pct"].to_numpy(dtype=float)
    ax.plot(alphas, pct, marker="o", linewidth=2.0, markersize=4, color="#C44E52")
    ax.fill_between(alphas, pct, 0.0, color="#C44E52", alpha=0.12)
    ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
    ax.axvline(0.7, color="#666666", linewidth=1.0, linestyle="--")

    high = alpha_deltas[alpha_deltas["alpha"] >= 0.7]
    if not high.empty:
        # Order the high-alpha rows by absolute delta, largest first.
        by_magnitude = high["revenue_delta_pct"].abs().sort_values(ascending=False)
        best = high.reindex(by_magnitude.index).iloc[0]
        peak_alpha = best["alpha"]
        peak_pct = best["revenue_delta_pct"]
        ax.scatter([peak_alpha], [peak_pct], color="#1f77b4", s=45, zorder=3)
        ax.annotate(
            f"high-alpha peak {peak_pct:.2f}%",
            (float(peak_alpha), float(peak_pct)),
            textcoords="offset points",
            xytext=(6, 6),
            fontsize=8,
        )

    ax.set_xlabel(r"Contamination $\alpha$")
    ax.set_ylabel("Defended minus baseline revenue (%)")
    ax.set_title("Revenue Delta by Contamination (Final Cohort)")
    return _save_plot(fig, out_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_focus_risk_deltas(alpha_deltas: pd.DataFrame, out_path: Path) -> Path:
    """Plot the COI-leakage and volatility deltas against alpha on one axis.

    Dashed lines mark zero delta and alpha = 0.7. The figure is written to
    ``out_path`` through ``_save_plot``.
    """
    fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
    alphas = alpha_deltas["alpha"].to_numpy(dtype=float)

    # (column, marker, marker size, colour, legend label)
    series_specs = (
        ("coi_leakage_delta", "o", 4, "#55A868", "COI leakage delta"),
        ("volatility_delta", "s", 3.8, "#8172B3", "Volatility delta"),
    )
    for column, marker, marker_size, colour, legend_label in series_specs:
        ax.plot(
            alphas,
            alpha_deltas[column].to_numpy(dtype=float),
            marker=marker,
            linewidth=1.8,
            markersize=marker_size,
            color=colour,
            label=legend_label,
        )

    ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
    ax.axvline(0.7, color="#666666", linewidth=1.0, linestyle="--")
    ax.set_xlabel(r"Contamination $\alpha$")
    ax.set_ylabel("Defended minus baseline")
    ax.set_title("Leakage and Stability Deltas (Final Cohort)")
    ax.legend(loc="lower left")
    return _save_plot(fig, out_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _write_include(path: Path, figure_rel_path: str, width: str) -> Path:
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
path.write_text(f"\\includegraphics[width={width}]{{{figure_rel_path}}}\n")
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def run(bundle_dir: Path, output_dir: Path, plot_dir: Path) -> list[Path]:
    """Produce the final-cohort tables, headline JSON, figures and LaTeX includes.

    The runs of ``bundle_dir`` are loaded, narrowed to the sweep chosen by
    ``_focus_sweep`` and summarised. Three CSV tables and a JSON headline
    summary go to ``output_dir``, three PDF figures to ``plot_dir`` and three
    ``.tex`` include snippets to ``includes/final`` next to this script.

    Returns:
        Every written path, in the order tables, JSON, figures, includes.
    """
    all_runs = _load_runs(bundle_dir)
    focus_id = _focus_sweep(all_runs)
    focus_runs = all_runs[all_runs["sweep_id"] == focus_id].copy()
    alpha_mode = _alpha_mode_summary(focus_runs)
    deltas = _alpha_deltas(alpha_mode)
    zones = _zone_summary(deltas)

    output_dir.mkdir(parents=True, exist_ok=True)
    plot_dir.mkdir(parents=True, exist_ok=True)

    written: list[Path] = []

    tables = (
        (alpha_mode, "final_focus_alpha_mode_summary.csv"),
        (deltas, "final_focus_alpha_deltas.csv"),
        (zones, "final_focus_zone_summary.csv"),
    )
    for table, filename in tables:
        table_path = output_dir / filename
        table.to_csv(table_path, index=False)
        written.append(table_path)

    # Headline numbers fall back to 0 / None when there are no delta rows.
    has_deltas = not deltas.empty
    headline = {
        "bundle": str(bundle_dir),
        "focus_cohort": "max_alpha_coverage",
        "alpha_cells": int(deltas["alpha"].nunique()) if has_deltas else 0,
        "alpha_min": float(deltas["alpha"].min()) if has_deltas else None,
        "alpha_max": float(deltas["alpha"].max()) if has_deltas else None,
        "mean_revenue_delta_pct": float(deltas["revenue_delta_pct"].mean())
        if has_deltas
        else None,
        "mean_reward_delta_pct": float(deltas["reward_delta_pct"].mean())
        if has_deltas
        else None,
        "zone_summary": zones.to_dict(orient="records"),
    }
    headline_path = output_dir / "final_focus_headline_summary.json"
    headline_path.write_text(json.dumps(headline, indent=2) + "\n")
    written.append(headline_path)

    # (plot function, input frame, output file name)
    plot_jobs = (
        (_plot_focus_revenue_by_alpha, alpha_mode, "final_focus_revenue_by_alpha.pdf"),
        (_plot_focus_revenue_delta, deltas, "final_focus_revenue_delta.pdf"),
        (_plot_focus_risk_deltas, deltas, "final_focus_risk_deltas.pdf"),
    )
    for plot_fn, data, filename in plot_jobs:
        written.append(plot_fn(data, plot_dir / filename))

    include_dir = Path(__file__).resolve().parent / "includes" / "final"
    # (include file name, figure path written into the snippet, width)
    include_jobs = (
        (
            "final_focus_revenue_by_alpha.tex",
            "chapters/figures/results/generated/final/plots/final_focus_revenue_by_alpha.pdf",
            "0.98\\linewidth",
        ),
        (
            "final_focus_revenue_delta.tex",
            "chapters/figures/results/generated/final/plots/final_focus_revenue_delta.pdf",
            "0.95\\linewidth",
        ),
        (
            "final_focus_risk_deltas.tex",
            "chapters/figures/results/generated/final/plots/final_focus_risk_deltas.pdf",
            "0.95\\linewidth",
        ),
    )
    for include_name, figure_rel_path, width in include_jobs:
        written.append(
            _write_include(include_dir / include_name, figure_rel_path, width)
        )
    return written
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Parse the command line, generate the final-cohort artifacts, print the paths.

    The default bundle directory is resolved only when ``--bundle-dir`` is not
    given. Evaluating ``_default_bundle_dir()`` as the argparse default would
    run it while the parser is being built, so the script would fail with
    ``FileNotFoundError`` on a checkout without bundles even when an explicit
    ``--bundle-dir`` is supplied.
    """
    parser = argparse.ArgumentParser(
        description="Generate final paper figures/tables from the final sweep cohort"
    )
    parser.add_argument(
        "--bundle-dir",
        type=Path,
        default=None,
        help="Sweep bundle directory (default: most recently modified bundle_* directory)",
    )
    parser.add_argument("--output-dir", type=Path, default=_default_output_dir())
    parser.add_argument("--plot-dir", type=Path, default=None)
    args = parser.parse_args()

    # Lazy default: only search for bundles when the user did not name one.
    bundle_dir = (
        args.bundle_dir if args.bundle_dir is not None else _default_bundle_dir()
    )

    _configure_style()
    plot_dir = (
        args.plot_dir
        if args.plot_dir is not None
        else _default_plot_dir(args.output_dir)
    )
    outputs = run(bundle_dir=bundle_dir, output_dir=args.output_dir, plot_dir=plot_dir)
    for path in outputs:
        print(path)


if __name__ == "__main__":
    main()
|
||||||
272
paper/src/chapters/figures/results/process_first_sweep.py
Normal file
272
paper/src/chapters/figures/results/process_first_sweep.py
Normal file
@@ -0,0 +1,272 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def _project_root() -> Path:
    """Return the directory five levels above the folder holding this script."""
    script = Path(__file__).resolve()
    return script.parents[5]
|
||||||
|
|
||||||
|
|
||||||
|
def _default_input() -> Path:
    """Default input CSV: ``tpu_orchestration/results/first_sweep.csv`` under the project root."""
    return _project_root().joinpath("tpu_orchestration", "results", "first_sweep.csv")
|
||||||
|
|
||||||
|
|
||||||
|
def _default_output_dir() -> Path:
|
||||||
|
return Path(__file__).resolve().parent / "generated" / "legacy"
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize(key: str) -> str:
|
||||||
|
return key.replace("/", "_").replace("-", "_")
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_numeric(frame: pd.DataFrame, columns: Iterable[str]) -> None:
|
||||||
|
for column in columns:
|
||||||
|
if column in frame.columns:
|
||||||
|
frame[column] = pd.to_numeric(frame[column], errors="coerce")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_alpha(frame: pd.DataFrame) -> pd.Series:
|
||||||
|
if "study/alpha" in frame.columns:
|
||||||
|
return pd.to_numeric(frame["study/alpha"], errors="coerce")
|
||||||
|
if "alpha" in frame.columns:
|
||||||
|
return pd.to_numeric(frame["alpha"], errors="coerce")
|
||||||
|
return pd.Series(np.nan, index=frame.index, dtype=float)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_mode(frame: pd.DataFrame) -> pd.Series:
|
||||||
|
if "study/mode" in frame.columns:
|
||||||
|
return frame["study/mode"].astype(str).str.strip().str.lower()
|
||||||
|
if "study/no_robust" in frame.columns:
|
||||||
|
no_robust = pd.to_numeric(frame["study/no_robust"], errors="coerce").fillna(0.0)
|
||||||
|
return pd.Series(
|
||||||
|
np.where(no_robust > 0.5, "no_robust", "robust"),
|
||||||
|
index=frame.index,
|
||||||
|
dtype="object",
|
||||||
|
)
|
||||||
|
if "no_robust" in frame.columns:
|
||||||
|
no_robust = (
|
||||||
|
frame["no_robust"].astype(str).str.lower().isin({"1", "true", "yes"})
|
||||||
|
)
|
||||||
|
return pd.Series(
|
||||||
|
np.where(no_robust, "no_robust", "robust"),
|
||||||
|
index=frame.index,
|
||||||
|
dtype="object",
|
||||||
|
)
|
||||||
|
return pd.Series("", index=frame.index, dtype="object")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_tier(frame: pd.DataFrame) -> pd.Series:
|
||||||
|
for column in ("tiers", "runtime/backend", "algo", "run.backend", "run.algo"):
|
||||||
|
if column in frame.columns:
|
||||||
|
tier = frame[column].astype(str).str.strip().str.lower()
|
||||||
|
if tier.notna().any():
|
||||||
|
return tier
|
||||||
|
return pd.Series("unknown", index=frame.index, dtype="object")
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_frame(frame: pd.DataFrame, include_non_finished: bool) -> pd.DataFrame:
    """Build the analysis frame from the raw sweep export.

    Works on a copy of ``frame``. Unless ``include_non_finished`` is set, rows
    whose ``State`` is not "finished" (case-insensitive) are dropped when that
    column exists. Derived ``alpha``, ``mode`` and ``tier`` columns are added,
    rows without a robust/no_robust mode or without an alpha are discarded,
    known metric and hyperparameter columns are coerced to numeric, and the
    result is sorted by tier, alpha and mode with a fresh index.
    """
    data = frame.copy()
    drop_unfinished = not include_non_finished and "State" in data.columns
    if drop_unfinished:
        finished = data["State"].astype(str).str.lower() == "finished"
        data = data[finished].copy()

    data["alpha"] = _extract_alpha(data)
    data["mode"] = _extract_mode(data)
    data["tier"] = _extract_tier(data)

    usable = data["mode"].isin({"robust", "no_robust"}) & data["alpha"].notna()
    data = data[usable]

    numeric_columns = (
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/margin_mean",
        "eval/volatility_mean",
        "objective/score",
        "train/alpha_adv",
        "lambda_coi",
        "robust_radius",
        "learning_rate",
        "batch_size",
        "n_steps",
        "total_timesteps",
    )
    _coerce_numeric(data, numeric_columns)

    ordered = data.sort_values(["tier", "alpha", "mode"])
    return ordered.reset_index(drop=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _group_summary(
    frame: pd.DataFrame, by: list[str], metrics: list[str]
) -> pd.DataFrame:
    """Aggregate ``frame`` per ``by`` group.

    The result has a ``runs`` column (group size) followed by
    ``<metric>_mean`` / ``<metric>_std`` columns for each metric, where the
    metric name is passed through ``_sanitize``. Rows are sorted by ``by``.
    """
    spec: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        stem = _sanitize(metric)
        spec.update(
            {
                f"{stem}_mean": (metric, "mean"),
                f"{stem}_std": (metric, "std"),
            }
        )
    grouped = frame.groupby(by, as_index=False)
    return grouped.agg(**spec).sort_values(by)
|
||||||
|
|
||||||
|
|
||||||
|
def _tier_alpha_deltas(summary: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Compare robust against no_robust means for every (tier, alpha) pair.

    ``summary`` is expected to carry ``tier``, ``alpha``, ``mode``, ``runs``
    and ``<metric>_mean`` columns. Pairs lacking either mode are skipped. For
    each metric the absolute delta (robust minus no_robust) and the delta as a
    percentage of the no_robust mean are emitted; the percentage is NaN when
    the no_robust mean is exactly zero.
    """
    stems = [_sanitize(metric) for metric in metrics]
    records: list[dict[str, float | str]] = []
    for (tier, alpha), cell in summary.groupby(["tier", "alpha"], sort=True):
        robust_rows = cell.loc[cell["mode"] == "robust"]
        plain_rows = cell.loc[cell["mode"] == "no_robust"]
        if robust_rows.empty or plain_rows.empty:
            continue

        record: dict[str, float | str] = {
            "tier": str(tier),
            "alpha": float(alpha),
            "runs_robust": float(robust_rows["runs"].iloc[0]),
            "runs_no_robust": float(plain_rows["runs"].iloc[0]),
        }
        for stem in stems:
            mean_column = f"{stem}_mean"
            with_robust = float(robust_rows[mean_column].iloc[0])
            baseline = float(plain_rows[mean_column].iloc[0])
            difference = with_robust - baseline
            record[f"{stem}_delta"] = difference
            if baseline == 0:
                record[f"{stem}_delta_pct"] = np.nan
            else:
                record[f"{stem}_delta_pct"] = 100.0 * difference / baseline
        records.append(record)

    return pd.DataFrame(records)
|
||||||
|
|
||||||
|
|
||||||
|
def _top_runs(frame: pd.DataFrame, n: int) -> pd.DataFrame:
|
||||||
|
rank_metric = "objective/score"
|
||||||
|
if rank_metric not in frame.columns or frame[rank_metric].notna().sum() == 0:
|
||||||
|
rank_metric = "eval/reward_mean"
|
||||||
|
|
||||||
|
keep = [
|
||||||
|
"Name",
|
||||||
|
"tier",
|
||||||
|
"alpha",
|
||||||
|
"mode",
|
||||||
|
rank_metric,
|
||||||
|
"eval/revenue_mean",
|
||||||
|
"eval/reward_mean",
|
||||||
|
"eval/coi_level_mean",
|
||||||
|
"eval/coi_leakage_mean",
|
||||||
|
"lambda_coi",
|
||||||
|
"robust_radius",
|
||||||
|
"learning_rate",
|
||||||
|
"batch_size",
|
||||||
|
"n_steps",
|
||||||
|
"total_timesteps",
|
||||||
|
]
|
||||||
|
present = [column for column in keep if column in frame.columns]
|
||||||
|
ranked = frame[present].copy().sort_values(rank_metric, ascending=False)
|
||||||
|
return ranked.head(max(1, int(n))).reset_index(drop=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _headline_json(
|
||||||
|
frame: pd.DataFrame, tier_mode: pd.DataFrame
|
||||||
|
) -> dict[str, float | str]:
|
||||||
|
out: dict[str, float | str] = {
|
||||||
|
"runs": int(len(frame)),
|
||||||
|
"tiers": int(frame["tier"].nunique()),
|
||||||
|
"alphas": int(frame["alpha"].nunique()),
|
||||||
|
}
|
||||||
|
|
||||||
|
robust_rows = tier_mode[tier_mode["mode"] == "robust"]
|
||||||
|
no_robust_rows = tier_mode[tier_mode["mode"] == "no_robust"]
|
||||||
|
if robust_rows.empty or no_robust_rows.empty:
|
||||||
|
out["status"] = "incomplete_modes"
|
||||||
|
return out
|
||||||
|
|
||||||
|
robust_mean = robust_rows["eval_revenue_mean_mean"].mean()
|
||||||
|
no_robust_mean = no_robust_rows["eval_revenue_mean_mean"].mean()
|
||||||
|
out.update(
|
||||||
|
{
|
||||||
|
"status": "ok",
|
||||||
|
"mean_tier_revenue_robust": float(robust_mean),
|
||||||
|
"mean_tier_revenue_no_robust": float(no_robust_mean),
|
||||||
|
"mean_tier_revenue_delta": float(robust_mean - no_robust_mean),
|
||||||
|
"mean_tier_revenue_delta_pct": float(
|
||||||
|
100.0 * (robust_mean - no_robust_mean) / no_robust_mean
|
||||||
|
)
|
||||||
|
if no_robust_mean
|
||||||
|
else np.nan,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def run(
    input_path: Path, output_dir: Path, include_non_finished: bool, top_n: int
) -> list[Path]:
    """Process the first-sweep CSV and write the summary artefacts.

    Creates ``output_dir`` if needed, reads ``input_path``, and writes four
    CSV tables (tier/mode summary, tier/alpha/mode summary, tier/alpha deltas,
    top configurations) plus a JSON headline summary. Returns the written
    paths in that order.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    frame = _prepare_frame(
        pd.read_csv(input_path), include_non_finished=include_non_finished
    )

    candidate_metrics = (
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/margin_mean",
        "eval/volatility_mean",
        "objective/score",
        "train/alpha_adv",
    )
    # Only metrics that exist in the export can be aggregated.
    metrics = [name for name in candidate_metrics if name in frame.columns]

    tier_mode = _group_summary(frame, ["tier", "mode"], metrics)
    tier_alpha_mode = _group_summary(frame, ["tier", "alpha", "mode"], metrics)
    deltas = _tier_alpha_deltas(tier_alpha_mode, metrics)
    top_configs = _top_runs(frame, n=top_n)
    headline = _headline_json(frame, tier_mode)

    tables = (
        ("first_sweep_tier_mode_summary.csv", tier_mode),
        ("first_sweep_tier_alpha_mode_summary.csv", tier_alpha_mode),
        ("first_sweep_tier_alpha_deltas.csv", deltas),
        ("first_sweep_top_configs.csv", top_configs),
    )
    written_paths: list[Path] = []
    for filename, table in tables:
        destination = output_dir / filename
        table.to_csv(destination, index=False)
        written_paths.append(destination)

    headline_path = output_dir / "first_sweep_headline_summary.json"
    headline_path.write_text(json.dumps(headline, indent=2))
    written_paths.append(headline_path)
    return written_paths
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: parse options, call ``run`` and print each written path."""
    cli = argparse.ArgumentParser(
        description="Process first sweep CSV for paper tables"
    )
    cli.add_argument("--input", type=Path, default=_default_input())
    cli.add_argument("--output-dir", type=Path, default=_default_output_dir())
    cli.add_argument("--include-non-finished", action="store_true")
    cli.add_argument("--top-n", type=int, default=25)
    options = cli.parse_args()

    produced = run(
        input_path=options.input,
        output_dir=options.output_dir,
        include_non_finished=bool(options.include_non_finished),
        top_n=int(options.top_n),
    )
    for item in produced:
        print(item)


if __name__ == "__main__":
    main()
|
||||||
277
paper/src/chapters/figures/results/process_ppo_benchmark.py
Normal file
277
paper/src/chapters/figures/results/process_ppo_benchmark.py
Normal file
@@ -0,0 +1,277 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def _project_root() -> Path:
    """Return the directory five levels above the folder holding this script."""
    script = Path(__file__).resolve()
    return script.parents[5]
|
||||||
|
|
||||||
|
|
||||||
|
def _default_input() -> Path:
    """Default input CSV: ``tpu_orchestration/results/ppo_benchmark.csv`` under the project root."""
    return _project_root().joinpath("tpu_orchestration", "results", "ppo_benchmark.csv")
|
||||||
|
|
||||||
|
|
||||||
|
def _default_output_dir() -> Path:
|
||||||
|
return Path(__file__).resolve().parent / "generated" / "legacy"
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize(key: str) -> str:
|
||||||
|
return key.replace("/", "_").replace("-", "_")
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_numeric(frame: pd.DataFrame, columns: Iterable[str]) -> None:
|
||||||
|
for column in columns:
|
||||||
|
if column in frame.columns:
|
||||||
|
frame[column] = pd.to_numeric(frame[column], errors="coerce")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_alpha(frame: pd.DataFrame) -> pd.Series:
|
||||||
|
if "study/alpha" in frame.columns:
|
||||||
|
return pd.to_numeric(frame["study/alpha"], errors="coerce")
|
||||||
|
if "alpha" in frame.columns:
|
||||||
|
return pd.to_numeric(frame["alpha"], errors="coerce")
|
||||||
|
return pd.Series(np.nan, index=frame.index, dtype=float)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_mode(frame: pd.DataFrame) -> pd.Series:
|
||||||
|
if "study/mode" in frame.columns:
|
||||||
|
return frame["study/mode"].astype(str).str.strip().str.lower()
|
||||||
|
if "study/no_robust" in frame.columns:
|
||||||
|
no_robust = pd.to_numeric(frame["study/no_robust"], errors="coerce").fillna(0.0)
|
||||||
|
return pd.Series(
|
||||||
|
np.where(no_robust > 0.5, "no_robust", "robust"),
|
||||||
|
index=frame.index,
|
||||||
|
dtype="object",
|
||||||
|
)
|
||||||
|
if "no_robust" in frame.columns:
|
||||||
|
no_robust = (
|
||||||
|
frame["no_robust"].astype(str).str.lower().isin({"1", "true", "yes"})
|
||||||
|
)
|
||||||
|
return pd.Series(
|
||||||
|
np.where(no_robust, "no_robust", "robust"),
|
||||||
|
index=frame.index,
|
||||||
|
dtype="object",
|
||||||
|
)
|
||||||
|
return pd.Series("", index=frame.index, dtype="object")
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_frame(frame: pd.DataFrame, include_non_finished: bool) -> pd.DataFrame:
    """Build the analysis frame from the raw PPO benchmark export.

    Works on a copy of ``frame``. Unless ``include_non_finished`` is set, rows
    whose ``State`` is not "finished" (case-insensitive) are dropped when that
    column exists. Derived ``alpha`` and ``mode`` columns are added, rows
    without a robust/no_robust mode or without an alpha are discarded, known
    metric columns are coerced to numeric, and the result is sorted by alpha
    and mode with a fresh index.
    """
    data = frame.copy()
    drop_unfinished = not include_non_finished and "State" in data.columns
    if drop_unfinished:
        finished = data["State"].astype(str).str.lower() == "finished"
        data = data[finished].copy()

    data["alpha"] = _extract_alpha(data)
    data["mode"] = _extract_mode(data)

    usable = data["mode"].isin({"robust", "no_robust"}) & data["alpha"].notna()
    data = data[usable]

    _coerce_numeric(
        data,
        (
            "eval/revenue_mean",
            "eval/reward_mean",
            "eval/coi_level_mean",
            "eval/coi_leakage_mean",
            "eval/volatility_mean",
            "eval/margin_mean",
            "train/alpha_adv",
            "train/coi_penalty",
            "train/ux_penalty",
            "train/agent_prob",
        ),
    )
    ordered = data.sort_values(["alpha", "mode"])
    return ordered.reset_index(drop=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _summary_by_alpha_mode(frame: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Aggregate ``frame`` per (alpha, mode) pair.

    The result has a ``runs`` column (group size) followed by
    ``<metric>_mean`` / ``<metric>_std`` columns for each metric, where the
    metric name is passed through ``_sanitize``. Rows are sorted by alpha and
    mode and re-indexed from zero.
    """
    keys = ["alpha", "mode"]
    spec: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        stem = _sanitize(metric)
        spec.update(
            {
                f"{stem}_mean": (metric, "mean"),
                f"{stem}_std": (metric, "std"),
            }
        )

    aggregated = frame.groupby(keys, as_index=False).agg(**spec)
    return aggregated.sort_values(keys).reset_index(drop=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _delta_by_alpha(summary: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Compare robust against no_robust means for every alpha.

    ``summary`` is expected to carry ``alpha``, ``mode``, ``runs`` and
    ``<metric>_mean`` columns. Alphas lacking either mode are skipped. For each
    metric the robust mean, the no_robust mean, their difference (robust minus
    no_robust) and the difference as a percentage of the no_robust mean are
    emitted; the percentage is NaN when the no_robust mean is exactly zero.
    """
    stems = [_sanitize(metric) for metric in metrics]
    records: list[dict[str, float]] = []
    for alpha, cell in summary.groupby("alpha", sort=True):
        robust_rows = cell.loc[cell["mode"] == "robust"]
        plain_rows = cell.loc[cell["mode"] == "no_robust"]
        if robust_rows.empty or plain_rows.empty:
            continue

        record: dict[str, float] = {
            "alpha": float(alpha),
            "runs_robust": float(robust_rows["runs"].iloc[0]),
            "runs_no_robust": float(plain_rows["runs"].iloc[0]),
        }
        for stem in stems:
            mean_column = f"{stem}_mean"
            with_robust = float(robust_rows[mean_column].iloc[0])
            baseline = float(plain_rows[mean_column].iloc[0])
            difference = with_robust - baseline
            record[f"{stem}_robust"] = with_robust
            record[f"{stem}_no_robust"] = baseline
            record[f"{stem}_delta"] = difference
            if baseline == 0:
                record[f"{stem}_delta_pct"] = np.nan
            else:
                record[f"{stem}_delta_pct"] = 100.0 * difference / baseline
        records.append(record)

    return pd.DataFrame(records)
|
||||||
|
|
||||||
|
|
||||||
|
def _pairwise_win_rates(frame: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
rules = {
|
||||||
|
"eval/revenue_mean": "higher",
|
||||||
|
"eval/reward_mean": "higher",
|
||||||
|
"eval/coi_leakage_mean": "lower",
|
||||||
|
"eval/volatility_mean": "lower",
|
||||||
|
}
|
||||||
|
rows: list[dict[str, float]] = []
|
||||||
|
for alpha, alpha_group in frame.groupby("alpha", sort=True):
|
||||||
|
robust = alpha_group[alpha_group["mode"] == "robust"]
|
||||||
|
no_robust = alpha_group[alpha_group["mode"] == "no_robust"]
|
||||||
|
if robust.empty or no_robust.empty:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for metric, direction in rules.items():
|
||||||
|
if metric not in frame.columns:
|
||||||
|
continue
|
||||||
|
robust_values = robust[metric].dropna().to_numpy(dtype=float)
|
||||||
|
no_robust_values = no_robust[metric].dropna().to_numpy(dtype=float)
|
||||||
|
if robust_values.size == 0 or no_robust_values.size == 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if direction == "higher":
|
||||||
|
wins = (robust_values[:, None] > no_robust_values[None, :]).sum()
|
||||||
|
else:
|
||||||
|
wins = (robust_values[:, None] < no_robust_values[None, :]).sum()
|
||||||
|
ties = (robust_values[:, None] == no_robust_values[None, :]).sum()
|
||||||
|
total = robust_values.size * no_robust_values.size
|
||||||
|
win_prob = (wins + 0.5 * ties) / total
|
||||||
|
rows.append(
|
||||||
|
{
|
||||||
|
"alpha": float(alpha),
|
||||||
|
"metric": metric,
|
||||||
|
"direction": direction,
|
||||||
|
"wins": int(wins),
|
||||||
|
"ties": int(ties),
|
||||||
|
"total_pairs": int(total),
|
||||||
|
"win_probability": float(win_prob),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return pd.DataFrame(rows)
|
||||||
|
|
||||||
|
|
||||||
|
def _overall_mode_summary(frame: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Aggregate ``frame`` per mode across all alphas.

    The result has a ``runs`` column (group size) followed by
    ``<metric>_mean`` / ``<metric>_std`` columns for each metric, where the
    metric name is passed through ``_sanitize``. Rows are sorted by mode.
    """
    spec: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        stem = _sanitize(metric)
        spec.update(
            {
                f"{stem}_mean": (metric, "mean"),
                f"{stem}_std": (metric, "std"),
            }
        )
    per_mode = frame.groupby("mode", as_index=False).agg(**spec)
    return per_mode.sort_values("mode")
|
||||||
|
|
||||||
|
|
||||||
|
def _headline_json(overall: pd.DataFrame) -> dict[str, float | str]:
|
||||||
|
if {"robust", "no_robust"} - set(overall["mode"].tolist()):
|
||||||
|
return {"status": "incomplete_modes"}
|
||||||
|
|
||||||
|
robust = overall[overall["mode"] == "robust"].iloc[0]
|
||||||
|
no_robust = overall[overall["mode"] == "no_robust"].iloc[0]
|
||||||
|
|
||||||
|
revenue_delta = float(
|
||||||
|
robust["eval_revenue_mean_mean"] - no_robust["eval_revenue_mean_mean"]
|
||||||
|
)
|
||||||
|
leakage_delta = float(
|
||||||
|
robust["eval_coi_leakage_mean_mean"] - no_robust["eval_coi_leakage_mean_mean"]
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"status": "ok",
|
||||||
|
"revenue_delta": revenue_delta,
|
||||||
|
"revenue_delta_pct": float(
|
||||||
|
100.0 * revenue_delta / no_robust["eval_revenue_mean_mean"]
|
||||||
|
),
|
||||||
|
"coi_leakage_delta": leakage_delta,
|
||||||
|
"coi_leakage_delta_pct": float(
|
||||||
|
100.0 * leakage_delta / no_robust["eval_coi_leakage_mean_mean"]
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def run(input_path: Path, output_dir: Path, include_non_finished: bool) -> list[Path]:
    """Process the PPO benchmark CSV and write the summary artefacts.

    Creates ``output_dir`` if needed, reads ``input_path``, and writes four
    CSV tables (alpha/mode summary, alpha deltas, pairwise win rates, overall
    mode summary) plus a JSON headline summary. Returns the written paths in
    that order.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    frame = _prepare_frame(
        pd.read_csv(input_path), include_non_finished=include_non_finished
    )

    candidate_metrics = (
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/volatility_mean",
        "eval/margin_mean",
        "train/alpha_adv",
        "train/coi_penalty",
        "train/ux_penalty",
        "train/agent_prob",
    )
    # Only metrics that exist in the export can be aggregated.
    metrics = [name for name in candidate_metrics if name in frame.columns]

    alpha_mode = _summary_by_alpha_mode(frame, metrics)
    deltas = _delta_by_alpha(alpha_mode, metrics)
    win_rates = _pairwise_win_rates(frame)
    overall = _overall_mode_summary(frame, metrics)
    headline = _headline_json(overall)

    tables = (
        ("ppo_alpha_mode_summary.csv", alpha_mode),
        ("ppo_alpha_deltas.csv", deltas),
        ("ppo_pairwise_win_rates.csv", win_rates),
        ("ppo_overall_mode_summary.csv", overall),
    )
    written_paths: list[Path] = []
    for filename, table in tables:
        destination = output_dir / filename
        table.to_csv(destination, index=False)
        written_paths.append(destination)

    headline_path = output_dir / "ppo_headline_summary.json"
    headline_path.write_text(json.dumps(headline, indent=2))
    written_paths.append(headline_path)
    return written_paths
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: parse options, call ``run`` and print each written path."""
    cli = argparse.ArgumentParser(
        description="Process PPO benchmark CSV for paper tables"
    )
    cli.add_argument("--input", type=Path, default=_default_input())
    cli.add_argument("--output-dir", type=Path, default=_default_output_dir())
    cli.add_argument("--include-non-finished", action="store_true")
    options = cli.parse_args()

    produced = run(
        input_path=options.input,
        output_dir=options.output_dir,
        include_non_finished=bool(options.include_non_finished),
    )
    for item in produced:
        print(item)


if __name__ == "__main__":
    main()
|
||||||
@@ -21,7 +21,7 @@
|
|||||||
surf,
|
surf,
|
||||||
shader=flat,
|
shader=flat,
|
||||||
mesh/check=false % Disable check to rely on empty lines
|
mesh/check=false % Disable check to rely on empty lines
|
||||||
] table [col sep=comma, x=step, y=price, z=density] {chapters/figures/supra_data.csv};
|
] table [col sep=comma, x=step, y=price, z=density] {chapters/figures/supra/supra_data.csv};
|
||||||
|
|
||||||
\end{axis}
|
\end{axis}
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
@@ -4038,4 +4038,3 @@ step,price,density
|
|||||||
4000,146.51098761558535,0.0
|
4000,146.51098761558535,0.0
|
||||||
4000,147.9065925693512,0.0
|
4000,147.9065925693512,0.0
|
||||||
4000,149.30219752311706,10.0
|
4000,149.30219752311706,10.0
|
||||||
|
|
||||||
|
166
paper/src/chapters/hero_architecture_figure.tex
Normal file
166
paper/src/chapters/hero_architecture_figure.tex
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
\definecolor{heroBlue}{RGB}{212, 228, 255}
|
||||||
|
\definecolor{heroBlueBorder}{RGB}{64, 103, 178}
|
||||||
|
\definecolor{heroGreen}{RGB}{214, 238, 216}
|
||||||
|
\definecolor{heroGreenBorder}{RGB}{48, 133, 66}
|
||||||
|
\definecolor{heroAmber}{RGB}{246, 230, 202}
|
||||||
|
\definecolor{heroAmberBorder}{RGB}{166, 121, 51}
|
||||||
|
\definecolor{heroGray}{RGB}{236, 236, 236}
|
||||||
|
\definecolor{heroGrayBorder}{RGB}{120, 120, 120}
|
||||||
|
|
||||||
|
% Panels occupy y = 2.2 .. 10.0
|
||||||
|
% Cross-panel connector gutter lives at y = 1.0 .. 2.2 (clearly below all nodes)
|
||||||
|
\begin{tikzpicture}[
|
||||||
|
>=Stealth,
|
||||||
|
font=\small,
|
||||||
|
panel/.style={draw=black!65, dashed, rounded corners=4pt, line width=0.85pt},
|
||||||
|
bB/.style={rectangle, rounded corners=3pt, draw=heroBlueBorder, fill=heroBlue,
|
||||||
|
line width=0.9pt, align=center, minimum height=0.85cm},
|
||||||
|
bG/.style={rectangle, rounded corners=3pt, draw=heroGreenBorder, fill=heroGreen,
|
||||||
|
line width=0.9pt, align=center, minimum height=0.85cm},
|
||||||
|
bA/.style={rectangle, rounded corners=3pt, draw=heroAmberBorder, fill=heroAmber,
|
||||||
|
line width=0.9pt, align=center, minimum height=0.85cm},
|
||||||
|
bY/.style={rectangle, rounded corners=3pt, draw=heroGrayBorder, fill=heroGray,
|
||||||
|
line width=0.9pt, align=center, minimum height=0.82cm},
|
||||||
|
pill/.style={ellipse, draw=black!50, fill=black!4, line width=0.75pt,
|
||||||
|
align=center, minimum width=1.6cm, minimum height=0.68cm},
|
||||||
|
arr/.style={->, draw=black!80, line width=0.88pt},
|
||||||
|
bidir/.style={<->, draw=black!80, line width=0.88pt},
|
||||||
|
darr/.style={->, draw=black!60, line width=0.80pt, densely dashed},
|
||||||
|
crossA/.style={->, draw=heroAmberBorder!90!black, line width=1.15pt, dash pattern=on 3.5pt off 2pt},
|
||||||
|
crossG/.style={->, draw=heroGreenBorder!90!black, line width=1.15pt, dash pattern=on 3.5pt off 2pt},
|
||||||
|
arrG/.style={->, draw=heroGreenBorder!90!black, line width=1.15pt},
|
||||||
|
lbl/.style={font=\scriptsize, align=center, fill=white, inner sep=1.5pt, text=black}
|
||||||
|
]
|
||||||
|
|
||||||
|
%% ============================================================
|
||||||
|
%% Panel A x: 0.2–11.2 y: 2.2–10.0
|
||||||
|
%% ============================================================
|
||||||
|
\draw[panel] (0.2,2.2) rectangle (11.2,10.0);
|
||||||
|
\node[anchor=west, font=\small\bfseries] at (0.45,9.72) {(a) Online platform and data plane};
|
||||||
|
|
||||||
|
\node[pill] (human) at (1.3, 8.55) {Human};
|
||||||
|
\node[pill] (agent) at (1.3, 7.45) {Agent};
|
||||||
|
|
||||||
|
\node[bB, minimum width=2.75cm] (web) at (4.2, 8.0) {Next.js\\Web App};
|
||||||
|
\node[bB, minimum width=2.75cm] (provider) at (7.35, 8.0) {Pricing\\Provider};
|
||||||
|
\node[bY, minimum width=1.85cm] (redis) at (9.85, 8.0) {Redis};
|
||||||
|
|
||||||
|
\node[bG, minimum width=3.1cm] (kBehav) at (4.0, 6.2) {Kafka stream\\Behavior events};
|
||||||
|
\node[bG, minimum width=3.0cm] (kQuotes) at (7.5, 6.2) {Kafka stream\\Price quotes};
|
||||||
|
|
||||||
|
\node[bA, minimum width=3.1cm] (worker) at (4.0, 4.4) {Worker / ETL\\Feature jobs};
|
||||||
|
\node[bA, minimum width=2.65cm] (registry) at (8.45, 4.4) {Model\\Registry};
|
||||||
|
|
||||||
|
% service row
|
||||||
|
\draw[arr] (human.east) -- (web.west);
|
||||||
|
\draw[arr] (agent.east) -- (web.west);
|
||||||
|
\draw[arr] (web.east) -- (provider.west);
|
||||||
|
\draw[bidir] (provider.east) -- (redis.west);
|
||||||
|
|
||||||
|
% web/provider -> kafka
|
||||||
|
\draw[arr] (web.south) -- (kBehav.north)
|
||||||
|
node[midway, left, lbl] {$e=(a,i,t,\mu,\delta)$};
|
||||||
|
\draw[arr] (provider.south) -- (kQuotes.north)
|
||||||
|
node[midway, right, lbl] {$(i,p,\mathrm{sid},\phi,t)$};
|
||||||
|
|
||||||
|
% kafka -> worker (straight south)
|
||||||
|
\draw[arr] (kBehav.south) -- (worker.north);
|
||||||
|
\draw[arr] (kQuotes.south) -- (worker.north);
|
||||||
|
|
||||||
|
% worker -> registry
|
||||||
|
\draw[arr] (worker.east) -- (registry.west);
|
||||||
|
|
||||||
|
% model refresh: registry east -> goes right to x=11.0, north to y=9.2, left to provider
|
||||||
|
% this keeps it entirely inside panel A with no crossing of nodes
|
||||||
|
\draw[crossA, rounded corners=6pt]
|
||||||
|
(registry.east) -- (11.0, 4.4)
|
||||||
|
-- (11.0, 9.2)
|
||||||
|
-- node[midway, lbl] {model refresh} (provider.north |- 0, 9.2)
|
||||||
|
-- (provider.north);
|
||||||
|
|
||||||
|
%% ============================================================
|
||||||
|
%% Panel B x: 11.6–19.8 y: 2.2–10.0
|
||||||
|
%% ============================================================
|
||||||
|
\draw[panel] (11.6,2.2) rectangle (19.8,10.0);
|
||||||
|
\node[anchor=west, font=\small\bfseries] at (11.85,9.72) {(b) Distinguishability layer};
|
||||||
|
|
||||||
|
\node[bG, minimum width=2.4cm] (session) at (14.0, 8.9) {Session prefix\\$\tau'$};
|
||||||
|
\node[bB, minimum width=2.4cm] (empKern) at (13.65,7.45) {Empirical kernel\\$\hat T'$};
|
||||||
|
\node[bY, minimum width=2.4cm] (weakLab) at (17.55,8.9) {Weak labels\\$\mathcal{D}_H,\mathcal{D}_A$};
|
||||||
|
\node[bY, minimum width=2.2cm] (protoH) at (12.8, 5.9) {Prototype\\$\bar T_H$};
|
||||||
|
\node[bA, minimum width=2.4cm] (kldist) at (15.55,5.9) {KL distances\\$\Delta_H,\Delta_A$};
|
||||||
|
\node[bY, minimum width=2.2cm] (protoA) at (18.3, 5.9) {Prototype\\$\bar T_A$};
|
||||||
|
\node[bB, minimum width=2.9cm] (calHead) at (13.55,4.25) {Contrastive\\calibration head};
|
||||||
|
\node[bG, minimum width=2.55cm] (score) at (17.75,4.25) {Session score\\$f(\tau'),\hat\alpha(\tau')$};
|
||||||
|
|
||||||
|
\node[lbl] at (15.55, 3.15) {$\hat\alpha(\tau')=\sigma\!\left(\beta(\Delta_H-\Delta_A)\right)$};
|
||||||
|
|
||||||
|
\draw[arr, rounded corners=4pt] (session.south) -- (empKern.north);
|
||||||
|
\draw[arr, rounded corners=4pt] (empKern.south) -- (13.65, 6.8) -| (protoH.north);
|
||||||
|
\draw[arr, rounded corners=4pt] (weakLab.south) -- (17.55, 6.8) -| (protoA.north);
|
||||||
|
% weak labels -> protoH: go south then hard-left below weakLab
|
||||||
|
\draw[arr, rounded corners=4pt] (weakLab.south) -- (17.55,6.8) -| (protoH.north east);
|
||||||
|
\draw[arr] (protoH.east) -- (kldist.west);
|
||||||
|
\draw[arr] (protoA.west) -- (kldist.east);
|
||||||
|
\draw[arr] (kldist.south) -- (calHead.north east);
|
||||||
|
\draw[arr] (calHead.east) -- (score.west);
|
||||||
|
|
||||||
|
%% ============================================================
|
||||||
|
%% Panel C x: 20.8–31.0 y: 2.2–10.0
|
||||||
|
%% ============================================================
|
||||||
|
\draw[panel] (20.8,2.2) rectangle (31.0,10.0);
|
||||||
|
\node[anchor=west, font=\small\bfseries] at (21.05,9.72) {(c) Distributionally robust control};
|
||||||
|
|
||||||
|
\node[bB, minimum width=3.1cm] (state) at (23.15, 8.9)
|
||||||
|
{State summary\\$[p_{t-1},\hat q_{t-1},f(\tau')]$};
|
||||||
|
\node[bY, minimum width=2.9cm] (ambSet) at (23.15, 7.45) {Ambiguity set\\$\mathcal U_\epsilon(\hat P_N)$};
|
||||||
|
\node[bG, minimum width=2.9cm] (innerMin) at (28.55, 7.45) {Inner minimisation\\$\min_{Q\in\mathcal U_\epsilon}$};
|
||||||
|
\node[bY, minimum width=8.2cm] (contScen) at (25.9, 5.9)
|
||||||
|
{Contamination scenarios $\;\alpha_k\in\mathcal A_{\epsilon_\alpha}(\alpha_0)$};
|
||||||
|
\node[bA, minimum width=8.8cm] (reward) at (25.9, 4.45)
|
||||||
|
{$r_t = R(p_t,\hat q_t) - \lambda\,\mathrm{COI}_{\mathrm{leak}}(p_t,\tau_t') - \eta\,UX_t$};
|
||||||
|
\node[bB, minimum width=2.85cm] (policy) at (22.75, 3.05) {Robust policy $\pi^*$};
|
||||||
|
\node[bG, minimum width=2.85cm] (publish) at (29.05, 3.05) {Publish price\\vector $p_t$};
|
||||||
|
|
||||||
|
\node[lbl] at (25.9, 2.55) {$\pi^*=\arg\max_\pi\min_{Q\in\mathcal U_\epsilon}\mathbb{E}[r_t]$};
|
||||||
|
|
||||||
|
\draw[arr] (state.south) -- (ambSet.north);
|
||||||
|
\draw[arr] (ambSet.east) -- (innerMin.west);
|
||||||
|
\draw[arr, rounded corners=4pt] (ambSet.south) -- (23.15, 6.6) -| ([xshift=-2cm]contScen.north);
|
||||||
|
\draw[arr, rounded corners=4pt] (innerMin.south) -- (28.55, 6.6) -| ([xshift=2cm]contScen.north);
|
||||||
|
\draw[arr] (contScen.south) -- (reward.north);
|
||||||
|
\draw[arr, rounded corners=6pt] (reward.south) -- (25.9, 3.7) -| (policy.north);
|
||||||
|
\draw[arr] (policy.east) -- (publish.west);
|
||||||
|
% market response: up the right edge of panel C, entirely inside, rounded
|
||||||
|
\draw[arrG, rounded corners=6pt] (publish.east) -- (30.6, 3.05)
|
||||||
|
-- (30.6, 9.8)
|
||||||
|
-- node[midway, lbl] {market response} (state.north |- 0, 9.8)
|
||||||
|
-- (state.north);
|
||||||
|
|
||||||
|
%% ============================================================
|
||||||
|
%% Cross-panel connectors – gutter at y = 1.0..2.2
|
||||||
|
%% Three separate depths: 1.85, 1.45, 1.05 (no overlaps)
|
||||||
|
%% ============================================================
|
||||||
|
|
||||||
|
% 1. Worker -> Session (depth y=1.85, shallowest)
|
||||||
|
\draw[crossA, rounded corners=6pt]
|
||||||
|
(worker.south) -- (worker.south |- 0, 1.85)
|
||||||
|
-- node[pos=0.5, lbl] {offline extraction} (11.4, 1.85)
|
||||||
|
-- (11.4, 8.9)
|
||||||
|
-- (session.west);
|
||||||
|
|
||||||
|
% 2. Score -> State (depth y=1.45)
|
||||||
|
\draw[crossG, rounded corners=6pt]
|
||||||
|
(score.south) -- (score.south |- 0, 1.45)
|
||||||
|
-- node[pos=0.5, lbl] {contamination signal} (20.6, 1.45)
|
||||||
|
-- (20.6, 8.9)
|
||||||
|
-- (state.west);
|
||||||
|
|
||||||
|
% 3. Publish -> Provider (depth y=1.05, deepest)
|
||||||
|
\draw[crossG, rounded corners=3pt]
|
||||||
|
(publish.south) -- (publish.south |- 0, 1.05)
|
||||||
|
-- node[pos=0.4, lbl] {serve online} (5.8, 1.05)
|
||||||
|
-- (5.8, 7.7)
|
||||||
|
-- ([yshift=-0.3cm]provider.west);
|
||||||
|
|
||||||
|
\end{tikzpicture}
|
||||||
@@ -62,7 +62,7 @@ We propose a robust optimization objective. The platform seeks a pricing policy
|
|||||||
Here:
|
Here:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item The first term, $p_t \cdot \hat{q}_t(p_t | \theta=H)$, represents the revenue generated strictly from the estimated human segment.
|
\item The first term, $p_t \cdot \hat{q}_t(p_t | \theta=H)$, represents the revenue generated strictly from the estimated human segment.
|
||||||
\item $\mathcal{L}_{detect}$ is a penalty term for failing to separate distributions (the cost of confusion).
|
\item $\mathcal{L}_{detect}$ is a penalty term for failing to distinguish distributions (the cost of confusion).
|
||||||
\item $\lambda$ is a hyperparameter balancing revenue exploitation vs. robust detection.
|
\item $\lambda$ is a hyperparameter balancing revenue exploitation vs. robust detection.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 84 KiB After Width: | Height: | Size: 324 KiB |
@@ -57,7 +57,7 @@ These behavioral signals serve as inputs for a Distributionally Robust Reinforce
|
|||||||
\item[Trajectory] Defined as a series of unspecified length, collecting data on states of some object over time.
|
\item[Trajectory] Defined as a series of unspecified length, collecting data on states of some object over time.
|
||||||
\item[Cost of Information (COI)] The average premium extracted above marginal cost due to information asymmetry.
|
\item[Cost of Information (COI)] The average premium extracted above marginal cost due to information asymmetry.
|
||||||
\item[Contamination Ratio] The proportion of agent sessions versus human sessions in the system.
|
\item[Contamination Ratio] The proportion of agent sessions versus human sessions in the system.
|
||||||
\item[Separability] The ability to distinguish between human and agent behavioral patterns.
|
\item[Distinguishability] The ability to distinguish between human and agent behavioral patterns.
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
\section{Aggregate Compute Budget Derivation}
|
\section{Aggregate Compute Budget Derivation}
|
||||||
|
|||||||
@@ -29,6 +29,9 @@ These behavioral signals serve as inputs for a Distributionally Robust Reinforce
|
|||||||
\vspace{1em}
|
\vspace{1em}
|
||||||
\noindent\textbf{Acknowledgments:} This research was supported by the TPU Research Cloud program, which provided access to Google Cloud TPU accelerators (including TPU v4, v5e, and v6e).
|
\noindent\textbf{Acknowledgments:} This research was supported by the TPU Research Cloud program, which provided access to Google Cloud TPU accelerators (including TPU v4, v5e, and v6e).
|
||||||
|
|
||||||
|
\vspace{0.5em}
|
||||||
|
\noindent\textbf{Project page:} \url{https://velocitatem.github.io/PHANTOM/}
|
||||||
|
|
||||||
\clearpage
|
\clearpage
|
||||||
\input{chapters/01-intro}
|
\input{chapters/01-intro}
|
||||||
\input{chapters/02-literature-review}
|
\input{chapters/02-literature-review}
|
||||||
@@ -43,15 +46,44 @@ These behavioral signals serve as inputs for a Distributionally Robust Reinforce
|
|||||||
\appendix
|
\appendix
|
||||||
\section{Terminology}
|
\section{Terminology}
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\item[Agent $A$] An actor of non-human nature, powered by an LLM.
|
\item[Agent $A$] A non-human actor, typically an LLM-driven system that executes web actions toward a goal.
|
||||||
\item[Human $H$] An individual human with some job to be done.
|
\item[Human $H$] A human participant interacting with the platform to complete a task.
|
||||||
\item[Actor $\theta$] Defines a type of class which is either Agent or Human and has the capability to carry out actions on a web platform.
|
\item[Actor Type $\theta$] A latent class parameter describing whether a session is generated by a human or an agent profile.
|
||||||
\item[Platform] Any web-based platform which serves an interface to a collection of items that can be purchased, each at some price $p_i$.
|
\item[Platform] A web interface exposing purchasable items and their offered prices.
|
||||||
\item[Behavioral Model] A mathematical model predicting what action comes after a series of prior actions.
|
\item[Session $s$] A bounded interaction record tied to one actor and one session identifier.
|
||||||
\item[LLM] Large Language Model served by some provider with the abstracted capability of tool calling.
|
\item[Event $e_{s,k}$] A single interaction tuple in a session, including action, item target, and timestamp.
|
||||||
\item[TPU] Tensor Processing Unit which is a unique kind of chip architecture developed by Google.
|
\item[Trajectory $\tau_s$] The ordered sequence of events generated within a session.
|
||||||
\item[Trajectory] Defined as a series of unspecified length, collecting data on states of some object over time.
|
\item[Demand Proxy $\hat{q}_{t,i}$] A weighted aggregate of observed actions used as an operational substitute for latent demand.
|
||||||
% TODO: maybe define other things in a similarly succinct manner
|
\item[Action Weight Function $\omega(a)$] A mapping from action type to signal strength in the demand proxy.
|
||||||
|
\item[True Demand $d(p;\theta)$] The latent purchase response as a function of price and actor type.
|
||||||
|
\item[Contamination $\alpha$] The proportion of agent-generated traffic in the session mixture.
|
||||||
|
\item[Non-stationary Noise $\epsilon_t$] Time-varying residual variation not explained by the actor mixture.
|
||||||
|
\item[Pricing Policy $\pi(\tau)$] A function mapping observed interaction history to an offered price.
|
||||||
|
\item[Cost of Information (COI)] The expected premium above the minimum viable price induced by the pricing policy.
|
||||||
|
\item[COI Leakage] A per-quote penalty term modeling information revealed to reconnaissance behavior.
|
||||||
|
\item[First-Order Statistic $p_{(1)}$] The minimum observed price among multiple independent queries.
|
||||||
|
\item[Transition Kernel $\mathcal{T}$] A Markov transition matrix over behavioral states or actions.
|
||||||
|
\item[Distinguishability] The degree to which human and agent sessions can be distinguished from behavior alone.
|
||||||
|
\item[KL Divergence $D_{KL}$] A relative-entropy measure used to compare session transition structure against class prototypes.
|
||||||
|
\item[Divergence Scores $\Delta_H,\Delta_A$] Session-level distances to human and agent transition centroids.
|
||||||
|
\item[Weak Agent Probability $f(\tau)$] A session-level score estimating the likelihood that a trajectory is agent-generated.
|
||||||
|
\item[Contamination Generator $\mathcal{G}(\alpha)$] A simulator component that injects synthetic agent trajectories to reach a target mixture level.
|
||||||
|
\item[Stackelberg Game] A leader-follower formulation where the platform sets prices and demand responds.
|
||||||
|
\item[Ambiguity Set $\mathcal{U}_{\epsilon}$] A set of plausible demand distributions considered under distributional uncertainty.
|
||||||
|
\item[Wasserstein Ball] A distance-bounded neighborhood around an empirical distribution used in robust optimization.
|
||||||
|
\item[DR-RL] Distributionally Robust Reinforcement Learning for policies trained against worst-case distributional shifts.
|
||||||
|
\item[Nominal Contamination $\alpha_0$] The baseline contamination level around which robust candidates are evaluated.
|
||||||
|
\item[Robustness Radius $\epsilon_\alpha$] The local interval width used for inner minimization over contamination scenarios.
|
||||||
|
\item[Query-Tax Surrogate] A constant leakage proxy assigning fixed penalty to suspected reconnaissance queries.
|
||||||
|
\item[Revelation Surrogate] A leakage proxy based on $-\log\pi(p\mid\tau)$ to penalize highly informative quotes.
|
||||||
|
\item[Limbo Stack] The alternating game-history buffer that stores leader price moves and follower demand responses.
|
||||||
|
\item[UX Index] A bounded user-experience metric tracked to evaluate policy side effects on legitimate users.
|
||||||
|
\item[Look-to-Book Ratio] The ratio of search-like interactions to completed purchases, used as an operational contamination indicator.
|
||||||
|
\item[Hybrid Kappa-Lambda Architecture] A data design combining streaming ingestion with offline and batch learning loops.
|
||||||
|
\item[MDP / POMDP] Sequential decision models with full observability (MDP) or partial observability (POMDP).
|
||||||
|
\item[Behavioral Model] A model predicting what action is likely to follow from prior actions.
|
||||||
|
\item[LLM] Large Language Model served through an inference provider with tool-use capability.
|
||||||
|
\item[TPU] Tensor Processing Unit, a specialized accelerator architecture developed by Google.
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
\section{Aggregate Compute Budget Derivation}
|
\section{Aggregate Compute Budget Derivation}
|
||||||
@@ -78,6 +110,30 @@ v4 & 64 & 275 & $64 \times 275 = 17{,}600$ \\
|
|||||||
|
|
||||||
Converting to petaFLOPS: $160{,}320\;\text{TFLOPS} = 160.32\;\text{PFLOPS} \approx 160\;\text{PFLOPS}$. This is the theoretical peak under sustained BF16 arithmetic; realized throughput depends on memory bandwidth utilization and inter-chip communication overhead, but the figure serves as a useful upper bound for provisioning decisions.
|
Converting to petaFLOPS: $160{,}320\;\text{TFLOPS} = 160.32\;\text{PFLOPS} \approx 160\;\text{PFLOPS}$. This is the theoretical peak under sustained BF16 arithmetic; realized throughput depends on memory bandwidth utilization and inter-chip communication overhead, but the figure serves as a useful upper bound for provisioning decisions.
|
||||||
|
|
||||||
|
\section{Slope-Test Verification: Revenue vs. Contamination}
|
||||||
|
\label{app:alpha_revenue_slope}
|
||||||
|
|
||||||
|
This appendix provides a compact verification of the slope result reported in the main results section. Using the same run-level pairs $x_i=\texttt{study/alpha}_i$ and $y_i=\texttt{eval/revenue\_mean}_i$ ($n=95$), we re-checked the ordinary least squares slope test in Python with standard test routines (SciPy two-sided $t$ test for the slope).
|
||||||
|
|
||||||
|
\[
|
||||||
|
\widehat{y}=326{,}878.57-60{,}631.95\,x,
|
||||||
|
\]
|
||||||
|
\[
|
||||||
|
t(93)=-8.2148,\qquad p=1.2038\times 10^{-12},\qquad R^2=0.4205,\qquad 95\%\,\text{CI}_{\beta_1}=[-75{,}288.76,\,-45{,}975.13].
|
||||||
|
\]
|
||||||
|
|
||||||
|
The Python verification reproduces the reported coefficients and inference values, confirming that the slope-test results are numerically reproducible with standard routines.
|
||||||
|
|
||||||
|
\section{whoclickedit Dataset Card}
|
||||||
|
\label{app:whoclicked_card}
|
||||||
|
|
||||||
|
For transparency and reproducibility, this appendix includes the full dataset card used for the public release of the \texttt{whoclickedit} dataset.
|
||||||
|
|
||||||
|
\lstinputlisting[
|
||||||
|
caption={whoclickedit dataset card (README snapshot)},
|
||||||
|
label={lst:whoclicked_dataset_card}
|
||||||
|
]{chapters/auto/whoclicked_dataset_card.md}
|
||||||
|
|
||||||
% \input{../build/concatenated_code}
|
% \input{../build/concatenated_code}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|||||||
@@ -41,7 +41,7 @@
|
|||||||
|
|
||||||
\begin{abstract}
|
\begin{abstract}
|
||||||
Dynamic pricing pipelines in e-commerce consume behavioral demand signals to set prices, but the growing presence of LLM-powered agents introduces a novel contamination vector: these agents decouple information gathering from transaction execution across isolated sessions, eroding the platform's pricing power.
|
Dynamic pricing pipelines in e-commerce consume behavioral demand signals to set prices, but the growing presence of LLM-powered agents introduces a novel contamination vector: these agents decouple information gathering from transaction execution across isolated sessions, eroding the platform's pricing power.
|
||||||
We present PHANTOM, a modular compound system that addresses this threat end-to-end. The system is composed of five orchestrated components: (1)~a configurable e-commerce research platform with dual-stream Kafka ingestion for behavioral and price-exposure events, (2)~a GOFAI-based weak labeling stage that partitions sessions into human and agent classes using rule-based predicates, (3)~a transition-kernel estimator that learns separable Markov models for each actor type and constructs a Contamination Generator for controlled simulation, (4)~a Distributionally Robust Reinforcement Learning policy that optimizes pricing under a Wasserstein ambiguity set conditioned on per-session divergence signals, and (5)~an Airflow-orchestrated pipeline that connects online data collection to offline policy training via Redis-backed model serving.
|
We present PHANTOM, a modular compound system that addresses this threat end-to-end. The system is composed of five orchestrated components: (1)~a configurable e-commerce research platform with dual-stream Kafka ingestion for behavioral and price-exposure events, (2)~a GOFAI-based weak labeling stage that partitions sessions into human and agent classes using rule-based predicates, (3)~a transition-kernel estimator that learns distinguishable Markov models for each actor type and constructs a Contamination Generator for controlled simulation, (4)~a Distributionally Robust Reinforcement Learning policy that optimizes pricing under a Wasserstein ambiguity set conditioned on per-session divergence signals, and (5)~an Airflow-orchestrated pipeline that connects online data collection to offline policy training via Redis-backed model serving.
|
||||||
We formally derive the Cost of Information Theorem, proving that standard pricing mechanisms become incentive-incompatible as agent query volume grows. The system architecture, interaction schema, and factorial experiment harness are designed for reproducibility and are released as open artifacts. We evaluate system-level tradeoffs between revenue protection, information leakage, and user-experience degradation through a three-objective reward structure.
|
We formally derive the Cost of Information Theorem, proving that standard pricing mechanisms become incentive-incompatible as agent query volume grows. The system architecture, interaction schema, and factorial experiment harness are designed for reproducibility and are released as open artifacts. We evaluate system-level tradeoffs between revenue protection, information leakage, and user-experience degradation through a three-objective reward structure.
|
||||||
\end{abstract}
|
\end{abstract}
|
||||||
|
|
||||||
@@ -58,7 +58,7 @@ The current innovation boom in generative artificial intelligence and its applic
|
|||||||
|
|
||||||
The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. Dynamic pricing algorithms rely on directly translating demand features $q$ to new price assignments $\hat{p}$ across a catalogue of products of size $N$. When agent-driven reconnaissance traffic contaminates these demand signals, the pricing pipeline produces biased estimates that erode margins. This is not a single-model failure but a \textit{compound system} failure: the data ingestion, demand estimation, policy optimization, and model serving stages each propagate and amplify the contamination.
|
The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. Dynamic pricing algorithms rely on directly translating demand features $q$ to new price assignments $\hat{p}$ across a catalogue of products of size $N$. When agent-driven reconnaissance traffic contaminates these demand signals, the pricing pipeline produces biased estimates that erode margins. This is not a single-model failure but a \textit{compound system} failure: the data ingestion, demand estimation, policy optimization, and model serving stages each propagate and amplify the contamination.
|
||||||
|
|
||||||
Existing work treats bot detection and dynamic pricing as separate concerns. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for separating non-human reconnaissance from genuine human demand expression and integrating that separation into pricing heuristics without degrading legitimate user experience. This gap is what our contribution aims to address.
|
Existing work treats bot detection and dynamic pricing as separate concerns. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for distinguishing non-human reconnaissance from genuine human demand expression and integrating that distinguishability into pricing heuristics without degrading legitimate user experience. This gap is what our contribution aims to address.
|
||||||
|
|
||||||
\subsection{System-Level Contributions}
|
\subsection{System-Level Contributions}
|
||||||
|
|
||||||
@@ -78,7 +78,7 @@ We frame our contribution along the four CAIS pillars---architectural patterns,
|
|||||||
|
|
||||||
This work addresses three core research questions:
|
This work addresses three core research questions:
|
||||||
\begin{enumerate}
|
\begin{enumerate}
|
||||||
\item[\textbf{RQ1}] \textit{Separability}: Can agent and human sessions be reliably distinguished from behavioral interaction signals alone, without relying on network-level or device fingerprinting?
|
\item[\textbf{RQ1}] \textit{Distinguishability}: Can agent and human sessions be reliably distinguished from behavioral interaction signals alone, without relying on network-level or device fingerprinting?
|
||||||
\item[\textbf{RQ2}] \textit{Theoretical Impact}: What is the formal relationship between agent contamination levels and the erosion of pricing power in dynamic pricing systems?
|
\item[\textbf{RQ2}] \textit{Theoretical Impact}: What is the formal relationship between agent contamination levels and the erosion of pricing power in dynamic pricing systems?
|
||||||
\item[\textbf{RQ3}] \textit{Robust Mitigation}: How can pricing policies be constructed to maintain margin integrity under unknown and non-stationary levels of agent contamination?
|
\item[\textbf{RQ3}] \textit{Robust Mitigation}: How can pricing policies be constructed to maintain margin integrity under unknown and non-stationary levels of agent contamination?
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
@@ -115,7 +115,7 @@ Each price query generates a record $(i, p, \text{sid}, \phi, t)$ associating th
|
|||||||
|
|
||||||
\subsection{Offline Loop: Policy Training}
|
\subsection{Offline Loop: Policy Training}
|
||||||
|
|
||||||
The Kafka cluster is subscribed to by our pipeline which is configured on a schedule in Airflow, with the possibility of manual trigger. The offline loop consumes collected trajectories, performs weak labeling and transition-kernel estimation (Section~\ref{sec:separability}), trains the DR-RL policy (Section~\ref{sec:drrl}) in a simulator, and pushes the resulting policy to Redis for the pricing provider to read.
|
Our pipeline subscribes to the Kafka cluster and runs on a schedule configured in Airflow, with the option of a manual trigger. The offline loop consumes collected trajectories, performs weak labeling and transition-kernel estimation (Section~\ref{sec:distinguishability}), trains the DR-RL policy (Section~\ref{sec:drrl}) in a simulator, and pushes the resulting policy to Redis for the pricing provider to read.
|
||||||
|
|
||||||
\subsection{Online Dynamic Pricing (Baseline)}
|
\subsection{Online Dynamic Pricing (Baseline)}
|
||||||
|
|
||||||
@@ -165,7 +165,7 @@ The metadata record $\mu$ varies by action type. This heterogeneous structure is
|
|||||||
%% ====================================================================
|
%% ====================================================================
|
||||||
\section{Methodology: Pipeline Components}
|
\section{Methodology: Pipeline Components}
|
||||||
|
|
||||||
This section details the theoretical and practical framework behind each pipeline component. We formalize the problem environment, derive the \textit{Cost of Information} (COI) theorem that motivates the system design, describe the separability and contamination modules, and formulate the robust pricing policy.
|
This section details the theoretical and practical framework behind each pipeline component. We formalize the problem environment, derive the \textit{Cost of Information} (COI) theorem that motivates the system design, describe the distinguishability and contamination modules, and formulate the robust pricing policy.
|
||||||
|
|
||||||
\subsection{Problem Formalization}
|
\subsection{Problem Formalization}
|
||||||
|
|
||||||
@@ -225,15 +225,15 @@ Since the integrand vanishes as $N \to \infty$ for all $t > \underline{p}$, the
|
|||||||
This result is the theoretical motivation for the system design: it proves that standard pricing policies $\pi$ fail to extract surplus in the presence of large-scale agentic search, necessitating a contamination-aware component in the pipeline.
|
This result is the theoretical motivation for the system design: it proves that standard pricing policies $\pi$ fail to extract surplus in the presence of large-scale agentic search, necessitating a contamination-aware component in the pipeline.
|
||||||
|
|
||||||
|
|
||||||
\subsection{Module: Separability and Contamination Generation}
|
\subsection{Module: Distinguishability and Contamination Generation}
|
||||||
\label{sec:separability}
|
\label{sec:distinguishability}
|
||||||
|
|
||||||
To train a robust pricing learner, we need a simulator that can generate realistic interaction data under controlled contamination. We build this from collected data using a two-stage approach.
|
To train a robust pricing learner, we need a simulator that can generate realistic interaction data under controlled contamination. We build this from collected data using a two-stage approach.
|
||||||
|
|
||||||
\subsubsection{GOFAI-Based Weak Labeling.}
|
\subsubsection{GOFAI-Based Weak Labeling.}
|
||||||
We use Good Old-Fashioned AI (GOFAI) heuristics to generate weak labels for separability. A set of rule-based predicates $\phi_j: \tau \to \{0,1\}$ partitions dataset $\mathcal{D}$ into high-confidence sets $\mathcal{D}_H$ and $\mathcal{D}_A$. We then estimate separate transition models for both groups and ask a direct methodological question: are the kernels separable enough to justify downstream pricing control that depends on that separability?
|
We use Good Old-Fashioned AI (GOFAI) heuristics to generate weak labels for distinguishability. A set of rule-based predicates $\phi_j: \tau \to \{0,1\}$ partitions dataset $\mathcal{D}$ into high-confidence sets $\mathcal{D}_H$ and $\mathcal{D}_A$. We then estimate separate transition models for both groups and ask a direct methodological question: are the kernels distinguishable enough to justify downstream pricing control that depends on that distinguishability?
|
||||||
|
|
||||||
To answer this, we compute average KL divergence between transition probability matrices. This statistic gives global separability and event-level diagnostics at the same time. In our balanced dataset (50\% human, 50\% agent), the average divergence is approximately $1.8$.
|
To answer this, we compute average KL divergence between transition probability matrices. This statistic gives global distinguishability and event-level diagnostics at the same time. In our recorded dataset (13 human sessions, 16 agent sessions; 45\%/55\%), the average divergence is approximately $1.8$.
|
||||||
|
|
||||||
\begin{definition}[KL Divergence for Transition Distributions]
|
\begin{definition}[KL Divergence for Transition Distributions]
|
||||||
Let $P_e$ and $Q_e$ be categorical distributions over destination states following event $e$, derived from human and agent trajectories respectively. The KL divergence between these distributions is:
|
Let $P_e$ and $Q_e$ be categorical distributions over destination states following event $e$, derived from human and agent trajectories respectively. The KL divergence between these distributions is:
|
||||||
@@ -243,7 +243,7 @@ Let $P_e$ and $Q_e$ be categorical distributions over destination states followi
|
|||||||
where $\mathcal{S}_e$ denotes the set of destination events that follow $e$ in the human trajectories.
|
where $\mathcal{S}_e$ denotes the set of destination events that follow $e$ in the human trajectories.
|
||||||
\end{definition}
|
\end{definition}
|
||||||
|
|
||||||
With these divergence features we train a contrastive model to estimate a weak agent probability $f(\tau)\in[0,1]$, which serves as the interface between the separability module and the downstream pricing policy.
|
With these divergence features we train a contrastive model to estimate a weak agent probability $f(\tau)\in[0,1]$, which serves as the interface between the distinguishability module and the downstream pricing policy.
|
||||||
|
|
||||||
\subsubsection{Transition-Kernel Estimation and Contamination Generator.}
|
\subsubsection{Transition-Kernel Estimation and Contamination Generator.}
|
||||||
\label{sec:tpe}
|
\label{sec:tpe}
|
||||||
@@ -282,12 +282,12 @@ Given a newly observed partial trajectory $\tau'$, we compute its empirical tran
|
|||||||
\Delta_A(\tau') &= D_{KL}(\hat{\mathcal{T}}^\prime \parallel \bar{\mathcal{T}}_A)
|
\Delta_A(\tau') &= D_{KL}(\hat{\mathcal{T}}^\prime \parallel \bar{\mathcal{T}}_A)
|
||||||
\end{align}
|
\end{align}
|
||||||
|
|
||||||
These divergence statistics serve as the operational connector between the separability module and the pricing policy. We define the per-session contamination estimate as:
|
These divergence statistics serve as the operational connector between the distinguishability module and the pricing policy. We define the per-session contamination estimate as:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\label{eq:alpha_hat}
|
\label{eq:alpha_hat}
|
||||||
\hat{\alpha}(\tau') = \sigma\big(\beta(\Delta_H(\tau') - \Delta_A(\tau'))\big)
|
\hat{\alpha}(\tau') = \sigma\big(\beta(\Delta_H(\tau') - \Delta_A(\tau'))\big)
|
||||||
\end{equation}
|
\end{equation}
|
||||||
where $\sigma$ is the logistic function and $\beta > 0$ is a temperature parameter. This maps separability directly into a scalar control input for the pricing objective.
|
where $\sigma$ is the logistic function and $\beta > 0$ is a temperature parameter. This maps distinguishability directly into a scalar control input for the pricing objective.
|
||||||
|
|
||||||
\subsubsection{Ambiguity Set Construction.}
|
\subsubsection{Ambiguity Set Construction.}
|
||||||
Because the contamination level $\alpha$ and demand shift are non-stationary, a point estimate of the demand distribution is insufficient. Let $\hat{P}_N$ denote the empirical reference distribution induced by the Contamination Generator $\mathcal{G}(\alpha)$. We define the Wasserstein ambiguity set:
|
Because the contamination level $\alpha$ and demand shift are non-stationary, a point estimate of the demand distribution is insufficient. Let $\hat{P}_N$ denote the empirical reference distribution induced by the Contamination Generator $\mathcal{G}(\alpha)$. We define the Wasserstein ambiguity set:
|
||||||
@@ -344,7 +344,7 @@ The simulator has multiple configurable factors, including valuation distributio
|
|||||||
|
|
||||||
Our training budget spans 384 TPU chips across v4, v5e, and v6e generations, distributed across Europe and U.S. regions with a spot-heavy mix and an on-demand reserve. At peak BF16 throughput this corresponds to roughly 160 PFLOPS of aggregate compute. We allocate v6e capacity to the heaviest policy training, use v5e for broad hyperparameter sweeps, and reserve on-demand v4 quota for runs that should not be preempted \parencite{noauthor_tpu_2026,noauthor_tpu_2025-1,noauthor_tpu_2025}.
|
Our training budget spans 384 TPU chips across v4, v5e, and v6e generations, distributed across Europe and U.S. regions with a spot-heavy mix and an on-demand reserve. At peak BF16 throughput this corresponds to roughly 160 PFLOPS of aggregate compute. We allocate v6e capacity to the heaviest policy training, use v5e for broad hyperparameter sweeps, and reserve on-demand v4 quota for runs that should not be preempted \parencite{noauthor_tpu_2026,noauthor_tpu_2025-1,noauthor_tpu_2025}.
|
||||||
|
|
||||||
Our process follows three stages: (1)~observe and \textit{vectorize} behavioral interactions, (2)~learn separability to characterize human versus agent patterns, and (3)~use the learned signal to train a defensive policy in a controlled dynamic-pricing simulator.
|
Our process follows three stages: (1)~observe and \textit{vectorize} behavioral interactions, (2)~learn distinguishability to characterize human versus agent patterns, and (3)~use the learned signal to train a defensive policy in a controlled dynamic-pricing simulator.
|
||||||
|
|
||||||
Operationally, goals and experiment runs are tracked in PostgreSQL (goal table, run table, and assignment mapping). This data-acquisition phase is intentionally a disconnected component that feeds the later contributions.
|
Operationally, goals and experiment runs are tracked in PostgreSQL (goal table, run table, and assignment mapping). This data-acquisition phase is intentionally a disconnected component that feeds the later contributions.
|
||||||
|
|
||||||
@@ -375,7 +375,7 @@ Initialize contamination estimate $\hat\alpha \leftarrow 0.2$\;
|
|||||||
$\mathcal S_t \leftarrow \mathcal S_t \cup \{\tau_m\}$\;
|
$\mathcal S_t \leftarrow \mathcal S_t \cup \{\tau_m\}$\;
|
||||||
}
|
}
|
||||||
|
|
||||||
\tcp{Estimate contamination from separability module}
|
\tcp{Estimate contamination from distinguishability module}
|
||||||
compute $\hat\alpha \leftarrow \frac{1}{M}\sum_{\tau\in\mathcal S_t} \Big[\sigma\big(\beta(\Delta_H(\tau)-\Delta_A(\tau))\big)\Big]$\;
|
compute $\hat\alpha \leftarrow \frac{1}{M}\sum_{\tau\in\mathcal S_t} \Big[\sigma\big(\beta(\Delta_H(\tau)-\Delta_A(\tau))\big)\Big]$\;
|
||||||
|
|
||||||
compute $J_t \leftarrow \text{Revenue}(p_t,\hat Q_t) - \lambda\cdot \text{COILeak}(\hat\alpha) - \eta\cdot \text{UX}(\hat\alpha)$\;
|
compute $J_t \leftarrow \text{Revenue}(p_t,\hat Q_t) - \lambda\cdot \text{COILeak}(\hat\alpha) - \eta\cdot \text{UX}(\hat\alpha)$\;
|
||||||
@@ -430,7 +430,7 @@ We formally defined the Cost of Information and proved that as the saturation of
|
|||||||
|
|
||||||
The system architecture, interaction schema, configurable e-commerce testbed, and factorial experiment harness are designed for reproducibility and released as open artifacts. This is a very generic end-to-end mechanism which is applicable to a variety of different e-commerce tasks. We intentionally put emphasis on the development of this infrastructure to establish a reproducible framework for interaction and to minimize any noise.
|
The system architecture, interaction schema, configurable e-commerce testbed, and factorial experiment harness are designed for reproducibility and released as open artifacts. This is a very generic end-to-end mechanism which is applicable to a variety of different e-commerce tasks. We intentionally put emphasis on the development of this infrastructure to establish a reproducible framework for interaction and to minimize any noise.
|
||||||
|
|
||||||
Future work includes full factorial evaluation of the DR-RL policy across contamination levels, online adaptation of the ambiguity radius $\epsilon$ as a function of live divergence estimates, extension to multi-agent market maker settings, and integration of the HAP protocol~\cite{dhir_http_2025} as an additional signal source for the separability module.
|
Future work includes full factorial evaluation of the DR-RL policy across contamination levels, online adaptation of the ambiguity radius $\epsilon$ as a function of live divergence estimates, extension to multi-agent market maker settings, and integration of the HAP protocol~\cite{dhir_http_2025} as an additional signal source for the distinguishability module.
|
||||||
|
|
||||||
|
|
||||||
%% ====================================================================
|
%% ====================================================================
|
||||||
|
|||||||
@@ -2,9 +2,9 @@
|
|||||||
|
|
||||||
\section{Introduction}
|
\section{Introduction}
|
||||||
|
|
||||||
In this paper we present an exploration and defense against the presence of new commercial entities in digitally powered platforms, preserving market equilibrium in the age of AI. This research establishes the following contributions: definition and formalization of non-human transactors in e-commerce platforms, development of a testing-ground for capturing the behavioral essence of these transactors across a large variety of digital systems, construction of a discriminative model (to prove separability) as a strong learner for downstream mitigation of contamination by non-human entities, translation of such learned separability into existing dynamic pricing machine learning loops, and finally establishment of a high-level KPI-affecting causal effect and cost-saving framework for the future of internet commerce in the presence of such non-human learners.
|
In this paper we present an exploration and defense against the presence of new commercial entities in digitally powered platforms, preserving market equilibrium in the age of AI. This research establishes the following contributions: definition and formalization of non-human transactors in e-commerce platforms, development of a testing-ground for capturing the behavioral essence of these transactors across a large variety of digital systems, construction of a discriminative model (to prove distinguishability) as a strong learner for downstream mitigation of contamination by non-human entities, translation of such learned distinguishability into existing dynamic pricing machine learning loops, and finally establishment of a high-level KPI-affecting causal effect and cost-saving framework for the future of internet commerce in the presence of such non-human learners.
|
||||||
|
|
||||||
This research effort touches a large variety of domains, spanning behavioral economics for understanding the rationality of behavior as theorized by the concept of homo economicus, agent-based modeling to translate our learned separability into disjoint dynamic pricing systems, reinforcement learning which serves as the SOTA for price-learners, and dynamic pricing and market equilibrium theory to understand the risks of possible supra-competitive pricing phenomena in cases of adversarial pricing systems driving the market out of equilibrium. \footnote{Given the rapid evolution of the field we acknowledge all developments with a cutoff set at the date of March 1st 2026.}
|
This research effort touches a large variety of domains, spanning behavioral economics for understanding the rationality of behavior as theorized by the concept of homo economicus, agent-based modeling to translate our learned distinguishability into disjoint dynamic pricing systems, reinforcement learning which serves as the SOTA for price-learners, and dynamic pricing and market equilibrium theory to understand the risks of possible supra-competitive pricing phenomena in cases of adversarial pricing systems driving the market out of equilibrium. \footnote{Given the rapid evolution of the field we acknowledge all developments with a cutoff set at the date of March 1st 2026.}
|
||||||
|
|
||||||
\subsection{Motivation and Market Context}
|
\subsection{Motivation and Market Context}
|
||||||
|
|
||||||
@@ -25,7 +25,7 @@ We formally define interaction data as coming from some actor which can either b
|
|||||||
This dissertation is organized around one main research question and three supporting sub-questions:
|
This dissertation is organized around one main research question and three supporting sub-questions:
|
||||||
\begin{enumerate}
|
\begin{enumerate}
|
||||||
\item[\textbf{Main RQ}] How can dynamic pricing systems preserve margin integrity when transaction orchestration is increasingly mediated by non-human agents?
|
\item[\textbf{Main RQ}] How can dynamic pricing systems preserve margin integrity when transaction orchestration is increasingly mediated by non-human agents?
|
||||||
\item[\textbf{SQ1}] \textit{Separability}: Can agent and human sessions be reliably distinguished from behavioral interaction signals alone, without relying on network-level or device fingerprinting?
|
\item[\textbf{SQ1}] \textit{Distinguishability}: Can agent and human sessions be reliably distinguished from behavioral interaction signals alone, without relying on network-level or device fingerprinting?
|
||||||
\item[\textbf{SQ2}] \textit{Theoretical Impact}: What is the formal relationship between agent contamination levels and the erosion of pricing power in dynamic pricing systems?
|
\item[\textbf{SQ2}] \textit{Theoretical Impact}: What is the formal relationship between agent contamination levels and the erosion of pricing power in dynamic pricing systems?
|
||||||
\item[\textbf{SQ3}] \textit{Robust Mitigation}: How can pricing policies be constructed to maintain margin integrity under unknown and non-stationary levels of agent contamination?
|
\item[\textbf{SQ3}] \textit{Robust Mitigation}: How can pricing policies be constructed to maintain margin integrity under unknown and non-stationary levels of agent contamination?
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
@@ -59,4 +59,4 @@ Extract final result from terminal state\;
|
|||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
|
|
||||||
The previously described goal of separability allows us to formulate a task which entails taking raw interaction data for either actor and creating a composite demand estimate. We propose a robust optimization objective defined in our methodology, transforming the pricing problem into a form of Distributionally Robust Optimization \parencite{kuhn_distributionally_2025} where the learner must guard against adversarial contamination in observed demand distributions. In this setting we must learn to make decisions that perform well not under a single estimated probability distribution but under an ambiguity set of distributions, of which we have limited information. In our case, as stated, this is a mixture of distributions with a parameter which is unknown and non-stationary.
|
The previously described goal of distinguishability allows us to formulate a task which entails taking raw interaction data for either actor and creating a composite demand estimate. We propose a robust optimization objective defined in our methodology, transforming the pricing problem into a form of Distributionally Robust Optimization \parencite{kuhn_distributionally_2025} where the learner must guard against adversarial contamination in observed demand distributions. In this setting we must learn to make decisions that perform well not under a single estimated probability distribution but under an ambiguity set of distributions, of which we have limited information. In our case, as stated, this is a mixture of distributions with a parameter which is unknown and non-stationary.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
\section{Literature Review}
|
\section{Literature Review}
|
||||||
|
|
||||||
To better understand all wedges of the current works, we must start by exploring the nature of agents, agentic computer use and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. This latter case of agents shopping is currently pending legal action in the case of \textcite{noauthor_amazoncom_2026}, which is being treated as a violation of the Computer Fraud and Abuse Act. The introduction of these mediating actor entities into economic systems is further creating a threat of false-name bidding \parencite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems demonstrates whitewashing in AI agents, which can ignore defensive mechanisms by re-entry with different identities \parencite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for separating non-human reconnaissance from genuine human demand expression and integrating that separation into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap is what our contribution aims to address, particularly for the aforementioned stakeholder groups.
|
To better understand all wedges of the current works, we must start by exploring the nature of agents, agentic computer use and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. This latter case of agents shopping is currently pending legal action in the case of \textcite{noauthor_amazoncom_2026}, which is being treated as a violation of the Computer Fraud and Abuse Act. The introduction of these mediating actor entities into economic systems is further creating a threat of false-name bidding \parencite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems demonstrates whitewashing in AI agents, which can ignore defensive mechanisms by re-entry with different identities \parencite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for distinguishing non-human reconnaissance from genuine human demand expression and integrating that distinguishability into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap is what our contribution aims to address, particularly for the aforementioned stakeholder groups.
|
||||||
|
|
||||||
\subsection{Agent Taxonomy and Definitions}
|
\subsection{Agent Taxonomy and Definitions}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
\section{Methodology}
|
\section{Methodology}
|
||||||
|
|
||||||
This section details the theoretical and practical framework developed to address dynamic pricing under the influence of non-human actors. We begin by formalizing the problem environment and the nature of the actors. We then derive the \textit{Cost of Information} (COI) theorem, proving the erosion of pricing power in the limit of agent saturation. Following this, we outline our generative contamination strategy using GOFAI-driven separability and transition probability learning. Finally, we formulate the robust control problem as a Stackelberg game solved via Distributionally Robust Reinforcement Learning (DR-RL) with constructed ambiguity sets.
|
This section details the theoretical and practical framework developed to address dynamic pricing under the influence of non-human actors. We begin by formalizing the problem environment and the nature of the actors. We then derive the \textit{Cost of Information} (COI) theorem, proving the erosion of pricing power in the limit of agent saturation. Following this, we outline our generative contamination strategy using GOFAI-driven distinguishability and transition probability learning. Finally, we formulate the robust control problem as a Stackelberg game solved via Distributionally Robust Reinforcement Learning (DR-RL) with constructed ambiguity sets.
|
||||||
|
|
||||||
\subsection{Problem Formalization}
|
\subsection{Problem Formalization}
|
||||||
|
|
||||||
@@ -109,13 +109,13 @@ Since users act with motivations, we define a pool of tasks (jobs to be done) an
|
|||||||
|
|
||||||
A representative task is to find the cheapest feasible catalog item under explicit constraints while removing strict financial limits so we avoid trivial optimization behavior. Participants are also randomly assigned to one experimental platform mode (hotel or airline). Once assigned, they are dropped into the experiment with an actor ID. Under each experiment ID, we can observe multiple sessions across time and gather long interaction traces for the same actor.
|
A representative task is to find the cheapest feasible catalog item under explicit constraints while removing strict financial limits so we avoid trivial optimization behavior. Participants are also randomly assigned to one experimental platform mode (hotel or airline). Once assigned, they are dropped into the experiment with an actor ID. Under each experiment ID, we can observe multiple sessions across time and gather long interaction traces for the same actor.
|
||||||
|
|
||||||
The human data collection involved 18 participants, all of whom provided explicit informed consent prior to their session. Participants had an average age of 21 years and were recruited from a university population. Alongside the 18 human sessions we ran 18 agent sessions of equivalent task scope, giving a balanced dataset of 36 labeled trajectories. Each participant was assigned a single platform mode and a single task drawn from the pool, and completed the session independently without guidance on navigation or pricing strategy.
|
The human data collection involved 13 participants, all of whom provided explicit informed consent prior to their session. Participants had an average age of 21 years and were recruited from a university population. Alongside the 13 human sessions we ran 16 agent sessions of equivalent task scope, yielding 29 labeled trajectories in total (45\% human, 55\% agent). Each participant was assigned a single platform mode and a single task drawn from the pool, and completed the session independently without guidance on navigation or pricing strategy.
|
||||||
|
|
||||||
To evaluate quality and realism of the setup, we store both structured event logs and full interaction transcripts. This lets us combine quantitative analysis with transcript-level qualitative findings. The result is an isolated system where we can control the interaction process while preserving realistic behavior.
|
To evaluate quality and realism of the setup, we store both structured event logs and full interaction transcripts. This lets us combine quantitative analysis with transcript-level qualitative findings. The result is an isolated system where we can control the interaction process while preserving realistic behavior.
|
||||||
|
|
||||||
Operationally, goals and experiment runs are tracked in PostgreSQL. This data-acquisition phase is the first half of the methodology and is intentionally a disconnected component that feeds the later contributions. The second half uses collected behavioral traces to separate classes (agent vs human) with session-conditioned probability estimates, then injects those estimates into the pricing learner.
|
Operationally, goals and experiment runs are tracked in PostgreSQL. This data-acquisition phase is the first half of the methodology and is intentionally a disconnected component that feeds the later contributions. The second half uses collected behavioral traces to distinguish classes (agent vs human) with session-conditioned probability estimates, then injects those estimates into the pricing learner.
|
||||||
|
|
||||||
Our process follows three stages: (1) observe and vectorize behavioral interactions, (2) learn separability to characterize human versus agent patterns, and (3) use the learned signal to train a defensive policy in a controlled dynamic-pricing simulator.
|
Our process follows three stages: (1) observe and vectorize behavioral interactions, (2) learn distinguishability to characterize human versus agent patterns, and (3) use the learned signal to train a defensive policy in a controlled dynamic-pricing simulator.
|
||||||
|
|
||||||
\begin{figure}[ht]
|
\begin{figure}[ht]
|
||||||
\resizebox{\columnwidth}{!}{%
|
\resizebox{\columnwidth}{!}{%
|
||||||
@@ -209,15 +209,15 @@ In the simulator baseline this order is encoded with a compact fixed scale: cart
|
|||||||
|
|
||||||
In addition to behavioral events, the platform logs price observations to a separate Kafka topic. Each price query generates a record associating the product, displayed price, requesting session, platform mode, and timestamp. This dual-stream architecture enables joint analysis of price exposure and behavioral response.
|
In addition to behavioral events, the platform logs price observations to a separate Kafka topic. Each price query generates a record associating the product, displayed price, requesting session, platform mode, and timestamp. This dual-stream architecture enables joint analysis of price exposure and behavioral response.
|
||||||
|
|
||||||
\subsection{Generative Contamination and Separability}
|
\subsection{Generative Contamination and Distinguishability}
|
||||||
|
|
||||||
To train a robust pricing learner, we need a simulator that can generate realistic interaction data under controlled contamination. We build this from Phantom data using a two-stage approach.
|
To train a robust pricing learner, we need a simulator that can generate realistic interaction data under controlled contamination. We build this from Phantom data using a two-stage approach.
|
||||||
|
|
||||||
\subsubsection{Ground-Truth Separability}
|
\subsubsection{Ground-Truth Distinguishability}
|
||||||
|
|
||||||
Because sessions are collected under controlled experimental conditions where each actor is assigned a known type at the start of the trial, labels (human or agent) are available as ground truth rather than as the output of a heuristic classifier. We therefore estimate separate transition kernels directly from each labeled partition, treating the resulting human and agent kernels as the ground-truth behavioral profiles for each class. We then ask a direct methodological question: are the kernels separable enough to justify downstream pricing control that depends on that separability?
|
Because sessions are collected under controlled experimental conditions where each actor is assigned a known type at the start of the trial, labels (human or agent) are available as ground truth rather than as the output of a heuristic classifier. We therefore estimate separate transition kernels directly from each labeled partition, treating the resulting human and agent kernels as the ground-truth behavioral profiles for each class. We then ask a direct methodological question: are the kernels distinguishable enough to justify downstream pricing control that depends on that distinguishability?
|
||||||
|
|
||||||
To answer this, we compute per-session divergence scores against both class-level centroids. For each session in either partition, we fit a session-level event transition kernel from that session's trajectory alone, then compute its average divergence to the human centroid and to the agent centroid. The per-session separability score is the gap between these two divergences: a negative value indicates proximity to human behavior, a positive value indicates proximity to agent behavior.
|
To answer this, we compute per-session divergence scores against both class-level centroids. For each session in either partition, we fit a session-level event transition kernel from that session's trajectory alone, then compute its average divergence to the human centroid and to the agent centroid. The per-session distinguishability score is the gap between these two divergences: a negative value indicates proximity to human behavior, a positive value indicates proximity to agent behavior.
|
||||||
|
|
||||||
We cannot assume normal distributions for divergence scores, which are right-skewed and bounded below by zero, so we do not use a Student's t-test. Instead we apply a Mann-Whitney U test \parencite{mann_test_1947} on the per-session gap scores between the two groups. The Mann-Whitney test is a rank-based nonparametric test that compares the ordering of two independent samples without distributional assumptions, making it appropriate for small samples drawn from skewed populations.
|
We cannot assume normal distributions for divergence scores, which are right-skewed and bounded below by zero, so we do not use a Student's t-test. Instead we apply a Mann-Whitney U test \parencite{mann_test_1947} on the per-session gap scores between the two groups. The Mann-Whitney test is a rank-based nonparametric test that compares the ordering of two independent samples without distributional assumptions, making it appropriate for small samples drawn from skewed populations.
|
||||||
|
|
||||||
@@ -305,7 +305,7 @@ We also consider taxation-like overlays for agent traffic under strategy-proof m
|
|||||||
|
|
||||||
\subsubsection{Pricing Mechanism Summary}
|
\subsubsection{Pricing Mechanism Summary}
|
||||||
|
|
||||||
We now present the complete pricing mechanism that integrates the behavioral separability, contamination estimation, and robust optimization components developed in the preceding sections. The defensive pricing loop algorithm formalizes the process as a Stackelberg game where the platform (leader) sets prices and the aggregate demand (follower) responds through observed session trajectories.
|
We now present the complete pricing mechanism that integrates the behavioral distinguishability, contamination estimation, and robust optimization components developed in the preceding sections. The defensive pricing loop algorithm formalizes the process as a Stackelberg game where the platform (leader) sets prices and the aggregate demand (follower) responds through observed session trajectories.
|
||||||
|
|
||||||
\begin{algorithm}[t]
|
\begin{algorithm}[t]
|
||||||
\caption{PHANTOM defensive pricing loop}
|
\caption{PHANTOM defensive pricing loop}
|
||||||
|
|||||||
@@ -1,14 +1,14 @@
|
|||||||
\section{Results}
|
\section{Results}
|
||||||
\begin{figure}[ht]
|
\begin{figure}[ht]
|
||||||
\centering
|
\centering
|
||||||
\input{chapters/figures/supra.tex}
|
\input{chapters/figures/supra/supra.tex}
|
||||||
\caption{Evolution of price distributions over experiment steps. The heatmap illustrates the density of price offerings. This is an early baseline simulation which demonstrates supra-competitive price-setting in deep learning agents such as SAC, as can be clearly seen from the high density at the highest available price.}
|
\caption{Evolution of price distributions over experiment steps. The heatmap illustrates the density of price offerings. This is an early baseline simulation which demonstrates supra-competitive price-setting in deep learning agents such as SAC, as can be clearly seen from the high density at the highest available price.}
|
||||||
\label{fig:supra_heatmap}
|
\label{fig:supra_heatmap}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
\subsection{Behavioral Analysis}
|
\subsection{Behavioral Analysis}
|
||||||
|
|
||||||
Separability between human and agent sessions is evaluated by computing per-session divergence gap scores (how much closer each session is to the human baseline versus the agent baseline) and comparing the two groups with a Mann-Whitney U test. The table below reports the group-level descriptive statistics for the gap scores and the test result.
|
Distinguishability between human and agent sessions is evaluated by computing per-session divergence gap scores (how much closer each session is to the human baseline versus the agent baseline) and comparing the two groups with a Mann-Whitney U test. The full recorded cohort contains 13 human sessions and 16 agent sessions, and the table below reports the corresponding group-level statistics and test result.
|
||||||
|
|
||||||
\begin{table}[ht]
|
\begin{table}[ht]
|
||||||
\centering
|
\centering
|
||||||
@@ -18,19 +18,19 @@ Separability between human and agent sessions is evaluated by computing per-sess
|
|||||||
\toprule
|
\toprule
|
||||||
Group & n & Mean gap & Std \\
|
Group & n & Mean gap & Std \\
|
||||||
\midrule
|
\midrule
|
||||||
Human sessions & 11 & $-3.3522$ & $2.6748$ \\
|
Human sessions & 13 & $-3.35$ & $2.67$ \\
|
||||||
Agent sessions & 6 & $+1.6482$ & $2.8349$ \\
|
Agent sessions & 16 & $+1.65$ & $2.83$ \\
|
||||||
\midrule
|
\midrule
|
||||||
\multicolumn{4}{l}{Mann-Whitney $U = 2.0$, $p = 0.0006$ (two-sided)} \\
|
\multicolumn{4}{l}{Mann-Whitney two-sided test: $p<0.001$} \\
|
||||||
\bottomrule
|
\bottomrule
|
||||||
\end{tabular}
|
\end{tabular}
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
The sign structure is consistent with the theoretical expectation: human sessions produce negative gap scores (closer to the human centroid, far from the agent centroid) while agent sessions produce positive gap scores (closer to the agent centroid). The two-sided p-value of 0.0006 (that is, if the two groups did not differ, a rank separation at least this extreme would be expected with probability of only 0.06\%) indicates near-complete rank separation between the groups at $n=11$ humans and $n=6$ agents, providing strong evidence that the transition kernels are separable enough to justify their use as a control signal in downstream pricing.
|
The sign structure is consistent with the theoretical expectation: human sessions produce negative gap scores (closer to the human centroid, far from the agent centroid) while agent sessions produce positive gap scores (closer to the agent centroid). The two-sided test result ($p<0.001$) at $n=13$ humans and $n=16$ agents indicates strong rank distinction between groups, providing evidence that the transition kernels are distinguishable enough to justify their use as a control signal in downstream pricing.
|
||||||
|
|
||||||
\subsection{Experimental Outcomes}
|
\subsection{Experimental Outcomes}
|
||||||
|
|
||||||
To evaluate robustness contributions, we compare two policies on the same environment family: (i) robust pricing with COI-aware reward and adversarial contamination step, and (ii) non-robust baseline with revenue-only reward (no-robust flag).
|
To evaluate robustness contributions, we compare two policies on the same environment family: (i) robust pricing with COI-aware reward and adversarial contamination step, and (ii) a baseline policy with revenue-only reward.
|
||||||
|
|
||||||
\begin{table}[ht]
|
\begin{table}[ht]
|
||||||
\centering
|
\centering
|
||||||
@@ -41,7 +41,7 @@ To evaluate robustness contributions, we compare two policies on the same enviro
|
|||||||
Policy & Eval reward & Eval revenue & COI leakage & Margin collapse rate \\
|
Policy & Eval reward & Eval revenue & COI leakage & Margin collapse rate \\
|
||||||
\midrule
|
\midrule
|
||||||
Robust policy & \textit{TBD} & \textit{TBD} & \textit{TBD} & \textit{TBD} \\
|
Robust policy & \textit{TBD} & \textit{TBD} & \textit{TBD} & \textit{TBD} \\
|
||||||
Non-robust baseline (\texttt{--no-robust}) & \textit{TBD} & \textit{TBD} & \textit{TBD} & \textit{TBD} \\
|
Baseline policy & \textit{TBD} & \textit{TBD} & \textit{TBD} & \textit{TBD} \\
|
||||||
\bottomrule
|
\bottomrule
|
||||||
\end{tabular}
|
\end{tabular}
|
||||||
\end{table}
|
\end{table}
|
||||||
@@ -50,6 +50,6 @@ This comparison isolates the effect of robustness terms from model capacity and
|
|||||||
|
|
||||||
\subsection{Interpretation and Insights}
|
\subsection{Interpretation and Insights}
|
||||||
|
|
||||||
The Mann-Whitney result (U=2.0, p less than 0.001) confirms that per-session divergence gaps separate the two actor classes with near-zero overlap in rank ordering. This is the condition required for separability to act as a useful control signal in the pricing loop rather than just an auxiliary classifier score.
|
The Mann-Whitney result ($p<0.001$) confirms that per-session divergence gaps distinguish the two actor classes with near-zero overlap in rank ordering. This is the condition required for distinguishability to act as a useful control signal in the pricing loop rather than just an auxiliary classifier score.
|
||||||
|
|
||||||
\subsection{Anomalies}
|
\subsection{Anomalies}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user