Merge pull request #57 from velocitatem/first-last-todos

First last todos
This commit is contained in:
Daniel Alves Rösel
2026-04-10 15:03:20 +04:00
committed by GitHub
87 changed files with 2573 additions and 1411 deletions

View File

@@ -9,6 +9,12 @@ on:
paths:
- 'paper/**'
- '.github/**'
workflow_dispatch:
inputs:
skip_mirrors:
description: Skip Codex mirror generation (avoids API quota use)
type: boolean
default: false
jobs:
build:
runs-on: ubuntu-latest
@@ -24,8 +30,10 @@ jobs:
- name: Prepare appendix code snapshot
run: bash paper/concat_code.sh
# Repo variable SKIP_CODEX_MIRRORS=true skips on push/PR; workflow_dispatch can set skip_mirrors.
- name: Generate mirrors with Codex
if: ${{ env.OPENAI_API_KEY != '' }}
if: ${{ env.OPENAI_API_KEY != '' && vars.SKIP_CODEX_MIRRORS != 'true' && (github.event_name != 'workflow_dispatch' || github.event.inputs.skip_mirrors != 'true') }}
continue-on-error: true
uses: openai/codex-action@v1
with:
openai-api-key: ${{ env.OPENAI_API_KEY }}

3
.gitignore vendored
View File

@@ -34,6 +34,9 @@ dist/
**/*.parquet
**/_build/
# mkdocs output (run make docs.platform locally or rely on CI)
docs/documentation/
# paper build artifacts
paper/src/bib/auto
paper/src/auto/*

View File

@@ -44,7 +44,7 @@ SWEEP_ENV_LOAD = set -a; [ -f "$(SWEEP_ENV_FILE)" ] && . "$(SWEEP_ENV_FILE)" ||
.PHONY: help
help:
@echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | manim.render manim.render.all"
@echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.summary pdf.summary.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | docs.platform | manim.defense manim.defense.hq manim.render manim.render.full manim.render.poster manim.render.appendix manim.render.all"
@echo "backend.server backend.provider backend.worker | platform.up platform.down platform.logs | docker.train.publish"
@echo "data.pull data.push data.whoclicked.publish | study.margin-erosion study.margin-erosion.quick study.margin-erosion.plot"
@echo "tpu.ray.bootstrap tpu.ray.deps tpu.ray.verify tpu.ray.teardown"
@@ -102,6 +102,14 @@ pdf.genpop.watch:
pdf.arxiv:
@bash scripts/nx_paper.sh build-arxiv
# Build the summary PDF: delegates to scripts/nx_paper.sh with the
# `build-summary` subcommand (the leading @ suppresses echoing the command).
.PHONY: pdf.summary
pdf.summary:
@bash scripts/nx_paper.sh build-summary
# Watch variant of pdf.summary: delegates to scripts/nx_paper.sh with the
# `watch-summary` subcommand.
.PHONY: pdf.summary.watch
pdf.summary.watch:
@bash scripts/nx_paper.sh watch-summary
.PHONY: test.backend
test.backend:
@$(NX) run research:test
@@ -186,6 +194,19 @@ study.margin-erosion:
study.margin-erosion.quick:
python -m engine.studies.margin_erosion_alpha --quick
# --- Operator documentation site (MkDocs) ------------------------------------
# DOCS_VENV is user-overridable (`make docs.platform DOCS_VENV=/tmp/docs-venv`);
# the tool paths below are derived from it.
DOCS_VENV ?= docs/.venv
DOCS_MKDOCS := $(DOCS_VENV)/bin/mkdocs
DOCS_PIP := $(DOCS_VENV)/bin/pip
# Stamp file touched only after every install step has succeeded. Depending on
# the stamp (instead of the venv directory itself) means a failed or interrupted
# `pip install` is retried on the next run, and edits to docs/requirements.txt
# trigger a reinstall.
DOCS_STAMP := $(DOCS_VENV)/.installed

# Build the MkDocs site described by docs/mkdocs.yml.
.PHONY: docs.platform
docs.platform: $(DOCS_STAMP)
	$(DOCS_MKDOCS) build -f docs/mkdocs.yml

# Create the virtualenv and install the documentation toolchain into it.
$(DOCS_STAMP): docs/requirements.txt
	python3 -m venv $(DOCS_VENV)
	$(DOCS_PIP) install --upgrade pip
	$(DOCS_PIP) install -r docs/requirements.txt
	@touch $@

# Backward-compatible alias: `make $(DOCS_VENV)` still provisions the venv.
$(DOCS_VENV): $(DOCS_STAMP) ;
.PHONY: wordcount
wordcount:
@$(NX) run paper:wordcount
@@ -232,12 +253,28 @@ test:
count-lines:
@$(NX) run research:stats
all:
@$(NX) run paper:build
# Default artifact set for this repo: thesis PDF (same as pdf).
all: pdf
.PHONY: manim.defense manim.defense.hq manim.render manim.render.full manim.render.poster manim.render.appendix manim.render.all
# Main defense reel (paper/defense/manim/render_defense); uses paper/defense/.venv when present
manim.defense:
@cd paper/defense/manim && ./render_defense full
manim.defense.hq:
@cd paper/defense/manim && ./render_defense full --quality qh
.PHONY: manim.render manim.render.all
manim.render:
@$(NX) run manim:render
manim.render.full:
@$(NX) run manim:render-full
manim.render.poster:
@$(NX) run manim:render-poster
manim.render.appendix:
@$(NX) run manim:render-appendix
manim.render.all:
@$(NX) run manim:render-all

View File

@@ -6,10 +6,12 @@
Agent-aware dynamic pricing research platform for studying how automated transaction orchestration changes pricing power, and for testing defenses that recover margin while protecting legitimate user experience.
[![Build PDF](https://github.com/velocitatem/PHANTOM/actions/workflows/latex.yml/badge.svg)](https://github.com/velocitatem/PHANTOM/actions/workflows/latex.yml)
[![Paper](https://img.shields.io/badge/Paper-PDF-red?logo=adobe-acrobat-reader)](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf)
[![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-sm.svg)](https://huggingface.co/datasets/velocitatem/whoclickedit)
[![TPU Research Cloud](https://img.shields.io/badge/TPU%20Research%20Cloud-TRC%20supported-4285F4?logo=googlecloud&logoColor=white)](https://sites.research.google/trc/faq/)
<p>
<a href="https://github.com/velocitatem/PHANTOM/actions/workflows/latex.yml"><img src="https://github.com/velocitatem/PHANTOM/actions/workflows/latex.yml/badge.svg" alt="Build PDF" style="vertical-align: middle;" /></a>
<a href="https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf"><img src="https://img.shields.io/badge/Paper-PDF-red?logo=adobe-acrobat-reader" alt="Paper PDF" style="vertical-align: middle;" /></a>
<a href="https://huggingface.co/datasets/velocitatem/whoclickedit"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-sm.svg" alt="Dataset on Hugging Face" style="vertical-align: middle; position: relative; top: 1px;" /></a>
<a href="https://sites.research.google/trc/faq/"><img src="https://img.shields.io/badge/TPU%20Research%20Cloud-TRC%20supported-4285F4?logo=googlecloud&logoColor=white" alt="TPU Research Cloud" style="vertical-align: middle;" /></a>
</p>
**Live demos:** [Hotel](https://phantom-hotel.vercel.app) | [Airline](https://phantom-airline.vercel.app) | [Academic page](https://velocitatem.github.io/PHANTOM/)
@@ -140,7 +142,10 @@ flowchart LR
| `experiments/` | Data processing, ETL ideas, and analysis assets |
| `docker/` | Dockerfiles for platform services |
| `tests/e2e/` | Playwright end-to-end tests |
| `docs/` | Academic project page source |
| `docs/` | Academic project page (GitHub Pages root) + MkDocs config |
| `docs/src/` | Markdown sources for the operator documentation site |
| `docs/documentation/` | MkDocs build output (gitignored; run `make docs.platform`; served at `/documentation/` on Pages) |
| `SETUP.md` | Unified operator guide: stack, kernels, RL training, thesis refs by chapter |
## Operational notes
@@ -149,6 +154,11 @@ flowchart LR
- Research commands (`make train`, `make benchmark*`, `make train.agent`) auto-load `.env.sweep`.
- Paper builds call `paper/concat_code.sh` before compilation to flatten code into the appendix.
## Operator documentation
- Full setup guide (platform + research): [`SETUP.md`](SETUP.md)
- Hosted operator docs (after `make docs.platform`): […/PHANTOM/documentation/](https://velocitatem.github.io/PHANTOM/documentation/) on GitHub Pages
## Research artifacts
- Thesis PDF: `thesis-latest.pdf` or [hosted PDF](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf)

298
SETUP.md Normal file
View File

@@ -0,0 +1,298 @@
# PHANTOM: setup for operators and partners
This guide walks a team from **business context** (what you sell, how you price, what traffic you worry about) through a **running PHANTOM stack**, **behavioral kernels and contamination**, and **RL training / benchmarking**. The math lives in the thesis PDF; here we tie operations to that math without re-deriving it. References to the thesis use **chapter numbers** only (build the PDF locally if you need line-level citations).
**Thesis (PDF):** [thesis-latest.pdf](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf)
---
## 1. Who this is for / prerequisites
**Audience:** Engineers and researchers who run Docker, a Next.js app, and Python tooling; product or risk stakeholders who define experiment goals and acceptable UX tradeoffs.
**Skills:** Docker Compose, Node/npm, Python 3.8+, basic Kafka/Redis mental model.
**Decide up front:**
- **Vertical vs demo:** The repo ships `hotel` and `airline` storefront modes (`STORE_MODE`). Anything beyond that is custom integration work.
- **Data residency:** Event streams and training artifacts default to paths under the repo (overridable via `PHANTOM_*` env vars in `lib/config.py`). Decide where logs and models may live before you point production-like traffic at the stack.
- **Experiment governance:** Who may run human vs agent sessions, how sessions are labeled or weak-labeled for research, and retention policy for interaction logs.
### Theoretical implications
The formal model assumes each session is generated by a latent **actor class** $Y \in \{H, A\}$ (human vs agent). Your deployment choices implicitly assert **which sessions are valid for estimating human vs agent behavior** and whether experimental conditions are stable. If you mix exploratory QA traffic with labeled experiments without recording that fact, you blur the empirical partitions $D_H$ and $D_A$ that the methodology needs for transition kernels and contamination studies. See the **Introduction** (research questions) and **Methodology**, Problem Formalization, in the thesis PDF.
---
## 2. Business fit framing
**What PHANTOM is for:** Studying how **automated browsing and transaction orchestration** interact with **session-based pricing**: behavior generates a demand proxy $\hat{q}$; pricing policies map interaction history to prices; **Cost of Information (COI)** is the premium the platform can sustain above a floor when information is scarce. Agent-mediated **reconnaissance in one session** and **purchase in another** undermines that asymmetry; the thesis proves a **COI erosion** mechanism under many independent price queries.
**What you must supply:**
- A **product catalog** path: defaults assume Supabase-backed product data (`NEXT_PUBLIC_SUPABASE_URL`, `NEXT_PUBLIC_SUPABASE_ANON_KEY`).
- A plan for **interaction and price events** reaching the ingestion path (backend → Kafka) or an adapter you maintain.
- Clear **experiment goals:** e.g. compare human vs agent KPIs under the same task, measure margin under varying contamination $\alpha$.
### Theoretical implications
Aggregate demand in the thesis is a **mixture** over human and agent types with contamination $\alpha$ plus noise $\epsilon_t$; see the mixture demand discussion in **Chapter 3 (Methodology)**. COI is defined as $\mathbb{E}[P]-\underline{p}$; the **COI framework** and theorem in the same chapter explain why saturated agent querying collapses extractable premium. Your business scenario determines which **actions** enter $\hat{q}$ and how interpretable $\alpha$ is for your traffic.
---
## 3. Environment and secrets
**Bootstrap files (from repo root):**
```bash
npm install
cp .env.example .env
cp .env.sweep.example .env.sweep
```
**Core `.env` (platform + web + docker):** See [`.env.example`](.env.example). You must also set the variables called out in [`README.md`](README.md) for a full stack: `NEXT_PUBLIC_SUPABASE_URL`, `NEXT_PUBLIC_SUPABASE_ANON_KEY`, `AIRFLOW_FERNET_KEY`, `AIRFLOW_SECRET_KEY` (and provider ports per your compose file).
**Training / sweeps (`.env.sweep`):** Used by `make train`, `make benchmark`, sweep agents. Typically `WANDB_API_KEY`, optional `WANDB_ENTITY` / `WANDB_PROJECT`, `GITHUB_TOKEN` for bootstrap flows, `SWEEP_ID` for W&B sweep workers. See [`.env.sweep.example`](.env.sweep.example).
**Security:** Never commit real `.env` or `.env.sweep` files. Rotate keys if they leak.
### Theoretical implications
Splitting **online platform credentials** (ingestion, catalog, Kafka) from **offline training credentials** (W&B, cloud TPUs, GitHub tokens for workers) mirrors the **hybrid Kappa–Lambda** data loop in the thesis: streaming observation vs batch / long-running training jobs. That split is named in the **Terminology** appendix of the thesis PDF.
---
## 4. Bring-up (commands)
Aligned with [`README.md`](README.md):
```bash
npm install
cp .env.example .env
cp .env.sweep.example .env.sweep
# edit .env: Supabase, Airflow keys, etc.
make platform.up
make web.dev
```
**Sanity checks:**
| Endpoint | Role |
| ------------------------------------------------------------- | --------------------------------- |
| `http://localhost:3000` | Next.js storefront |
| `http://localhost:5000/health` | Backend ingest API |
| `http://localhost:5001/health` | Pricing provider |
| `http://localhost:8085` | Airflow UI (default compose port) |
| `http://localhost:8084` or configured `REDPANDA_CONSOLE_PORT` | Kafka console (see your `.env`) |
**Optional tests:** `make test.backend` (with venv/tooling as in Makefile); `make test.e2e` requires backend, web, and Airflow up per README.
### Theoretical implications
A correctly wired stack logs **trajectories** $\tau_s$ (sequences of events) and **price exposure** together. **Chapter 3** defines events $e_{s,k}=(a,i,t)$ and proxies $\hat{q}$ from weighted actions—without joint logging of behavior and quotes, you cannot recover the objects the theory reasons about (Problem Formalization).
---
## 5. Service map
```mermaid
flowchart LR
U[Human / Agent Browser] --> W[Next.js Web App]
W -->|Price requests| P[Pricing Provider]
W -->|Interaction events| B[Backend Ingest API]
B --> K[Kafka]
K --> A[Airflow + Worker Jobs]
A --> R[Redis Model Registry]
P -->|Session/global prices| W
E[Research Engine + Experiments] --> A
E --> R
```
**Ports (typical; confirm in `docker-compose` and `.env`):** `BACKEND_PORT` (5000), `PROVIDER_PORT` (5001), `KAFKA_PORT`, `REDIS_PORT`, Airflow `AIRFLOW_WEBSERVER_PORT` (8085 default), Redpanda console.
### Theoretical implications
The platform **observes** behavioral proxies and quoted prices, not the latent demand curve $d(p\mid\theta)$. The distinction between $\hat{q}$ and true demand is explicit in **Chapter 3**. Misattributing proxy noise to “true” elasticity breaks both estimation and any causal story about COI.
---
## 6. Tailoring to your business
**Storefront mode:** `STORE_MODE=hotel` or `airline` (see [`web/src/lib/config.ts`](web/src/lib/config.ts) and env). This switches catalog and UI, not the core ingestion pattern.
**API base / environment:** `NEXT_PUBLIC_API_BASE`, `NEXT_PUBLIC_APP_ENV` (validated in `config.ts`).
**Paths for data and runs:** Override with `PHANTOM_DATA_DIR`, `PHANTOM_SIM_RUNS_DIR`, `PHANTOM_MODEL_REGISTRY_DIR`, `PHANTOM_COLLECTED_DATA_DIR`, etc. ([`lib/config.py`](lib/config.py)).
**Scope:** A new vertical (custom product ontology, checkout rules, pricing rules) means **new UI, events, and possibly new reward features** in the engine. Budget engineering time; the repo is a research platform, not a turnkey SaaS skin for arbitrary catalogs without code changes.
### Theoretical implications
Transition kernels $\hat{\mathcal{T}}_H,\hat{\mathcal{T}}_A$ are estimated on a **finite action / state space** derived from your instrumentation. Changing catalog depth or event taxonomy changes the MDP state space; old kernel estimates are not portable. See the transition kernel discussion in **Chapter 3**.
---
## 7. Data collection and experiments
**Flow:** Browser → backend → **Kafka** → downstream consumers (Airflow DAGs, notebooks, ETL under `experiments/`). Ensure **session identity**, **item identifiers**, and **action types** are consistent enough to build trajectories.
**Weak labels:** The thesis discusses partitioning data into human vs agent subsets for MLE transition counts. In production you may only have heuristic labels—document bias explicitly.
### Theoretical implications
Distinguishability (sub-question SQ1 in the **Introduction**) asks whether $H$ vs $A$ is identifiable from behavior alone. Your labeling and experimental design determine whether $\Delta_H,\Delta_A$ and $f(\tau)$ are meaningful or dominated by noise. Symbols appear in the **Terminology** appendix ($\Delta_H,\Delta_A$, $f(\tau)$, contamination generator $\mathcal{G}(\alpha)$).
---
## 8. Transition kernels and agent scoring (theory → practice)
**Theory:** Sessions yield trajectories $\tau_s$. For each actor class $y \in \{H, A\}$, the thesis estimates a **Markov transition kernel** by counting transitions and normalizing (MLE):
$$
\hat{P}(s' \mid s) = \frac{N(s,s')}{\sum_k N(s,k)}
$$
Human and agent prototypes $\hat{\mathcal{T}}_H,\hat{\mathcal{T}}_A$ support comparing an empirical kernel from a partial trajectory to prototypes (e.g. KL-style divergences $\Delta_H,\Delta_A$) and mapping to a **weak agent probability** $f(\tau)$. See **Chapter 3** and the **Terminology** appendix.
**Code:** [`engine/lib/coi.py`](engine/lib/coi.py) (`compute_agent_probability`: empirical transition counts vs human/agent reference dicts, KL-style terms, mapped via [`lib/agent_probability.py`](lib/agent_probability.py)).
**Optional narrative:** [`blog/02-behavioral-fingerprinting.md`](blog/02-behavioral-fingerprinting.md) walks a concrete study design (not required for operators).
### Theoretical implications
If reference kernels are fit on **stale** or **mislabeled** partitions, $\Delta_H-\Delta_A$ is not interpretable as distinguishability. Ground claims in SQ1 (**Introduction**) and the kernel subsection of **Chapter 3**.
---
## 9. Contamination generator $\mathcal{G}(\alpha)$
**Theory:** Given clean trajectories, $\mathcal{G}(\alpha)$ injects synthetic agent trajectories until the effective mixture reaches contamination $\alpha\in[0,1]$, defining training scenarios for robust policies (**Chapter 3**). Catalog-scale block expansion of kernels is discussed there with validation caveats—treat large product spaces as **research-grade** until your team signs off.
**Code:** [`engine/engine.py`](engine/engine.py) – `MarketEngine` mixes human/agent demand, uses `get_adjusted_transitions` / `sample_behavior_from_transitions`, and `alpha` when combining actor types and building demand proxies (`estimate_demand`). This is the **simulator** path, not a drop-in replacement for your production database.
### Theoretical implications
$\alpha$ in mixture $Q(p)$ is **agentic demand contribution** in the formal model, not necessarily “bot share of page views” unless your instrumentation equates them. Mismeasuring $\alpha$ biases robust objectives tied to a fixed contamination level.
---
## 10. Training and evaluation — local workflow
**Environment:** Python venv via Nx (`make install` / `nx run research:install`). Training commands load `.env.sweep`.
```bash
make train LOCAL_TRAIN_ARGS='--algo ppo --total-timesteps 50000'
make benchmark LOCAL_BENCHMARK_ARGS='--tiers static,surge,linear,qtable,ppo --alpha-values 0.0,0.3 --episodes 3 --no-wandb'
make benchmark.simple
```
Entrypoints: [`engine/train.py`](engine/train.py), [`engine/benchmark.py`](engine/benchmark.py), [`engine/spec.py`](engine/spec.py) (Nx wraps these—see `project.json` / research targets).
**Artifacts:** [`lib/config.py`](lib/config.py) – `PHANTOM_SIM_RUNS_DIR` (default `sim/rl/runs`), `PHANTOM_MODEL_REGISTRY_DIR`, etc.
**TensorBoard (optional):** [`docker-compose.yml`](docker-compose.yml) includes `tensorboard-rl` on host port **6007** (`./sim/rl/runs`) and `tensorboard-ml` on **6006** (`./experiments/ml/runs`).
### Theoretical implications
Local runs instantiate the **offline defense gym**: policies trained on simulator-induced distributions approximate the DR-RL narrative in **Chapter 3**, but hyperparameters ($\lambda$ on COI leakage, $\eta$ on UX, robust radius) change the effective ambiguity set. Cross-check `engine/` against the thesis before claiming figure-for-figure replication.
---
## 11. Training and evaluation — remote / scaled deployment
For **research at scale** (cloud quota and secrets required):
| Mechanism | Role |
| ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- |
| [`submit_ray_job.sh`](submit_ray_job.sh) | Ray jobs with `.env` injected; `RAY_MODE` is one of `single`, `distributed`, `benchmark`, `sweep`. Set the script's `ROOT` to your clone path. |
| `make tpu.ray.bootstrap` / `tpu.ray.*` | TPU Ray bootstrap (`TPU_CONF`, e.g. `tpu_orchestration/configs/v4_spot_us.conf`). |
| `make train.agent` / `make benchmark.agent` | W&B sweeps: `SWEEP_ID` in `.env.sweep`. |
| `make train.bootstrap` | Worker bootstrap: `REPO_URL`, `SWEEP_ID`, `GITHUB_TOKEN`. |
| `make docker.train.publish` | Trainer image (`TRAIN_IMAGE_REF` in Makefile). |
See `submit_ray_job.sh` for env vars (`WANDB_*`, `PHANTOM_*` TPU toggles).
### Theoretical implications
Distributed training does not change the **definitions** of the Stackelberg game or Wasserstein ambiguity; it changes compute and variance of empirical estimates. Align random seeds and data protocol across nodes or split results explicitly—otherwise you mix distributions in a way a single empirical law $\hat{P}_N$ in the thesis does not describe.
---
## 12. Evaluation, artifacts, and audit trail
**Benchmarks:** `make benchmark*` sweeps tiers and $\alpha$; CLI includes robustness knobs (see default `BENCHMARK_ARGS` in `submit_ray_job.sh`: `--robust-radius`, `--lambda-coi`, `--eta-ux`, etc.).
**Audit trail:** Store `git` SHA, CLI argv, non-secret `.env.sweep` keys, and W&B run IDs with published tables. For scientific claims, cite **Chapters 4–5 (Results, Discussion)** in the thesis PDF.
### Theoretical implications
Evaluation quality equals **simulator fidelity** plus **contamination modeling**. Separate theorem statements (assumption-based) from empirical curves (`engine`-dependent).
---
## 13. Operational suggestions
- **Staging:** Non-production namespaces; separate Kafka topics and Supabase projects where possible.
- **Rate limits / abuse:** Protect ingest endpoints; respect participant privacy.
- **Human vs agent sessions:** Comparable cohorts; record experimental condition in metadata.
- **Contracts:** `tests/e2e/` encodes minimal flows—use when APIs change.
### Theoretical implications
Non-stationary noise $\epsilon_t$ and drifting $\alpha$ confound benchmark interpretation. **Chapter 3** discusses mixture identification: isolate treatments when possible and document confounders when not.
---
## 14. Roadmap and gaps
**In repo:** Local dockerized stack, demo verticals, engine benchmarks, documented env and paths.
**Usually custom:** Production catalog without Supabase, identity/fraud layers, legal review of logging, Kafka/Airflow SLAs, hardening the pricing provider for real money.
**Thesis vs code:** The PDF is the **spec**; not every robustness term or large-catalog kernel construction is production-verified—see caveats in **Chapter 3**.
### Theoretical implications
Theorems in the thesis can be **stronger** than what observational firm logs support. The COI result assumes a clean experimental reading of the pricing policy; live market data may only support weaker claims.
---
## 15. Theory and thesis cross-references (quick index)
Use the **PDF table of contents** with these anchors:
| Topic | Thesis location |
| -------------------------------------------------------------------------- | ----------------------------------------------------- |
| Research questions (margin, distinguishability, contamination, mitigation) | **Introduction** |
| Sessions, events, $\hat{q}$, mixture $Q(p)$, $\alpha$ | **Chapter 3** — Problem Formalization, mixture demand |
| COI definition and erosion theorem | **Chapter 3** — COI framework |
| Transition kernels, MLE, $\mathcal{G}(\alpha)$ | **Chapter 3** |
| DR-RL, ambiguity sets, Stackelberg | **Chapter 3** |
| Symbol glossary (COI leakage, $f(\tau)$, UX, surrogates) | **Appendix — Terminology** |
| Empirical results and limitations | **Chapters 4–5** |
---
## 16. Quick file index (code)
| File | Role |
| ---------------------------------------------------------------------------------- | -------------------------------------------------- |
| [`engine/lib/coi.py`](engine/lib/coi.py) | KL-style trajectory comparison; agent probability. |
| [`engine/engine.py`](engine/engine.py) | `MarketEngine`, mixture, demand proxy path. |
| [`lib/agent_probability.py`](lib/agent_probability.py) | Divergence → probability score. |
| [`lib/config.py`](lib/config.py) | Paths and ports for artifacts. |
| [`engine/train.py`](engine/train.py), [`engine/benchmark.py`](engine/benchmark.py) | CLI entrypoints. |
| [`tpu_orchestration/`](tpu_orchestration/) | TPU configs and helpers. |
Many offline benchmarks run without a storefront once the research Python environment is installed; connecting production trajectories to kernel estimation still requires aligned instrumentation.

View File

@@ -183,6 +183,14 @@
</div>
<i class="fas fa-external-link-alt"></i>
</a>
<a href="documentation/" class="work-item">
<div class="work-info">
<h5>Documentation</h5>
<p>Operator setup, configuration, architecture, and research pipeline (MkDocs).</p>
<span class="work-venue">Platform</span>
</div>
<i class="fas fa-book"></i>
</a>
<a href="https://github.com/velocitatem/p4p" class="work-item" target="_blank">
<div class="work-info">
<h5>P4P Interaction Layer</h5>

53
docs/mkdocs.yml Normal file
View File

@@ -0,0 +1,53 @@
# MkDocs configuration for the PHANTOM operator documentation site.
# Built with `mkdocs build -f docs/mkdocs.yml` (see `make docs.platform`).
site_name: PHANTOM Platform
site_description: Operator and research documentation for the PHANTOM dynamic pricing research platform.
site_url: https://velocitatem.github.io/PHANTOM/documentation/
site_author: Daniel Rösel
repo_url: https://github.com/velocitatem/PHANTOM
repo_name: velocitatem/PHANTOM

# Paths are relative to this file: Markdown sources in docs/src, site output
# in docs/documentation (gitignored).
docs_dir: src
site_dir: documentation
# Treat build warnings as errors.
strict: true

theme:
  name: material
  # Light/dark palette pair with a toggle between them.
  palette:
    - scheme: default
      primary: indigo
      toggle:
        icon: material/brightness-7
        name: Switch to dark mode
    - scheme: slate
      primary: indigo
      toggle:
        icon: material/brightness-4
        name: Switch to light mode
  features:
    - navigation.instant
    - navigation.tracking
    - content.code.copy
    - search.suggest
    - search.highlight

nav:
  - Home: index.md
  - Setup: platform-setup.md
  - Business overview: business.md
  - Architecture: architecture.md
  - Configuration: configuration.md
  - Glossary: glossary.md
  - Roadmap & implementation notes: roadmap.md

markdown_extensions:
  - pymdownx.snippets:
      base_path:
        - ..
  # The docs pages contain ```mermaid fences; without this custom fence they
  # are rendered as plain code blocks instead of diagrams.
  - pymdownx.superfences:
      custom_fences:
        - name: mermaid
          class: mermaid
          format: !!python/name:pymdownx.superfences.fence_code_format
  - admonition
  - tables
  - toc:
      permalink: true

plugins:
  - search

1
docs/requirements.txt Normal file
View File

@@ -0,0 +1 @@
mkdocs-material>=9.5,<10

30
docs/src/architecture.md Normal file
View File

@@ -0,0 +1,30 @@
# Architecture
## System map
```mermaid
flowchart LR
U[Human / Agent Browser] --> W[Next.js Web App]
W -->|Price requests| P[Pricing Provider]
W -->|Interaction events| B[Backend Ingest API]
B --> K[Kafka]
K --> A[Airflow + Worker Jobs]
A --> R[Redis Model Registry]
P -->|Session/global prices| W
E[Research Engine + Experiments] --> A
E --> R
```
## Event and training path (conceptual)
1. **Online:** The browser emits events; the backend publishes to **Kafka**; schedulers and workers consume for ETL and model registry updates.
2. **Offline:** Notebooks and scripts under `experiments/` transform logs; **`engine/`** runs simulations, training, and benchmarks; artifacts land under paths from [`lib/config.py`](https://github.com/velocitatem/PHANTOM/blob/main/lib/config.py).
3. **Feedback:** Trained or rule-based policies surface through the **pricing provider** to the web app.
## Where to read more
- Ports and health checks: [README](https://github.com/velocitatem/PHANTOM/blob/main/README.md) and [Configuration](configuration.md).
- Formal notation for sessions, $\hat{q}$, and mixture demand: **Chapter 3 (Methodology)** in the thesis PDF.

21
docs/src/business.md Normal file
View File

@@ -0,0 +1,21 @@
# Business overview
PHANTOM targets **platform operators and researchers** who need to:
1. **Observe** session-level behavior and price quotes together (trajectories and policies—not just clicks).
2. **Separate** human-driven demand signals from agent-mediated reconnaissance where possible (distinguishability and contamination $\alpha$ in the thesis).
3. **Evaluate** pricing policies that remain useful when **Cost of Information (COI)** is under pressure from automated querying (formal COI framework and theorem in the thesis PDF).
## What this product is not
- A drop-in fraud API that returns “bot score” for every request without your event schema.
- A certified compliance guarantee for regulated pricing: it is a **research stack** with configurable experiments.
- A hosted SaaS: you run the stack (or adapt components) under your infrastructure policy.
## Self-service story (ideal path)
A team connects their **catalog** (today: Supabase-backed flows in this repo), streams **interaction events** through the ingest path, runs **labeled or weak-labeled** human vs agent sessions, estimates **behavioral kernels**, varies **contamination** in simulation, and **trains or benchmarks** robust policies via `engine/`. Steps and caveats are in [Setup](platform-setup.md) (same content as root `SETUP.md`).
## Thesis link
Problem statement, contributions, and research questions: **Introduction** and abstract in the [thesis PDF](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf).

63
docs/src/configuration.md Normal file
View File

@@ -0,0 +1,63 @@
# Configuration reference
This page condenses tables from [`README.md`](https://github.com/velocitatem/PHANTOM/blob/main/README.md) and points to code. Authoritative env templates: [`.env.example`](https://github.com/velocitatem/PHANTOM/blob/main/.env.example), [`.env.sweep.example`](https://github.com/velocitatem/PHANTOM/blob/main/.env.sweep.example).
## Core runtime (`.env`)
| Variable | Purpose | Typical value |
| ------------------------------- | ------------------------------ | ----------------------- |
| `STORE_MODE` | Web mode (`hotel` / `airline`) | `hotel` |
| `BACKEND_PORT` | Backend API | `5000` |
| `PROVIDER_PORT` | Pricing provider | `5001` |
| `KAFKA_HOST` | Kafka broker host | `localhost` |
| `KAFKA_PORT` | Kafka port | `9092` |
| `REDIS_PORT` | Redis port | `6377` |
| `REDPANDA_CONSOLE_PORT` | Kafka UI | `8084` (see compose) |
| `NEXT_PUBLIC_SUPABASE_URL` | Catalog / data | required for full stack |
| `NEXT_PUBLIC_SUPABASE_ANON_KEY` | Catalog / data | required |
| `AIRFLOW_FERNET_KEY` | Airflow | required |
| `AIRFLOW_SECRET_KEY` | Airflow web | required |
Web client validation: [`web/src/lib/config.ts`](https://github.com/velocitatem/PHANTOM/blob/main/web/src/lib/config.ts).
## Training / sweeps (`.env.sweep`)
| Variable | Purpose |
| --------------- | ----------------------------------------------- |
| `WANDB_API_KEY` | Weights & Biases |
| `WANDB_ENTITY` | Optional override |
| `WANDB_PROJECT` | Project name (default `capstone`) |
| `GITHUB_TOKEN` | Bootstrap / workers |
| `SWEEP_ID` | Sweep agents (`train.agent`, `benchmark.agent`) |
## Path overrides (`PHANTOM_*`)
Defined in [`lib/config.py`](https://github.com/velocitatem/PHANTOM/blob/main/lib/config.py):
| Variable | Default (conceptual) |
| ---------------------------- | ----------------------------------- |
| `PHANTOM_DATA_DIR` | `data/` |
| `PHANTOM_EXPERIMENTS_DIR` | `experiments/` |
| `PHANTOM_SIM_RUNS_DIR` | `sim/rl/runs` |
| `PHANTOM_MODEL_REGISTRY_DIR` | `data/models` |
| `PHANTOM_COLLECTED_DATA_DIR` | `experiments/agents/collected_data` |
## Makefile entrypoints
| Goal | Command |
| ---------------- | ------------------------------------------- |
| Platform up/down | `make platform.up` / `make platform.down` |
| Web dev | `make web.dev` |
| Train | `make train` (+ `LOCAL_TRAIN_ARGS`) |
| Benchmark | `make benchmark` (+ `LOCAL_BENCHMARK_ARGS`) |
| Docs site | `make docs.platform` |
See `make help` for the full list.

17
docs/src/glossary.md Normal file
View File

@@ -0,0 +1,17 @@
# Glossary
Short definitions point to the thesis **Terminology** appendix in the [PDF](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf) for full precision.
| Term | Meaning (operational) |
| --- | --- |
| **COI (Cost of Information)** | Expected price premium above a floor under the platform's policy; thesis KPI for pricing power. |
| **Trajectory \(\tau_s\)** | Ordered session events used as the behavioral record. |
| **Demand proxy \(\hat{q}\)** | Weighted aggregation of actions—what the platform observes instead of true demand. |
| **Contamination \(\alpha\)** | Agent share in the mixture demand model (thesis); not automatically “% of bots” in raw logs. |
| **Transition kernel \(\hat{\mathcal{T}}\)** | MLE Markov model over behavioral states / events for class \(H\) or \(A\). |
| **\(\Delta_H,\Delta_A\)** | Divergence scores vs human/agent prototypes (thesis notation). |
| **\(f(\tau)\)** | Weak agent probability from trajectory (implementation: `engine/lib/coi.py`). |
| **\(\mathcal{G}(\alpha)\)** | Contamination generator: synthetic agent trajectories to reach mixture level \(\alpha\). |
| **DR-RL** | Distributionally robust reinforcement learning training narrative in the thesis. |
| **Ambiguity set / Wasserstein** | Robust optimization neighborhood around an empirical demand law. |
| **Kappa/Lambda architecture** | Thesis term for streaming (online) vs batch/offline learning loops. |

21
docs/src/index.md Normal file
View File

@@ -0,0 +1,21 @@
# PHANTOM platform documentation
Welcome. This site mirrors the **operator and research** documentation for the PHANTOM repository: a research platform for studying **dynamic pricing** under **LLM-mediated browsing and transaction orchestration**, with ties to the academic thesis.
## Start here
| Document | Audience |
| --- | --- |
| [Setup](platform-setup.md) | Full walkthrough: Docker/web/ingest, kernels, contamination, RL training, and audit—content from `SETUP.md` in the repo. |
| [Configuration reference](configuration.md) | Env vars, paths, and Makefile entrypoints in one place. |
| [Roadmap & implementation notes](roadmap.md) | What is turnkey vs research-grade; thesis vs code. |
## Canonical sources in the repo
- Thesis PDF: [thesis-latest.pdf](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf)
- Root onboarding: single file [`SETUP.md`](https://github.com/velocitatem/PHANTOM/blob/main/SETUP.md) (included on this site via snippets—edit that file to change content).
- Quick start and command tables: [`README.md`](https://github.com/velocitatem/PHANTOM/blob/main/README.md)
## Academic project page
The research landing page (figures, abstract, links) is the site root on GitHub Pages: [velocitatem.github.io/PHANTOM/](https://velocitatem.github.io/PHANTOM/). Open **Documentation** in the Project Links menu there to return to this subsite.

View File

@@ -0,0 +1,5 @@
# Setup
The content below is included from the repository root file `SETUP.md` (single source of truth: platform bring-up, kernels, contamination, RL training, and thesis pointers by chapter).
--8<-- "SETUP.md"

26
docs/src/roadmap.md Normal file
View File

@@ -0,0 +1,26 @@
# Roadmap & implementation notes
This page is the **honesty pass** from the documentation plan: what clients can expect today versus what remains research-heavy.
## Turnkey in this repository
- **Local stack:** Docker Compose services for backend, Kafka, Redis, Airflow, pricing provider, etc.; Next.js via `make web.dev` (see [Platform setup](platform-setup.md)).
- **Demo verticals:** `hotel` and `airline` storefront modes.
- **Engine:** Benchmarks and training entrypoints (`make train`, `make benchmark`), KL-based agent scoring in [`engine/lib/coi.py`](https://github.com/velocitatem/PHANTOM/blob/main/engine/lib/coi.py), simulator mixing in [`engine/engine.py`](https://github.com/velocitatem/PHANTOM/blob/main/engine/engine.py).
- **Orchestration hooks:** Ray/TPU scripts (`submit_ray_job.sh`, `make tpu.ray.*`), W&B sweep agents, Docker trainer publish target.
## Usually requires custom engineering
- **Non-Supabase catalog** or checkout flows without adapting the web + backend contracts.
- **Production SLAs** on Kafka, schema registry, or PII boundaries for your jurisdiction.
- **Tight coupling** to a legacy pricing engine without mapping its API to the provider abstraction.
## Thesis vs code
- The **thesis** states theorems and constructions (COI erosion, kernels, \(\mathcal{G}(\alpha)\), DR-RL).
- The **codebase** implements a **subset** of that story for experiments: verify CLI flags and simulator assumptions before claiming 1:1 equivalence with every equation.
- **Catalog-scale kernel expansion** is discussed in **Chapter 3** with explicit validation caveats—do not assume row-stochasticity and Markov structure are automatically preserved at full product cardinality without review.
## Suggested client messaging
Position PHANTOM as a **reproducible research and evaluation stack** for agent-aware pricing, with a path to custom integration—not as a black-box “turn on anti-agent pricing” product without data and engineering investment.

View File

@@ -41,7 +41,7 @@
<!-- Markers p and E[P] -->
<line x1="150" y1="340" x2="150" y2="160" stroke="#E37862" stroke-width="2" stroke-dasharray="6,4"/>
<text x="150" y="375" font-family="Georgia" font-style="italic" font-size="22" fill="#E37862" text-anchor="middle">p</text>
<text x="150" y="375" font-family="Georgia" font-style="italic" font-size="22" fill="#E37862" text-anchor="middle"><tspan text-decoration="underline">p</tspan></text>
<line x1="260" y1="340" x2="260" y2="160" stroke="#85B589" stroke-width="2" stroke-dasharray="6,4"/>
<text x="260" y="375" font-family="Georgia" font-style="italic" font-size="22" fill="#85B589" text-anchor="middle">E[P]</text>
@@ -49,7 +49,7 @@
<!-- COI Annotation -->
<line x1="150" y1="150" x2="260" y2="150" stroke="#E37862" stroke-width="2" marker-start="url(#arrow)" marker-end="url(#arrow)"/>
<text x="310" y="138" font-size="16" fill="#E37862" text-anchor="middle">average information rent</text>
<text x="310" y="118" font-family="Georgia" font-style="italic" font-size="22" fill="#E37862" font-weight="bold" text-anchor="middle">COI := E[P] - p</text>
<text x="310" y="118" font-family="Georgia" font-style="italic" font-size="22" fill="#E37862" font-weight="bold" text-anchor="middle">COI = E[P] - <tspan text-decoration="underline">p</tspan></text>
</g>
<!-- Bottom: Agent Saturation -->

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

View File

@@ -1,12 +1,15 @@
import numpy as np
from typing import Dict
from lib.agent_probability import DEFAULT_AGENT_PRIOR, estimate_agent_probability
def compute_agent_probability(
trajectory: list,
human_transitions: Dict,
agent_transitions: Dict,
temperature: float = 1.0,
prior_agent: float = DEFAULT_AGENT_PRIOR,
) -> float:
"""estimate agent probability via KL divergence between trajectory transitions and reference models
@@ -18,10 +21,10 @@ def compute_agent_probability(
agent_transitions: reference transition dict from agent MDP (event->event->prob)
returns:
agent probability in [0, 1] via softmax over KL divergences
agent probability in [0, 1] via sigma((delta_h - delta_a) / T)
"""
if len(trajectory) < 2:
return 0.0 # insufficient data, assume human
return float(prior_agent)
# build empirical transition distribution from trajectory
trans_counts = {}
@@ -54,11 +57,12 @@ def compute_agent_probability(
kl_human = kl_div(empirical, human_transitions)
kl_agent = kl_div(empirical, agent_transitions)
# convert to probability via softmax (lower KL = higher prob)
t = float(max(temperature, 1e-6))
exp_h = np.exp(-kl_human / t)
exp_a = np.exp(-kl_agent / t)
return float(exp_a / (exp_h + exp_a + 1e-10))
return estimate_agent_probability(
delta_h=kl_human,
delta_a=kl_agent,
temperature=temperature,
prior_agent=prior_agent,
)
def extract_purchases(trajectories: list) -> Dict[int, int]:

View File

@@ -7,10 +7,9 @@ from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, List, Sequence
import joblib
import numpy as np
from experiments.ml.arch import featurize_trajectory
from lib.agent_probability import DEFAULT_AGENT_PRIOR, estimate_agent_probability
DEFAULT_ARTIFACT_DIR = Path("data/separability")
@@ -18,11 +17,7 @@ DEFAULT_ARTIFACT_DIR = Path("data/separability")
@dataclass
class SeparabilityArtifacts:
scaler: object
classifier: object
states: List[str]
event_transitions: Dict[str, Dict[str, float]]
feature_dim: int
def _normalize_events(raw_events: Sequence[object]) -> List[object]:
@@ -36,7 +31,9 @@ def _normalize_events(raw_events: Sequence[object]) -> List[object]:
return events
def _event_transition_distribution(events: Sequence[object]) -> Dict[str, Dict[str, float]]:
def _event_transition_distribution(
events: Sequence[object],
) -> Dict[str, Dict[str, float]]:
counts: Dict[str, Dict[str, int]] = {}
for src_evt, dst_evt in zip(events, events[1:]):
src_name = getattr(src_evt, "eventName", "unknown")
@@ -47,11 +44,15 @@ def _event_transition_distribution(events: Sequence[object]) -> Dict[str, Dict[s
distribution: Dict[str, Dict[str, float]] = {}
for src, dsts in counts.items():
total = float(sum(dsts.values()))
distribution[src] = {dst: val / total for dst, val in dsts.items()} if total else {}
distribution[src] = (
{dst: val / total for dst, val in dsts.items()} if total else {}
)
return distribution
def _kl_divergence(p: Dict[str, Dict[str, float]], q: Dict[str, Dict[str, float]]) -> float:
def _kl_divergence(
p: Dict[str, Dict[str, float]], q: Dict[str, Dict[str, float]]
) -> float:
eps = 1e-10
total = 0.0
for src, dsts in p.items():
@@ -61,28 +62,28 @@ def _kl_divergence(p: Dict[str, Dict[str, float]], q: Dict[str, Dict[str, float]
return float(total)
def load_artifacts(artifact_dir: Path | str = DEFAULT_ARTIFACT_DIR) -> SeparabilityArtifacts:
def load_artifacts(
artifact_dir: Path | str = DEFAULT_ARTIFACT_DIR,
) -> SeparabilityArtifacts:
artifact_dir = Path(artifact_dir)
scaler_path = artifact_dir / "scaler.joblib"
model_path = artifact_dir / "classifier.joblib"
metadata_path = artifact_dir / "metadata.json"
if not (scaler_path.exists() and model_path.exists() and metadata_path.exists()):
if not metadata_path.exists():
raise FileNotFoundError(
f"Separability artifacts not found in {artifact_dir}. Run sim.strong_learner.train first."
f"Separability metadata not found in {artifact_dir}. Provide metadata.json with event transitions."
)
scaler = joblib.load(scaler_path)
classifier = joblib.load(model_path)
with open(metadata_path, "r", encoding="utf-8") as fin:
metadata = json.load(fin)
transitions = metadata.get("event_transitions")
if not isinstance(transitions, dict):
raise ValueError(
"metadata.json must contain an 'event_transitions' object with 'human' and 'agent' kernels"
)
return SeparabilityArtifacts(
scaler=scaler,
classifier=classifier,
states=list(metadata["reference_states"]),
event_transitions=metadata["event_transitions"],
feature_dim=int(metadata["feature_dim"]),
event_transitions=transitions,
)
@@ -92,37 +93,44 @@ def score_session(
) -> dict:
events = _normalize_events(raw_events)
if not events:
return {"prob_agent": 0.0, "delta_h": 0.0, "delta_a": 0.0}
reference_mdp = {"states": artifacts.states}
features = featurize_trajectory(events, mdp=reference_mdp, input_dim=artifacts.feature_dim)
scaled = artifacts.scaler.transform(features.reshape(1, -1))
prob_agent = float(artifacts.classifier.predict_proba(scaled)[0, 1])
return {
"prob_agent": float(DEFAULT_AGENT_PRIOR),
"delta_h": 0.0,
"delta_a": 0.0,
"gap": 0.0,
}
session_dist = _event_transition_distribution(events)
delta_h = _kl_divergence(session_dist, artifacts.event_transitions.get("human", {}))
delta_a = _kl_divergence(session_dist, artifacts.event_transitions.get("agent", {}))
gap = float(delta_h - delta_a)
prob_agent = estimate_agent_probability(delta_h=delta_h, delta_a=delta_a)
return {
"prob_agent": prob_agent,
"delta_h": delta_h,
"delta_a": delta_a,
"gap": gap,
}
def estimate_alpha(prob_agent: float, delta_h: float, delta_a: float, temperature: float = 1.0) -> float:
divergence_mass = delta_h + delta_a
if divergence_mass <= 1e-8:
return float(prob_agent)
ratio = delta_a / divergence_mass
blended = 0.5 * prob_agent + 0.5 * ratio
if temperature <= 0:
return float(np.clip(blended, 0.0, 1.0))
scaled = 1.0 / (1.0 + np.exp(-temperature * (blended - 0.5)))
return float(np.clip(scaled, 0.0, 1.0))
def estimate_alpha(
prob_agent: float,
delta_h: float,
delta_a: float,
temperature: float = 1.0,
prior_agent: float = DEFAULT_AGENT_PRIOR,
) -> float:
_ = prob_agent
return estimate_agent_probability(
delta_h=delta_h,
delta_a=delta_a,
temperature=temperature,
prior_agent=prior_agent,
)
def score_sessions(raw_sessions: Iterable[Sequence[object]], artifacts: SeparabilityArtifacts) -> List[dict]:
def score_sessions(
raw_sessions: Iterable[Sequence[object]], artifacts: SeparabilityArtifacts
) -> List[dict]:
return [score_session(events, artifacts) for events in raw_sessions]

View File

@@ -1,6 +1,26 @@
$pdf_mode = 1;
$pdflatex = 'pdflatex -synctex=1 -interaction=nonstopmode -file-line-error %O %S';
$bibtex_use = 2; # run bibtex when needed
$bibtex = 'bibtex %O %B';
$bibtex_use = 2; # run biber when biblatex .bcf changes
# biber cwd is paper/build; scripts/nx_paper.sh symlinks ../build/bib -> ../src/bib so
# datasources log as bib/references.bib and latexmk's -e check works from paper/src
$biber = 'biber %O %S';
# Stale latexmk db: biblatex uses biber + .bcf, but the fdb can keep a "bibtex" rule after a bad
# run. Then biber never runs and citations stay undefined. Read whole fdb (small) so the rule
# line is never missed after a long dependency list.
for my $job (qw(main main-genpop summary)) {
my $bcf = "../build/$job.bcf";
my $bbl = "../build/$job.bbl";
my $fdb = "../build/$job.fdb_latexmk";
next unless -e $fdb && -e $bcf;
my $drop = !-e $bbl;
if ( !$drop && open my $fh, '<', $fdb ) {
local $/;
my $body = <$fh>;
close $fh;
$drop = 1 if defined $body && $body =~ /\["bibtex $job"\]/;
}
unlink $fdb if $drop;
}
$pdf_previewer = 'zathura %O %S';
$clean_ext = 'synctex.gz bbl bcf run.xml fls fdb_latexmk glg glo gls ist blg lof lot out toc';

View File

@@ -1,2 +0,0 @@
manim>=0.18,<1
numpy>=1.24

View File

@@ -44,6 +44,23 @@
"command": "bash scripts/nx_paper.sh build-arxiv",
"cwd": "."
}
},
"build-summary": {
"executor": "nx:run-commands",
"outputs": [
"{projectRoot}/build/summary.pdf"
],
"options": {
"command": "bash scripts/nx_paper.sh build-summary",
"cwd": "."
}
},
"watch-summary": {
"executor": "nx:run-commands",
"options": {
"command": "bash scripts/nx_paper.sh watch-summary",
"cwd": "."
}
}
},
"tags": [

View File

@@ -16,7 +16,13 @@
"chapters/04-results"
"chapters/05-discussion"
"chapters/06-conclusion"
"chapters/acknowledgements"
"article"
"art12"))
"art12")
(LaTeX-add-labels
"app:compute_budget"
"tab:compute_derivation"
"app:kl_zeros"
"app:revelation_log"))
:latex)

View File

@@ -7,18 +7,19 @@
%% \end{figure}
\section{Introduction}
\label{sec:introduction}
In this paper we present an exploration and defense against the presence of new commercial entities in digitally powered platforms, preserving market equilibrium in the age of AI. This research establishes the following contributions: definition and formalization of non-human transactors in e-commerce platforms, development of a testing-ground for capturing the behavioral essence of these transactors across a large variety of digital systems, construction of a discriminative model (to prove distinguishability) as a strong learner for downstream mitigation of contamination by non-human entities, translation of such learned distinguishability into existing dynamic pricing machine learning loops, and finally establishment of a high-level KPI-affecting causal effect and cost-saving framework for the future of internet commerce in the presence of such non-human learners.
In this paper we present an exploration and defense against the presence of new commercial entities in digitally powered platforms, preserving market equilibrium in the age of AI. This research establishes the following contributions: definition and formalization of non-human transactors in e-commerce platforms, development of a testing-ground for capturing the behavioral essence of these transactors across a large variety of digital systems, construction of a discriminative model (to prove distinguishability) as a guiding teacher for downstream mitigation of contamination by non-human entities, translation of such learned distinguishability into existing dynamic pricing machine learning loops, and finally establishment of a high-level KPI-affecting causal effect and cost-saving framework for the future of internet commerce in the presence of such non-human learners.
This research effort touches a large variety of domains, spanning behavioral economics for understanding the rationality of behavior as theorized by the concept of homo economicus, agent-based modeling to translate our learned distinguishability into disjoint dynamic pricing systems, reinforcement learning which serves as the SOTA for price-learners, and dynamic pricing and market equilibrium theory to understand the risks of possible supra-competitive pricing phenomena in cases of adversarial pricing systems driving the market out of equilibrium. \footnote{Given the rapid evolution of the field we acknowledge all developments with a cutoff set at the date of March 1st 2026.}
This research effort touches a large variety of domains, spanning behavioral economics for understanding the rationality of behavior as theorized by the concept of homo economicus, agent-based modeling to translate our learned distinguishability into disjoint dynamic pricing systems, reinforcement learning which serves as the SOTA for price-learners, and dynamic pricing and market equilibrium theory to understand the risks of possible supra-competitive pricing phenomena in cases of adversarial pricing systems driving the market out of equilibrium. \footnote{Given the rapid evolution of the field we acknowledge all developments with a knowledge cutoff set at the date of March 1st 2026.}
\subsection{Motivation and Market Context}
The current innovation boom in generative artificial intelligence and its applications to knowledge-based work tasks has brought many competing technologies for browser-use automation, with benchmarks and evaluations \parencite{xia_evaluation-driven_2025} motivating the development of capabilities focused on commercial research, understanding, and transaction execution \parencite{xie_osworld_2024}. The ``AI Agent'' market is forecasted to grow from around USD 5-8 billion in 2025 to USD 42-52 billion by 2030. This surge reflects adoption in e-commerce, customer service, and enterprise automation, where agents handle interactions previously done by humans, raising the question of how these systems should be designed for future robustness as well as how to maintain a competitive edge in the analytical components of e-commerce platforms \parencite{markntel_advisors_global_2025}.
The key stakeholders affected by the threat of increasing agent-driven traffic include online businesses and platform operators (especially in bot-heavy sectors like retail, travel, and financial services), their security, fraud, and engineering teams, end users whose accounts and data are exposed and whose experience degrades, regulators and legal stakeholders responding to breaches and fraud, and the attackers or bot operators driving the automation \parencite{imperva_rapid_2025}.
The key stakeholders affected by the threat of increasing agent-driven traffic include online businesses and platform operators (especially in bot-heavy sectors like retail, travel, and financial services), their security, fraud, and engineering teams, end users whose accounts and data are exposed and whose user experience degrades, regulators and legal stakeholders responding to breaches and fraud, and the attackers or bot operators driving the automation \parencite{imperva_rapid_2025}.
The industry has already seen legal action in cases like Amazon against Perplexity \parencite{ghaffary_amazon_2025}, stemming from the difficulty of identifying traffic from hybrid systems like the Commet browser. This paper explores such systems to better understand what the interaction data looks like and what it means for dynamic pricing and recommendation systems downstream. This observed impact indicates a need for prevention of secondary negative effects on the ``legacy'' systems which power modern revenue sources for many companies. Dynamic pricing algorithms rely on directly translating demand features $q$ to new price assignments $\hat{p}$ across a catalogue of products of size $N$. This opens opportunities to design a \textit{tabula rasa} of digital market mechanisms that will shape the future of commerce in the age of artificial intelligence.
The industry has already seen legal action in cases like Amazon against Perplexity \parencite{ghaffary_amazon_2025}, stemming from the difficulty of identifying traffic from hybrid systems like the Comet browser. This paper explores such systems to better understand what the interaction data looks like and what it means for dynamic pricing and recommendation systems downstream. This observed impact indicates a need for prevention of secondary negative effects on the ``legacy'' systems which power modern revenue sources for many companies. Dynamic pricing algorithms rely on directly translating demand features $q$ to new price assignments $\hat{p}$ across a catalogue of products of size $N$. Our exploration of this field opens opportunities to design a \textit{tabula rasa} of digital market mechanisms that will shape the future of commerce in the age of artificial intelligence.
\subsection{Solution Space Overview}
Dynamic pricing systems, as presented by \textcite{mueller_low-rank_2019}, often deal with sparse low-rank data of demand signals which, combined with contamination from agents, creates complex interactions that impact pricing. To further complicate the problem, certain commercial settings such as the one presented by \textcite{amjad_censored_2017} must address the true demand of products under censored observations. This provides a formulation for handling demand in our case with multiple kinds of commercial mediators: $\hat{q} \gets q_A + q_H$, where $q_A$ represents the distribution of demand generated by agentic mediators and $q_H$ represents that of true human demand; these are two distinct populations with divergent objective functions.
@@ -26,13 +27,14 @@ Dynamic pricing systems, as presented by \textcite{mueller_low-rank_2019}, often
We formally define interaction data as coming from some actor which can either be an agent ($A$) or human ($H$). For purposes of this research, an agent is an algorithmic loop with the ability to access a web platform and perform actions such as clicks, scrolls, and input field fills. The loop terminates when the internal large language model judges the provided task definition as complete. A detailed breakdown can be found in \cref{algagent-loop}.
\subsection{Research Questions}
\label{sec:research_questions}
This dissertation is organized around one main research question and three supporting sub-questions:
This dissertation is organized around one main research question and three supporting pillar questions:
\begin{enumerate}
\item[\textbf{Main RQ}] How can dynamic pricing systems preserve margin integrity when transaction orchestration is increasingly mediated by non-human agents?
\item[\textbf{SQ1}] \textit{Distinguishability}: Can agent and human sessions be reliably distinguished from behavioral interaction signals alone, without relying on network-level or device fingerprinting?
\item[\textbf{SQ2}] \textit{Theoretical Impact}: What is the formal relationship between agent contamination levels and the erosion of pricing power in dynamic pricing systems?
\item[\textbf{SQ3}] \textit{Robust Mitigation}: How can pricing policies be constructed to maintain margin integrity under unknown and non-stationary levels of agent contamination?
\item[\textbf{SQ1}] \hypertarget{sq1}{}\textit{Distinguishability}: Can agent and human sessions be reliably distinguished from behavioral interaction signals alone, without relying on network-level or device fingerprinting?
\item[\textbf{SQ2}] \hypertarget{sq2}{}\textit{Theoretical Impact}: What is the formal relationship between agent contamination levels and the erosion of pricing power in dynamic pricing systems?
\item[\textbf{SQ3}] \hypertarget{sq3}{}\textit{Robust Mitigation}: How can pricing policies be constructed to maintain margin integrity under unknown and non-stationary levels of agent contamination?
\end{enumerate}
@@ -64,4 +66,6 @@ Extract final result $r$ from terminal state\;
\end{algorithm}
The previously described goal of distinguishability allows us to formulate a task which entails taking raw interaction data for either actor and creating a composite demand estimate $\hat{q}$. We propose a robust optimization objective defined in our methodology, transforming the pricing problem into a form of Distributionally Robust Optimization \parencite{kuhn_distributionally_2025} where the learner must guard against adversarial contamination in observed demand distributors. In this setting we must learn to make decision that perform under the assumption of not having a single estimated probability distribution but under an ambiguity set of any distribution, of which we have limited information. In our case as stated is a mixture of distributions with a parameter which is unknown and non-stationary.
The previously described goal of distinguishability allows us to formulate a task which entails taking raw interaction data for either actor and creating a composite demand estimate $\hat{q}$. We propose a robust optimization objective defined in our methodology, transforming the pricing problem into a form of distributionally robust optimization \parencite{kuhn_distributionally_2025} in which the learner guards against adversarial contamination in observed demand \emph{distributions}. The decision rule (in the policy) must perform when the data-generating mechanism is not a single known distribution but any member of an ambiguity set described only partially. Here that mechanism is a mixture whose weight and components need not be stationary.
Our work's contributions are best understood as a dependency chain centered around dynamic pricing. The work begins with a formal account of why a non-human mediator threatens pricing power, then we construct a platform capable of generating the interaction data needed for our study of that threat. On top of that \textit{substrate} we build behavioral models to determine whether human and agent traffic can be separated. The resulting contamination estimate is then translated into the pricing core itself, where it serves as a key signal for robust control under distributional uncertainty. The breadth of the thesis is therefore a consequence of the problem structure: the theoretical, behavioral, systems, and control components are not separate projects, but successive requirements of a single argument.

View File

@@ -1,15 +1,16 @@
\section{Literature Review}
\label{sec:literature_review}
To better understand all wedges of the current works, we must start by exploring the nature of agents, agentic computer use and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover, leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. This latter case of agents shopping is currently pending legal action in the case of \textcite{noauthor_amazoncom_2026} which is currently being treated as a violation of the Computer Fraud and Abuse Act. The introduction of these mediating actor entities into economic systems, is further creating a threat of false-name bidding \parencite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems, demonstrate whitewashing in AI agents which can ignore defensive mechanisms by re-entry with different identities \parencite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for distinguishing non-human reconnaissance from genuine human demand expression and integrating that distinguishability into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap, is what our contribution aims to address, particularly for the aforementioned stakeholder groups.
To situate the work we review agents and agentic computer use, web automation, economic reasoning, and strategic interaction, then turn to data-driven dynamic pricing under uncertainty. The main technical risk is not ``agents buying things'' in isolation but agents reshaping the behavioral and demand signals on which downstream pricing depends. Related litigation is already underway---for example \textcite{noauthor_amazoncom_2026} under the Computer Fraud and Abuse Act. Mediating actors surface classic concerns such as false-name bidding \parencite{yokoo_effect_2004} or pseudonymous re-entry which can whitewash reputation and weaken defenses \parencite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, whereas classical bot detection targets security and access control. The gap we target is a principled way to separate non-human reconnaissance from genuine human demand expression and to fold that signal into pricing without degrading legitimate users (we track harm with a user-experience index), for the stakeholders named in the introduction.
\subsection{Agent Taxonomy and Definitions}
An agent in the context of artificial intelligence is generally defined as anything that can reason and act upon observations of its environment (collected through sensory inputs) and carry out actions through effectors. Moreover, a rational agent is an entity that is capable of perceiving the world around it and taking actions to advance specified goals. This definition by \textcite{russell_artificial_2021} is further developed in an economic context by \textcite{parkes_economic_2015}, suggesting AI research attempts to construct a synthetic \textit{homo economicus}, which may also be termed \textit{machina economicus}.
A specific class or taxon of this \textit{machina economicus}, the Large Language Model (LLM) agent, is defined as an autonomous system capable of achieving goals and adapting post-training, often without needing explicit code or fundamental model changes \parencite{xia_evaluation-driven_2025}.
We must however acknowledge the current SOTA as presented by OSWORLD simulations by \textcite{xie_osworld_2024} have demonstrated that multi-modal tasks across desktop and web interaction modes, have a top-performing score of only 12.24\% success, whereas humans have a higher 72\% success rate; this is linked to the lack of grounding of these agents and their inability of handling unexpected errors. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems.
We must however acknowledge that OSWORLD simulations by \textcite{xie_osworld_2024} report a top success rate of only 12.24\% on multi-modal desktop and web tasks, versus about 72\% for humans, reflecting limited grounding and brittle recovery from unexpected errors. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems.
We model an agent session as producing some events with lower in-session conversion levels relative to humans, this we state in our assumption that $P(\text{purchase} \vert A) < P(\text{purchase} \vert H)$ but with a potentially higher volatility in $\hat{q}$, which we observe through the look-to-book metrics in our simulation.
We model agent sessions as producing lower in-session conversion than humans, i.e.\ $P(\text{purchase} \vert A) < P(\text{purchase} \vert H)$, with potentially higher volatility in $\hat{q}$, which we proxy with look-to-book metrics in the simulator.
\subsection{Economic Agents: From Homo Economicus to Machina Economicus}
@@ -21,9 +22,9 @@ A HAP (HTTP Agent Profile) protocol has been developed as an internet draft by \
\subsection{Problem Evidence and Market Impact}
The statistical issue of contamination in dynamic pricing systems that observe demand features as a means to update prices has been documented in various previous contexts. The airline industry (which has accounted for 24\% of observed disruptions) has seen malicious activity with a measureable impact on skewing key performance indicators by behavior visible in the look-to-book metrics. Excessive reconnaissance traffic inflates search volume without corresponding completed bookings, thereby skewing demand forecasts and disrupting dynamic pricing models. Demand proxies have also been observed to cause significant threat to inventory management by creating artificial scarcity that distorts the demand-supply relationships in the enterprise model. Censored demand as shown by \textcite{amjad_censored_2017} can also be observed in low-bias demand under-estimation caused by a distortion effect coming from non-human traffic data \parencite{imperva_rapid_2025}.
Contamination in dynamic pricing systems that observe demand features to update prices appears across several industries. Aviation (about 24\% of observed disruptions in one industry survey) illustrates how malicious or scripted traffic can skew KPIs visible in look-to-book metrics. Excessive reconnaissance traffic inflates search volume without corresponding completed bookings, thereby skewing demand forecasts and disrupting dynamic pricing models. Demand proxies have also been observed to pose a significant threat to inventory management by creating artificial scarcity that distorts the demand-supply relationships in the enterprise model. Censored demand, as shown by \textcite{amjad_censored_2017}, can also be observed in low-bias demand under-estimation caused by a distortion effect coming from non-human traffic data \parencite{imperva_rapid_2025}.
When dynamic pricing algorithms operate on highly contaminated or noisy data, the risk grows significantly in creating inaccurate price inferences. The emergent mitigation driven by un-informed reward and regret signals might lead to price suppression for sales continuity which results in harming margins and resulting in a revenue loss. System that poorly fit undesired behavior might result in price gouging, which calls for strong guardrails while preserving targeted business strategy \parencite{mullapudi_reinforcement_2025}.
When dynamic pricing algorithms train on highly contaminated or noisy data, mis-inference risk rises and revenue is threatened. Mis-specified reward and regret signals can push prices down to preserve volume, eroding margins, while misfit to legitimate demand can produce the opposite failure mode of price gouging; both failure modes call for guardrails that preserve commercial intent \parencite{mullapudi_reinforcement_2025}.
%Documented instances of agent-driven market disruptions - Quantitative evidence of pricing manipulation - Case studies from affected industries
@@ -31,11 +32,11 @@ When dynamic pricing algorithms operate on highly contaminated or noisy data, th
\subsection{Theoretical Foundations: Economic Parallels}
Early hints of exploration of prices in a standard English auction explored by \textcite{varian_economic_1995} which hints at exploration of prices in a sequential manner, which leads to a marginally different cost to the bidder than the reservation price of the seller. This is a setting in which there is no cost incured by the buyer for their actions or exploring prices in the market. They propose that any agent responsable for the pricing of a good must be imune to dynamic strategies which might extract private information from a market. A key take-away which relates to the Vickery auction mechanism (also called a \textit{direct mechanism}) suggests that not only would defenses against such exploitation be necessary, but the construction of a mechanism in which revelation of the true willingness to pay is the dominant strategy for commerce.
\textcite{varian_economic_1995} studies sequential exploration of prices in an English auction: the bidder's cost can differ slightly from the seller's reservation price. In that setting the buyer incurs no separate cost for searching or exploring prices. The authors argue that any party \emph{responsible} for pricing must be immune to dynamic strategies that extract private information. The link to the Vickrey (second-price) auction, a \textit{direct mechanism}, is that defenses against exploitation may need to pair with mechanisms in which truthful revelation of willingness to pay is incentive-compatible.
Like in classical revenue-maximizing auctions \parencite{roughgarden_cs364a_2013} we assume that the human actor in our system has a private valuation $v$ which we formally draw from intrinsically defined distributions. The important note here is that the agent proxy does not have a mechanism to convey this private information into the demand data which directly impacts the pricing systems.
The key component of this mediation between agents and commercial platforms lays in the transaction costs related to information gathering and negotiation. As proposed by \textcite{shahidi_coasean_2025} these costs are bound to collapse towards zero (which we demonstrate mathematically), calling for a re-evaluation of the boundaries between firms and markets. As argued by \textcite{coase_nature_1937}, the market participation and time associated with that participation, is critical part of the Coasean transaction cost logic which includes the discovery or relevant pricing within a given market. This process of price discovery without the presence of AI Agents can be time consuming and resource intensive. To build on top of this work we provide a proof of optimal conditions theorised by Coaes as an extension to AI-mediated markets.
The mediation between agents and commercial platforms turns on transaction costs of information gathering and negotiation. \textcite{shahidi_coasean_2025} argue these costs tend toward zero (we give a complementary formal result in Section~3). \textcite{coase_nature_1937} treats search and participation time as central to Coasean transaction costs, including discovery of relevant prices. Price discovery without AI intermediaries is already costly. We extend this classical Coasean logic to AI-mediated markets.
% Economic foundations: relating the problem to options pricing theory. Cost of Information (COI) concept and its relevance
@@ -43,13 +44,13 @@ The key component of this mediation between agents and commercial platforms lays
\subsection{Landscape of Existing Work}
Explorations of the algorithmic collusion by LLMs \parencite{fish_algorithmic_2025} has demonstrated a cross-model tendency of market division with a strong sensitivity to instructions provided in the ``system prompt''. If a dynamic pricing algorithm which is trained to respond to market signals learns to coordinate with competitor agents (or become manipulated by those agents), the market equilibrium is under threat of destabilization. This is particularly true for Q-learning pricing learners as demonstrated by \textcite{calvano_artificial_2018}.
Work on algorithmic collusion by LLMs \parencite{fish_algorithmic_2025} reports cross-model sensitivity to instructions in the ``system prompt,'' including tendencies toward market division. If a dynamic pricing algorithm which is trained to respond to market signals learns to coordinate with competitor agents (or become manipulated by those agents), the market equilibrium is under threat of destabilization. This is particularly true for Q-learning pricing learners as demonstrated by \textcite{calvano_artificial_2018}.
Our effort to combat contamination stems from research by \textcite{hardt_strategic_2015} on strategic classification, in conjunction with \textcite{liu_contextual_2024} who demonstrate a linear regret if contamination is ignored. The strategic classification adversarial effect comes from an effort to manipulate some representative features used in a learning pipeline, which can result in lower prices on loans or lower prices from dynamic pricing algorithms.
To bridge the gap between detection and robust pricing, we look at work in Distributionally Robust Optimization (DRO). As defined by \textcite{kuhn_wasserstein_2024}, DRO provides a framework for decision-making under ambiguity, where the true data distribution is unknown but lies within a ``Wasserstein ball'' of a target distribution. In our context, the ``ambiguity set'' represents the uncertainty introduced by agentic reconnaissance. By optimizing for the worst-case distribution within this set, pricing mechanisms can become resilient to the distributional shifts such as the ones caused by non-human actors, effectively robustifying the revenue function against the contamination described in our problem statement.
In order to create an environment in which prices can be tested against a demand estimate generated by some behavioral model, we take inspiration from the architecture proposed by \textcite{ie_recsim_2019} in the RecSim platform built for recommendation systems. By modeling the distinct user behavior as POMDPs we can generate faithful interactions which allow us to generalize, past the constraint which is also present in recommendation systems, of rarely having enough experience with individual actor's interactions for good recommendations without generalization. The key inspiration comes from the user choice modeling which we translate to a user transition model for each distinct actor type (agent or human). We further consider the possibility of modeling our quantitative research platform using dynamic Bayesian networks for the sake of tractability within the system. The contribution or RecSim enables researchers to better understand learning algorithms in fixed environments, a gap we identify as needing to be bridged within the space of dynamic pricing.
To build an environment where prices face a demand estimate from a behavioral model, we draw on RecSim \parencite{ie_recsim_2019}. Modeling user behavior as partially observable Markov decision processes yields synthetic interactions that generalize past the usual cold-start limit of per-user data. We translate RecSim-style user choice modeling into per-class transition models (human versus agent). Dynamic Bayesian networks remain a tractability option for the full platform. RecSim's main contribution is a sandbox for recommender learners; we adapt that idea to dynamic pricing, yielding a simulator for pricing under contamination.
% TODO: mention https://github.com/meta-pytorch/OpenEnv/tree/main/envs/browsergym_env
We also acknowledge the difficulty in similarly affected fields such as authorship, where \textcite{ganie_uncertainty_2025} demonstrate the theoretical limits of the distributional divergence between text authored by a human or large language model. Their approach of computing the divergence between two distributions demonstrates purely theoretically that no classifier can outperform random guessing on their particular task. This is yet another factor to take into consideration when exploring the potential mitigation strategies.

View File

@@ -1,13 +1,14 @@
\section{Methodology}
\label{sec:methodology}
% Extra notes and clarifications: we observed some humans and get their transition probabilities between event types
% We modify behavioral profiles of transition matrices with price elasticity matrices generated by sample valuations of a distributing.
This section details the theoretical and practical framework developed to address dynamic pricing under the influence of non-human actors. We begin by formalizing the problem environment and the nature of the actors. We then derive the \textit{Cost of Information} (COI) theorem, proving the erosion of pricing power in the limit of agent saturation. Following this, we outline our generative contamination strategy using GOFAI-driven distinguishability and transition probability learning. Finally, we formulate the robust control problem as a Stackelberg game solved via Distributionally Robust Reinforcement Learning (DR-RL) with constructed ambiguity sets.
This section presents the theoretical and practical framework developed to address dynamic pricing under the influence of non-human actors. We begin by formalizing the problem environment and the nature of the actors. We then derive the \textit{Cost of Information} (COI) theorem, proving the erosion of pricing power in the limit of agent saturation. Following this, we outline our generative contamination strategy using GOFAI-driven distinguishability and transition probability learning. Finally, we formulate the robust control problem as a Stackelberg game solved via Distributionally Robust Reinforcement Learning (DR-RL) with constructed ambiguity sets.
\subsection{Problem Formalization}
We define a commercial environment where the platform interacts with a stream of sessions. Let $\mathcal{S}$ denote the set of all sessions. Each session $s \in \mathcal{S}$ is generated by an actor belonging to a latent class $\theta_s \in \{H, A\}$, where $H$ denotes Human and $A$ denotes Agent.
We define a commercial environment where the platform interacts with a stream of sessions. Let $\mathcal{S}$ denote the set of all sessions. Each session $s \in \mathcal{S}$ is generated by an actor belonging to a latent class $Y_s \in \{H, A\}$, where $H$ denotes Human and $A$ denotes Agent.
Each session produces a trajectory of observable events $\tau_s = (e_{s,1}, \ldots, e_{s,L_s})$. An event $e_{s,k}$ is a tuple defined as:
\begin{equation}
@@ -20,12 +21,12 @@ where:
\item $t_{s,k} \in \mathbb{R}_+$ is the continuous timestamp.
\end{itemize}
The platform does not directly observe the true underlying demand function $d(p)$. Instead, it observes a behavioral proxy $\hat{q}_t$, which is a composite signal derived from the mixture of actor types. We define the demand proxy for product $i$ at epoch $t$ as a weighted aggregation of events:
The platform does not directly observe the true underlying demand function $d(p)$ where $d \in \mathbb{R}^{+}$ and our proxy $\hat{q} \in \mathbb{R}^{+}$. Instead, it observes a behavioral proxy $\hat{q}_t$, which is a composite signal derived from the mixture of actor types. We define the demand proxy for product $i$ at epoch $t$ as a weighted aggregation of events:
\begin{equation}
\label{eq:qhat}
\hat{q}_{t,i} = \sum_{s \in \mathcal{S}_t} \sum_{k=1}^{L_s} \omega(a_{s,k}) \cdot \mathbb{1}[i_{s,k} = i]
\hat{q}_{t,i} = \sum_{s \in \mathcal{S}_t} \sum_{k=1}^{L_s} \omega(a_{s,k}) \cdot \mathbf{1}[i_{s,k} = i]
\end{equation}
where $\omega: \mathcal{A} \to \mathbb{R}_+$ assigns weights to actions based on their signal strength regarding willingness to pay.
where $\omega: \mathcal{A} \to \mathbb{R}^+$ assigns weights to actions based on their signal strength regarding willingness to pay.
In the current engine implementation, we use the normalized variant of this proxy for each step:
\begin{equation}
@@ -34,20 +35,21 @@ In the current engine implementation, we use the normalized variant of this prox
with fixed category-level weights (cart, dwell, nav, filter) following the same rank order from Table~\ref{tab:action_space}. This keeps the signal dense and directly usable in the simulator.
\subsubsection{Actor Types and Demand Curves}
We formalize the heterogeneity of actors by introducing a type space $\Theta$. An actor of class $Y_s$ is further parameterized by a type $\theta \sim \mathcal{D}_{Y}$. This type determines the actor's demand response function $d(p; \theta)$, sampled from a distribution of possible demand curves. The total observed demand is a stochastic process governed by the naively defined mixture:
We formalize the heterogeneity of actors by introducing a type space $\Theta$. An actor of class $Y_s$ is further parameterized by a type $\theta \sim \mathcal{D}_{Y_s}$. This type determines the actor's demand response function $d\!\left(p \mid Y_s,\theta\right)$, sampled from a distribution of possible demand curves. In compact form, demand remains price-dependent as $d(p\mid Y=y)$. The total observed demand is a stochastic process governed by the naively defined mixture:
\begin{equation}
\label{eq:mixture_demand}
Q(p) = (1-\alpha) \cdot \mathbb{E}_{\theta \sim \mathcal{D}_H}[d(p; \theta)] + \alpha \cdot \mathbb{E}_{\theta \sim \mathcal{D}_A}[d(p; \theta)] + \epsilon_t
Q(p) = (1-\alpha) \cdot \mathbb{E}_{\theta \sim \mathcal{D}_H}[d(p\mid Y=H,\theta)] + \alpha \cdot \mathbb{E}_{\theta \sim \mathcal{D}_A}[d(p\mid Y=A,\theta)] + \epsilon_t
\end{equation}
where $\alpha \in [0, 1]$ represents the contamination parameter (proportion of agents) and $\epsilon_t$ is non-stationary market noise.
We note that the composition of two non-stationary variables can make it difficult to attribute observed dynamics in online environments to their source, whether market noise or agents specifically.
Accounting for behavioral and market variation, we also treat $\epsilon_t$ as absorbing serving-path variability from LLM infrastructure (e.g., batch-size-dependent inference behavior under changing load), which appears stochastic at the request level even under greedy decoding \parencite{horace_he_and_thinking_machines_lab_defeating_2025}.
\subsection{Cost of Information (COI) Framework}
The platform's pricing power comes from information asymmetry: users who express strong interest signals pay more than the base price. We quantify this markup as the \textit{Cost of Information} (COI), which represents the average premium extracted above marginal cost. COI measures the revenue at risk when information asymmetry collapses.
A top-level view in the current AI discourse is that sufficiently large productivity gains can induce vertical deflation through cost compression and supply expansion \parencite{rachitsky_marc_2026}. Our contribution is narrower and mechanism-level: even under long-run deflation, platform revenue still depends on short-run information costs to the user. We formalize that rent as the Cost of Information (COI) and study how agentic reconnaissance accelerates its erosion.
The platform's pricing power comes from information asymmetry: users who express strong interest signals pay more than the base price. We quantify this markup as the \textit{Cost of Information} (COI), which represents the average premium extracted above marginal cost. The intuition behind this being a cost comes from the perspective of the user who is interacting with the platform, where the user is the one incurring that ``cost.'' COI measures the revenue at risk when information asymmetry collapses.
A top-level view in the current AI discourse is that sufficiently large productivity gains can induce vertical deflation (vertical supply chain price decrease) through cost compression and supply expansion \parencite{rachitsky_marc_2026}. Our contribution is narrower and mechanism-level: even under long-run deflation, platform revenue still depends on short-run information costs to the user. We formalize that rent as the Cost of Information (COI) and study how agentic reconnaissance accelerates its erosion.
\begin{definition}[Cost of Information]
Let $\pi(\tau)$ be a pricing policy mapping interaction histories to prices. The COI is defined as:
@@ -88,13 +90,14 @@ where $\mathbb{E}[P]$ is the expected price charged by the policy and $\underlin
\draw[<->, thick, red] (\pmin, 2.0) -- (\mean, 2.0) node[midway, above] {COI};
\end{tikzpicture}
\caption{Illustration of the Cost of Information (COI). The COI is defined as the difference between the expected price $\mathbb{E}[p]$ realized by the policy and the minimum viable price $\underline{p}$.}
\caption{Illustration of the Cost of Information (COI). The COI is defined as the difference between the expected price $\mathbb{E}[p]$ realized by the policy and the minimum viable price $\underline{p}$. The abstraction we assume is that the reservation price $\underline{p}$ already has some innate margin and would always result in at least a break-even transaction.}
\label{fig:coi_illustration}
\end{figure}
We now formally demonstrate that standard dynamic pricing mechanisms are not incentive-compatible with high-frequency agentic traffic. As the number of independent competitive agents $N$ querying the system grows, the platform's ability to sustain a COI vanishes.
A fundamental assumption for our claim lies in the alignment of the AI agent through its prompt, which has been demonstrated by \textcite{fish_algorithmic_2025} to cause strong collusive behavior under linguistic nudges. This assumption can be generalized to the human user asking the agent to research products with a minimizing objective.
\paragraph{Assumption Scope}
The theorem and core experiments in this thesis assume a non-collusive independent-session setting: each agent queries prices independently and does not share sampled quotes across agents. Collusive coordination is outside the current proof scope and is treated as an extension scenario.
\begin{theorem}[COI Erosion in the Limit]
Let $N$ be the number of independent, utility-maximizing agents querying the platform. Let $p_{(1)}$ be the first order statistic (minimum) of the prices offered to these agents. As $N \to \infty$, the Cost of Information converges to 0.
@@ -126,7 +129,7 @@ Since the integrand vanishes as $N \to \infty$ for all $t > \underline{p}$, the
\end{proof}
This result naively proves that standard pricing policies $\pi$ fail to extract surplus in the presence of large-scale agentic search, necessitating a robust counter-mechanism.
This result implies that standard pricing policies $\pi$ cannot extract the same surplus under large-scale agentic search without additional structure, which motivates the robust control layer below.
% The DRO objective creates a lower bound on COI extraction, effectively guaranteeing a minimum margin even in the presence of adversarial agents. we need to prove this and demonstrate that in a theorem.
@@ -135,22 +138,22 @@ This result naively proves that standard pricing policies $\pi$ fail to extract
\subsection{System Architecture: Hybrid Kappa-Lambda Architecture}
In order for our research to have grounding in interactions we built a robust e-commerce web-platform. We initially conducted a survey of the leading platforms of airlines and hotel booking sites to identify the specific interface patterns that effectively manage complex travel data. Our analysis revealed a clear industry standard: while both sectors rely on tabbed service selection and left-sidebar filtering to streamline navigation, they diverge in result presentation: airlines utilize visual date-price bars and multi-step wizards to optimize for logistical transparency, whereas hotel platforms leverage image-led cards and scarcity triggers to drive emotional engagement and urgency. Our web framework defines a highly agnostic boilerplate which can be seeded with any data-modality with an easy-to-tailor pattern, which we leverage to define a \texttt{hotel} and \texttt{airline} mode. Both modes are then individually deployed via an environment level argument which adjusts the proxy routing with a custom middleware inside next.js to render only the desired mode. The purpose of this was to create a baseline adaptable to any use-case or desired commercial application.
In order for our research to have grounding in interactions we built a robust e-commerce web-platform. In this framing, Kappa denotes streamed processing and Lambda denotes batch operations, following the terminology of big-data processing. We initially conducted a survey of the leading platforms of airlines and hotel booking sites to identify the specific interface patterns that effectively manage complex travel data. To better understand the playing field, we collected artifacts on design across various airlines and hotels. While both sectors rely on tabbed service selection and left-sidebar filtering to streamline navigation, they diverge in result presentation: airlines utilize visual date-price bars and multi-step wizards to optimize for logistical transparency, whereas hotel platforms leverage image-led cards and scarcity triggers to drive emotional engagement and urgency. Our web framework defines a highly agnostic boilerplate which can be seeded with any data-modality with an easy-to-tailor pattern, which we leverage to define a \texttt{hotel} and \texttt{airline} mode. Both modes are then individually deployed via an environment-level argument which adjusts the proxy routing with custom middleware in Next.js to render only the desired mode. The purpose of this was to create a baseline adaptable to any use-case or desired commercial application.
The architecture of this platform begins with the deployed web-apps posting interaction data to our backend which processes them and stores each ingested interaction into a kafka cluster. This serves as our data reservoir tracking and associating each interaction with its session and importantly with which experiment it belongs to. Not only do we track the behavioral interactions, but our pricing provider micro-service, once called by the frontend reports the observed/queried price-product into kafka. This kafka cluster is subscribed to by our pipeline which is configured on a schedule in Airflow, with the possibility of manual trigger. The final stage of the pricing pipeline, submits computed dynamic pricing results into a redis database for quick updates which is then read by the pricing provider and displayed on the webapp. This is a very generic end-to-end mechanism which is applicable to a variety of different e-commerce tasks. We intentionally put emphasis on the development of this infrastructure to establish a reproducible framework for interaction and to minimize any noise.
The architecture begins with deployed web applications posting interaction data to a backend that stores each record in Apache Kafka. Kafka acts as the reservoir linking sessions to experiments. Behavioral events and, separately, price observations from the pricing-provider microservice (invoked by the frontend) land in Kafka topics. A scheduled Airflow pipeline (with manual triggers) consumes the stream and the final pricing stage writes vectors to Redis for low-latency reads by the provider and display in the client. This design pattern allows us to generalize to other commercial settings, where Kafka is used for durability and replay, Redis for serving and quick queries. We invested in this stack to keep runs reproducible and to limit extraneous variance so the same skeleton applies across e-commerce settings.
\paragraph{Public Web Artifact} We transition the Kappa like architecture of the data collection to a Lambda architecture for actual learning in a surrogate environment. This allows us to move faster on data which is provided and helps us create a feedback loop for production deployment. To support further research in this intersection of fields we release P4P \footnote{\url{https://github.com/velocitatem/p4p}} as a public repository providing the interaction layer of the PHANTOM framework. This provides a configurable storefront which can be tailored to any commercial setting with a standardized session-level event tracking. We document the API adapters or what the framework expects in terms of schemas for pricing providers and log ingestion servicse. The repository is intended for controlled experimentation and method replication rather than production commerce deployment.
\paragraph{Public Web Artifact} We transition the Kappa-like architecture of the data collection to a Lambda architecture for actual learning in a surrogate environment. This allows us to move faster on data which is provided and helps us create a feedback loop for production deployment. To support further research in this intersection of fields we release P4P \footnote{\url{https://github.com/velocitatem/p4p}} as a public repository providing the interaction layer of the PHANTOM framework. This provides a configurable storefront which can be tailored to any commercial setting with a standardized session-level event tracking. We document the API adapters and expected schemas for pricing providers and log ingestion services. The repository is intended for controlled experimentation and method replication rather than production commerce deployment.
\paragraph{Public Dataset} For reproducibility of the behavioral analysis and distinguishability experiments, we also release the interaction dataset used in this thesis as \textit{WhoClickedIt}. The dataset is hosted on Hugging Face \footnote{\url{https://huggingface.co/datasets/velocitatem/whoclickedit}} and is distributed as one flattened event sheet (\texttt{whoclicked.csv}) with explicit labels (\texttt{actor\_type}, \texttt{is\_agent}, and \texttt{record\_type}). The associated dataset card specifies the schema, collection process, and known limitations; a full copy is included in Appendix~\ref{app:whoclicked_card}.
\paragraph{Public Dataset} For reproducibility of the behavioral analysis and distinguishability experiments, we also release the interaction dataset used in this thesis as \textit{WhoClickedIt}. The dataset is hosted on Hugging Face \footnote{\url{https://huggingface.co/datasets/velocitatem/whoclickedit}} and is distributed as one flattened event sheet (\texttt{whoclicked.csv}) with explicit labels (\texttt{actor\_type}, \texttt{is\_agent}, and \texttt{record\_type}). The dataset card on that page documents the schema, collection process, and known limitations.
\subsubsection{DevOps Principles}
Reproducible results are key to quality research platforms, this is taken into mind when deploying and working with our research platform. From a deployment standpoint the platform can be deployed across a large variety of providers and can be run locally. When developing a new interaction modality apart from the ones that come out of the box, a simple template pattern can be followed. The middleware of the framework is designed to properly render the chosen modality from environmental variables, thus deployment of different or parallel version of the software can be easily parametrized.
Reproducibility guided deployment choices: the stack runs locally or on common cloud providers. New interaction modalities follow a small template; middleware reads environment variables so parallel deployments (e.g.\ staging versus production-like experiments) differ only in configuration, not in forked codebases.
\subsubsection{Online Dynamic Pricing}
In order to collect data from actors under correct conditions we replicate a naive and simple dynamic pricing algorithm which runs in the background during the experiments.
To expose participants to state-dependent prices without over-constraining the study, we run a transparent surge--discount heuristic in the background during data collection.
Dynamic pricing is handled by a pipeline which computes a per-product demand estimate over a specific window of the data, defined by the period $T$, which by default is 5 minutes. This pipeline computes a demand estimate vector $\hat{q} \in \mathbb{R}^N$ as a weighted sum of interactions for each product; it additionally computes a price elasticity vector $\hat{\epsilon}$ of the same dimension as the demand vector. The resulting feature matrix has size $N \times 2$, which we translate to a new price vector $\hat{p} \in \mathbb{R}^N$.
@@ -158,14 +161,14 @@ The transformation that governs this dynamic pricing is a very simple surge-base
\begin{equation}
\hat{p}_i = \begin{cases}
p_{0,i} \cdot \lambda_{\text{surge}} & \text{if } \hat{q}_i \geq \theta_{\text{high}} \\
p_{0,i} \cdot \lambda_{\text{disc}} & \text{if } \hat{q}_i \leq \theta_{\text{low}} \\
p_{0,i} & \text{otherwise}
p_{0,i} \cdot \lambda_{\text{surge}} & \text{if } \hat{q}_i \geq \varrho_{\text{high}} \\
p_{0,i} \cdot \lambda_{\text{disc}} & \text{if } \hat{q}_i \leq \varrho_{\text{low}} \\
p_{0,i} & \text{otherwise}
\end{cases}
\quad \forall i \in \{1, \ldots, N\}
\end{equation}
where $p_0 \in \mathbb{R}^N$ is the base price vector (which is seeded into our database distinctly for each mode of the commerce platform), $\theta_{\text{high}}, \theta_{\text{low}} \in \mathbb{R}$ are demand thresholds defining surge and discount regions, and $\lambda_{\text{surge}}, \lambda_{\text{disc}} \in \mathbb{R}^+$ are multiplicative factors with typical values $\lambda_{\text{surge}} = 1.2$ and $\lambda_{\text{disc}} = 0.9$. This piecewise function enables rapid price adjustment in response to observed demand without requiring complex elasticity estimation or historical calibration, allowing us to expose actors within our experiments to a system with a dynamic component of pricing.
where $p_0 \in \mathbb{R}^N$ is the base price vector (which is seeded into our database distinctly for each mode of the commerce platform), $\varrho_{\text{high}}, \varrho_{\text{low}} \in \mathbb{R}$ are demand thresholds defining surge and discount regions, and $\lambda_{\text{surge}}, \lambda_{\text{disc}} \in \mathbb{R}^+$ are multiplicative factors with typical values $\lambda_{\text{surge}} = 1.2$ and $\lambda_{\text{disc}} = 0.9$. This piecewise function enables rapid price adjustment in response to observed demand without requiring complex elasticity estimation or historical calibration, allowing us to expose actors within our experiments to a system with a dynamic component of pricing.
% For our offline experimental setting, we generalize a master value function that can encompass different demand estimation and pricing strategies.
%
@@ -177,19 +180,32 @@ where $p_0 \in \mathbb{R}^N$ is the base price vector (which is seeded into our
\subsection{Experimental Design}
We start from a practical constraint: we do not have access to proprietary production data. Because of that, we design our own fictional platform that still represents how commercial platforms work in the real world. The design comes from a survey of hotel and airline websites, where we extracted common interface components and used them as a high-level template for dynamic pricing environments.
% We start from a practical constraint: we do not have access to proprietary production data. Because of that, we design our own fictional platform that still represents how commercial platforms work in the real world. The design comes from a survey of hotel and airline websites, where we extracted common interface components and used them as a high-level template for dynamic pricing environments.
In the aforementioned platform we develop for our experiments, we use the surveyed websites and create an \textit{average} representation of what the most expected interfaces would be by extracting common components and designing a high level template for dynamic pricing environments.
The interface is organized as a product catalog where each product belongs to a time-bounded price vector (for example, a daily pricing period). During each period we collect interaction data by instrumenting UI components and predefined action templates that are still customizable. This gives us control without losing realism.
The interface is organized as a product catalog where each product belongs to a time-bounded price vector (for example, a daily pricing period). During each period we collect interaction data by instrumenting UI components and predefined action templates that are still customizable. That yields controlled variation while keeping the interface controlled-for.
Since users act with motivations, we define a pool of tasks (jobs to be done) and assign tasks randomly to participants.
The task pool is stored as a structured table with fields \texttt{id}, \texttt{created\_at}, \texttt{task\_name}, \texttt{task\_description}, and \texttt{task\_def\_of\_done}. We formulate the tasks as compact jobs-to-be-done rather than as strict click scripts, because the target is to elicit realistic browsing and comparison behavior which can capture nuance of different people. In hotel mode the assigned tasks include \textit{Cheapest Room}, \textit{Cheapest Room w/ View}, \textit{MultiStep Cheapest Room}, \textit{The Digital Nomad (Executive)}, and \textit{The 3-Way Tradeoff (Desk + Quiet + Flexible)}. These prompts deliberately require critical thought in search, inspection of room details, comparison of amenities or images, return visits to the listing page, and a final booking decision which create a degree of cognitive load. In airline mode we use \textit{Last-Minute One-Way Flight}, where the actor must urgently travel to LAX from either SEA or JFK within the next 1--3 days, inspect at least a small set of candidate itineraries, and then book a reasonable earliest departure.
A representative task is to find the cheapest feasible catalog item under explicit constraints while removing strict financial limits so we avoid trivial optimization behavior. Participants are also randomly assigned to one experimental platform mode (hotel or airline). Once assigned, they are dropped into the experiment with an actor ID. Under each experiment ID, we can observe multiple sessions across time and gather long interaction traces for the same actor.
We discuss limitations and choices made in this experimental design in Section~\ref{sec:limitations_risks}.
The task pool is stored as a structured table with fields \texttt{id}, \texttt{created\_at}, \texttt{task\_name}, \texttt{task\_description}, and \texttt{task\_def\_of\_done}. We formulate the tasks as compact jobs-to-be-done rather than as rigid instructions, because the target is to elicit realistic browsing and comparison behavior which can capture nuance of different people. In hotel mode the assigned tasks include \textit{Cheapest Room}, \textit{Cheapest Room w/ View}, \textit{MultiStep Cheapest Room}, \textit{The Digital Nomad (Executive)}, and \textit{The 3-Way Tradeoff (Desk + Quiet + Flexible)}. These prompts deliberately require critical thought in search, inspection of room details, comparison of amenities or images, return visits to the listing page, and a final booking decision which create a degree of cognitive load. In airline mode we use \textit{Last-Minute One-Way Flight} or \textit{Family/Work Emergency Travel}, where the actor must urgently travel to LAX from either SEA or JFK within the next 1 to 3 days, inspect at least a small set of candidate itineraries, and then book a reasonable earliest departure. Figure~\ref{fig:exp_design_tree} summarizes the assignment tree.
\begin{figure}[ht]
\centering
\resizebox{0.88\columnwidth}{!}{%
\input{chapters/figures/experiment_design_tree.tex}
}
\caption{Experimental design decision tree for participant assignment.}
\label{fig:exp_design_tree}
\end{figure}
A representative task is to find the cheapest feasible catalog item under explicit constraints while removing strict financial limits so we avoid trivial optimization behavior. Participants are also randomly assigned to one experimental platform mode (hotel or airline). Once assigned, they are dropped into the experiment with an actor ID. Under each experiment ID, we can observe multiple sessions across time and gather long interaction traces for the same actor. This de-risks our lower sample size of individuals by allowing broad interaction data to come from each one.
The human data collection involved 13 participants, all of whom provided explicit informed consent prior to their session. Participants had an average age of 21 years and were recruited from a university population. Alongside the 13 human sessions we ran 16 agent sessions of equivalent task scope, yielding 29 labeled trajectories in total (45\% human, 55\% agent). Each participant was assigned a single platform mode and a single task drawn from the pool, and completed the session independently without guidance on navigation or pricing strategy.
To evaluate quality and realism of the setup, we store both structured event logs and full interaction transcripts. This lets us combine quantitative analysis with transcript-level qualitative findings. The result is an isolated system where we can control the interaction process while preserving realistic behavior.
Operationally, goals and experiment runs are tracked in PostgreSQL (goal table, run table, and assignment mapping). This data-acquisition phase is the first half of the methodology and is intentionally a disconnected component that feeds the later contributions. The second half uses collected behavioral traces to distinguish classes $\theta \in \{A,H\}$ with session-conditioned probability estimates, then injects those estimates into the pricing learner.
Operationally, goals and experiment runs are tracked in PostgreSQL (goal table, run table, and assignment mapping). This data-acquisition phase is the first half of the methodology and is intentionally a disconnected component that feeds the later contributions. The second half uses collected behavioral traces to distinguish classes $Y \in \{A,H\}$ with session-conditioned probability estimates, then injects those estimates into the pricing learner.
Our process follows three stages: (1) observe and \textit{vectorize} behavioral interactions, (2) learn distinguishability to characterize human versus agent patterns, and (3) use the learned signal to train a defensive policy in a controlled dynamic-pricing simulator.
@@ -215,16 +231,15 @@ Our web platform (developed in similar spirit to RecSim \parencite{ie_recsim_201
To speak to realism, user interviews reported that the platform architecture mirrored standard booking interfaces and reduced the cognitive load required to learn the system. One participant described the flow as ``intuitive'' and close to a ``normal'' transaction, suggesting observed behavior was primarily driven by pricing treatment rather than interface novelty.
The dynamic pricing mechanism elicited immediate behavioral adjustments. Participants were sensitive to price volatility: sudden boosts triggered urgency and faster booking attempts, while large listing-to-final discrepancies triggered deeper comparison behavior. This is comforting because the controlled setup still produces commercially relevant interaction data.
The dynamic pricing mechanism elicited immediate behavioral adjustments. Participants were sensitive to price volatility: sudden boosts triggered urgency and faster booking attempts, while large listing-to-final discrepancies triggered deeper comparison behavior. The responses match what one expects from live e-commerce experiences, such as reactions to volatility, which supports external validity despite the lab setting.
\subsubsection{Design of Training Factorial Study}
\subsubsection{Design of Training Sweeps}
The simulator has multiple configurable factors. We design a multi-factor study across five axes derived from the sweep configurations: (1) RL algorithm (\texttt{ppo}, \texttt{a2c}, \texttt{dqn}, \texttt{qtable}; 4 levels), (2) contamination ratio $\alpha$ sampled from $[0.1, 0.6]$ at four representative levels, (3) robustness radius $\epsilon_\alpha \in \{0.0, 0.15, 0.3\}$ (3 levels), (4) COI penalty weight $\lambda_\text{coi}$ at two reference levels, and (5) pricing action granularity (two discretization settings for \texttt{action\_levels}); giving a grid of $4\times4\times3\times2\times2 = 192$ configurations. Statistical power for the behavioral comparisons is determined by a two-sample test over per-session KL divergence scores; a formal power analysis with minimum detectable effect size at $n_H=13$, $n_A=16$ is reported in the results.
% Power analysis plan: apply a two-sample Mann-Whitney U (or permutation test) on per-session (delta_H - delta_A) divergence scores comparing the human and agent groups. Compute minimum detectable effect size at alpha=0.05, power=0.8, given n_H=13 and n_A=16. Bootstrap confidence intervals on mean KL are a cleaner complement given the non-normality of divergence distributions.
The simulator has multiple configurable factors. Training runs are driven by Weights \& Biases sweep definitions versioned with the codebase, mixing random and grid schedules rather than a single full factorial. For the contamination ratio $\alpha$, exploratory sweeps draw $\alpha$ uniformly on $[0.1,0.6]$ and then some sweeps use the narrower interval $[0.1,0.5]$. Grid sweeps fix explicit level sets, for example $\alpha\in\{0.1,0.2,0.3,0.4,0.6,0.8\}$ (six levels, including $0.8$ beyond the typical exploratory upper endpoint) or five levels $\{0.1,0.2,0.3,0.4,0.6\}$. Auxiliary schedules also include $\alpha=0$ alongside positive values. Robustness radius $\epsilon_\alpha$, COI penalty $\lambda_\text{coi}$, RL algorithm (\texttt{ppo}, \texttt{a2c}, \texttt{dqn}, \texttt{qtable}), and the discretization of the price action grid vary by sweep. Broad random search may use uniform $\epsilon_\alpha\in[0,0.3]$ and $\lambda_\text{coi}\in[0.05,0.6]$; tighter grids may fix $\epsilon_\alpha=0.2$ and restrict $\lambda_\text{coi}$ to $\{0.15,0.30\}$. Behavioral distinguishability is assessed with a two-sample Mann--Whitney test on per-session divergence gap scores at cohort sizes $n_H=13$ and $n_A=16$.
While this scale is generally expensive for reinforcement learning, we execute it on a large TPU cluster to make the sweep tractable.
Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160\,PFLOPS of aggregate compute (derivation in Appendix~\ref{app:compute_budget}), which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration where throughput-per-dollar is favorable, and reserve on-demand v4 capacity for runs that should not be interrupted.
Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160\,PFLOPS of aggregate compute (derivation in Appendix~\ref{app:compute_budget}), which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration, and reserve on-demand v4 capacity for runs that should not be interrupted.
\begin{table}[ht]
\centering
@@ -259,12 +274,13 @@ v4 & 64 (32 + 32) & us-central2-b & 32 Spot + 32 On-demand \\
\end{tabular}
\end{table}
For connections from Madrid, we prioritize the europe-west4 allocation for latency-sensitive runs with the benefit of having the most grouped chips within a single region. This regional grouping is important for the deployment of our Kubernetes cluster which cannot span multiple regions. All sweep metadata, model checkpoints, and reward traces are logged in Weights \& Biases. % TODO: cite this (from bib)
Hardware specifications are from the official Google Cloud TPU documentation \parencite{noauthor_tpu_2026,noauthor_tpu_2025-1,noauthor_tpu_2025}.
For connections from Madrid, we prioritize the europe-west4 allocation for the sake of latency and the benefit of having the most grouped chips within a single region. This regional grouping is important for the deployment of our Kubernetes cluster, which cannot span multiple regions. All sweep metadata, model checkpoints, and reward traces are logged in Weights \& Biases. Hardware specifications are from the official Google Cloud TPU documentation \parencite{noauthor_tpu_2026,noauthor_tpu_2025-1,noauthor_tpu_2025}.
% TODO: cite this (from bib)
Design of training processes: we build the Docker image with layer caching in mind; to speed up Docker rebuilds as much as possible, we place the most volatile steps towards the end of the image build. What this means in practice is that any dependency installations are isolated so edits to source code do not trigger rebuilds. Only if we update our entry point for training a sweep will Docker also rebuild the source-code copy stage.
Due to the preemptive nature of the current demand for TPU chips, we settle for using our on-demand allocation as the primary source of compute. The on-demand TPU pod of 32 chips spread across 4 virtual hosts creates a relatively unique parallelization setup. Despite our desire to use a traditional approach of clustering and perhaps deploying SLURM jobs of our sweep agent, the lack of predictability in provisioning each instance of a compute resource makes this a high-friction layer we do not want to add.
Training images abide by Docker layer caching principles with maximal caching on the lowest levels. Dependency layers are separate from the copy of application source, so code edits or tweaks do not invalidate the entire build; only changes to the training entrypoint or dependencies force a full rebuild.
TPU capacity is scarce and often preemptible, so we rely primarily on on-demand pods for workloads that must finish without interruption. A typical reservation is a 32-chip pod across four worker VMs. That layout already gives enough parallelism for our sweep driver without adding a separate cluster scheduler. We considered SLURM-style job arrays, but fluctuating provisioning times would have added operational overhead with little benefit for our workload, so orchestration stays in the container and Ray layer described below.
\subsubsection{Interaction Schema}
@@ -272,7 +288,7 @@ We extend the basic event tuple $e_{s,k}$ to capture the full observational sign
\begin{equation}
e_{s,k} = \left( a_{s,k}, \, i_{s,k}, \, t_{s,k}, \, \mu_{s,k}, \, \delta_{s,k} \right)
\end{equation}
where $\mu_{s,k} \in \mathcal{M}$ is a metadata record containing action-specific context (e.g., price observed, filter parameters, element text), and $\delta_{s,k} \in \mathbb{R}_+$ is the dwell time in milliseconds for attention-based actions.
where $\mu_{s,k} \in \mathcal{M}$ is a metadata record containing action-specific context (e.g., price observed, filter parameters, element text), and $\delta_{s,k} \in \mathbb{R}^+$ is the dwell time in milliseconds for attention-based actions.
A session $s$ is itself a structured record:
\begin{equation}
@@ -299,8 +315,7 @@ $\mathcal{A}_{\text{filter}}$ & \texttt{search}, \texttt{filter\_date}, \texttt{
\end{table}
This partition enables the weight function $\omega$ from Eq.~\ref{eq:qhat} to assign category-specific signal strengths, with $\omega(\mathcal{A}_{\text{cart}}) > \omega(\mathcal{A}_{\text{dwell}}) > \omega(\mathcal{A}_{\text{nav}}) > \omega(\mathcal{A}_{\text{filter}})$ reflecting decreasing commitment.
It is important to acknowledge that this ordering is a very blatant assumption in the weighting; we do, however, motivate the scale of each weight by the per-category observed divergence between each behavioral profile.
In the simulator baseline this order is encoded with a compact fixed scale: cart $=4.0$, dwell $=2.0$, nav $=1.0$, filter $=0.5$. Unknown actions are mapped by prefix heuristics to the nearest category.
The ordering cart $>$ dwell $>$ nav $>$ filter is a deliberate simplification: we set it from early data by ranking categories by KL divergence between human and agent transition rows and then spacing weights in powers of two. The simulator encodes cart $=4.0$, dwell $=2.0$, nav $=1.0$, filter $=0.5$ and finally unknown actions map by prefix to the nearest category (or are discarded).
The metadata record $\mu$ varies by action type. For product views, $\mu$ contains the observed price $p_{\text{obs}}$ and product attributes. For dwell events, $\mu$ includes the element text and accumulated hover duration. This heterogeneous structure is captured via a schema-on-read approach in our Kafka ingestion pipeline, where events are validated against type-specific schemas before storage.
@@ -315,11 +330,11 @@ To train a robust pricing learner, we need a simulator that can generate realist
\subsubsection{Ground-Truth Distinguishability}
Because sessions are collected under controlled experimental conditions where each actor is assigned a known type at the start of the trial, labels $\theta_s \in \{H, A\}$ are available as ground truth rather than as the output of a heuristic classifier. We therefore estimate separate transition kernels directly from each labeled partition $\mathcal{D}_H$ and $\mathcal{D}_A$, treating the resulting $\hat{\mathcal{T}}_H$ and $\hat{\mathcal{T}}_A$ as the ground-truth behavioral profiles for each class. We then ask a direct methodological question: are the kernels distinguishable enough to justify downstream pricing control that depends on that distinguishability?
Because sessions are collected under controlled experimental conditions where each actor is assigned a known type at the start of the trial, labels $Y_s \in \{H, A\}$ are available as ground truth rather than as the output of a heuristic classifier. We therefore estimate separate transition kernels directly from each labeled partition $\mathcal{D}_H$ and $\mathcal{D}_A$, treating the resulting $\hat{\mathcal{T}}_H$ and $\hat{\mathcal{T}}_A$ as the ground-truth behavioral profiles for each class. We then ask a direct methodological question: are the kernels distinguishable enough to justify downstream pricing control that depends on that distinguishability?
To answer this, we compute per-session KL divergence scores against both class-level centroids. For each session $s$ in either partition, we fit a session-level event transition kernel $\hat{\mathcal{T}}_s$ from that session's trajectory alone, then compute its average KL divergence to the human centroid ($\Delta_{H,s}$) and to the agent centroid ($\Delta_{A,s}$). The per-session distinguishability score is the gap $\Delta_{H,s} - \Delta_{A,s}$: a negative value indicates proximity to human behavior, a positive value indicates proximity to agent behavior.
For each session $s$ we fit a session-level transition kernel $\hat{\mathcal{T}}_s$, then average KL divergence to the human centroid ($\Delta_{H,s}$) and to the agent centroid ($\Delta_{A,s}$). The distinguishability score is the gap $\Delta_{H,s} - \Delta_{A,s}$ (negative $\approx$ human-like, positive $\approx$ agent-like). KL is used because it compares full categorical rows, not single features.
The normality assumption cannot be made for KL divergence distributions, which are right-skewed and bounded below by zero, so we do not use a Student's $t$-test. Instead we apply a Mann-Whitney $U$ test \parencite{mann_test_1947} on the per-session gap scores between the two groups. The Mann-Whitney test is a rank-based nonparametric test that compares the stochastic ordering of two independent samples without distributional assumptions, making it appropriate for small samples drawn from skewed populations. We report $U$, the exact two-sided $p$-value, and group-level descriptive statistics for the gap scores.
The KL divergences underlying the gap scores are right-skewed and bounded below by zero, so normality cannot be assumed for the gap scores; we therefore test cohort differences with a Mann--Whitney $U$ test \parencite{mann_test_1947} rather than a $t$-test. We report $U$, the two-sided $p$-value, and descriptive statistics for each group.
\begin{definition}[Kullback-Leibler Divergence for Transition Distributions]
Let $P_e$ and $Q_e$ be categorical distributions over destination states following event $e$, derived from human and agent trajectories respectively. The KL divergence between these distributions is:
@@ -328,21 +343,22 @@ Let $P_e$ and $Q_e$ be categorical distributions over destination states followi
\end{equation}
where $\mathcal{S}_e$ denotes the set of destination events that follow $e$ in the human trajectories.
\end{definition}
We exploit KL asymmetry so that ``distance from human-like'' is explicit in the score, not only distance from agents.
To obtain this statistic, we aggregate transitions by triggering event $e$ and treat normalized outgoing probabilities as categorical distributions $P_e$ (human) and $Q_e$ (agent). We intersect shared event labels, then accumulate log-ratio contributions over shared destinations. Large contributions, including near-zero $Q_e(k)$ cases, identify transitions where one actor class is difficult to mimic.
With these divergence features we train a contrastive model to estimate a weak agent probability $f(\tau)\in[0,1]$, which we later use as a weighting and control signal.
With these divergence features we compute a weak agent probability $f(\tau')\in[0,1]$ directly from divergence gaps, which we later use as a weighting and control signal.
\subsubsection{Transition Probability Estimation}
\label{sec:tpe}
For both subsets, we model session dynamics as an MDP and estimate transition kernel $\mathcal{T}$. For each actor type we estimate global kernels $\hat{\mathcal{T}}_A$ and $\hat{\mathcal{T}}_H$, then cluster into behavioral sub-kernels $\hat{\mathcal{T}}_y^i$ to avoid collapsing all behavior into one average profile. Transition probabilities are estimated by maximum likelihood:
For both subsets, we model session dynamics as a Markov decision process and estimate transition kernel $\mathcal{T}$. For each actor type we estimate global kernels $\hat{\mathcal{T}}_A$ and $\hat{\mathcal{T}}_H$, then cluster into behavioral sub-kernels $\hat{\mathcal{T}}_y^i$ to avoid collapsing all behavior into one average profile. Transition probabilities are estimated by maximum likelihood:
\begin{equation}
\hat{P}(s' \mid s) = \frac{N(s, s')}{\sum_{k \in \mathcal{S}} N(s, k)}
\end{equation}
where $N(s, s')$ is the observed transition count. This allows us to construct a \textit{Contamination Generator} $\mathcal{G}(\alpha)$. Given a clean trajectory dataset, $\mathcal{G}$ injects synthetic agent trajectories sampled from $\hat{\mathcal{T}}_A$ until the effective mixing ratio reaches $\alpha$. The properties of an MDP such as ... should be preserved by the operation described below.
where $N(s, s')$ is the observed transition count. This allows us to construct a \textit{Contamination Generator} $\mathcal{G}(\alpha)$. Given a clean trajectory dataset, $\mathcal{G}$ injects synthetic agent trajectories sampled from $\hat{\mathcal{T}}_A$ until the effective mixing ratio reaches $\alpha$. The properties of an MDP such as a discrete state space, nonnegative transition mass, and row-stochasticity ($\sum_{s'}\hat{P}(s'\mid s)=1$ for visited states) should be preserved by the operation described below.
To scale this to catalog-level pricing, we expand the base event transition matrix from $T\times T$ into product-specific transitions using the current demand condition. In practice, we normalize the demand vector across products and use it to weight how much transition mass each product pair receives. Concretely, each cell of the base matrix becomes an $N\times N$ block (for $N$ products), so the transition matrix grows from $T\times T$ to $(T\cdot N)\times(T\cdot N)$. Finally, we add $C$ generic states (homepage, login, checkout terminal states), which gives the full kernel size $(T\cdot N + C)\times(T\cdot N + C)$.
% The validity of this demand-weighted block expansion is still subject to formal proof: it needs to be shown that the resulting matrix retains row-stochasticity (rows summing to 1) and that the weighting by the demand vector preserves the Markov property for the expanded state space. In the engine source this is the target of ongoing validation before the expansion is relied on for behavioral generation at scale.
@@ -364,7 +380,8 @@ To scale this to catalog-level pricing, we expand the base event transition matr
\subsection{Distributionally Robust Reinforcement Learning (DR-RL)}
We formulate pricing as a Stackelberg game: the platform (leader) sets prices $p_t$, and the population (follower) responds through trajectories and demand. A useful intuition is that the platform behaves like a distorted mirror at a 45-degree angle: what it mirrors is population demand into an estimated demand proxy, and that proxy drives revenue.
We formulate pricing as a Stackelberg game in which the platform (leader) sets prices $p_t$, and the population (follower) responds through trajectories and demand. A useful intuition is that the platform behaves like a distorted mirror at a 45-degree angle: what it mirrors is population demand into an estimated demand proxy, and that proxy drives revenue.
% TODO: add canonical Stackelberg citation.
Because contamination level $\alpha$ and demand shift are non-stationary online, a simple error term is not enough. We therefore use a Distributionally Robust Optimization objective. Let $\tau'$ be a newly observed trajectory generated by an unknown actor profile (sampled from the behavioral models in Section~\ref{sec:tpe}). We need a demand mapping conditioned on price and trajectory, $\hat{Q}(p,\tau')$. For each $\tau'$, we compute $\hat{\mathcal{T}}'$ and compare it with controlled baselines $\bar{\mathcal{T}}_H$ and $\bar{\mathcal{T}}_A$:
@@ -375,9 +392,38 @@ Because contamination level $\alpha$ and demand shift are non-stationary online,
\Delta_A &= D_{KL}(\hat{\mathcal{T}}^\prime \parallel \bar{\mathcal{T}}_A)
\end{align}
This yields two centroid-like heuristics that act as a session-level agent score in the engine. On a per-customer or use-case basis a similar study should be done in order to obtain ground truth behavior models for humans and agents and their specific interaction with a given products website.
From these two divergences we define the gap score following previously highlighted intuition of the divergence:
\begin{equation}
g(\tau') = \Delta_H(\tau') - \Delta_A(\tau').
\end{equation}
Positive values indicate trajectories farther from the human centroid and closer to the agent centroid.
In implementation, we maintain an alternating game-history stack (our \textit{Limbo} stack) and execute it explicitly every epoch with exactly two transitions: first the platform publishes a price vector (leader move), then the market responds with trajectory-derived demand (follower move).
We map this gap to a weak agent probability using a temperature-controlled logistic map:
\begin{equation}
f(\tau') = P(Y=A\mid\tau') = \operatorname{softmax}\left(-\frac{\Delta_A}{T},-\frac{\Delta_H}{T}\right)_A = \sigma\left(\frac{\Delta_H-\Delta_A}{T}\right), \quad T>0.
\end{equation}
The session-level control signal injected into pricing is then
\begin{equation}
\hat{\alpha}(\tau') = f(\tau').
\end{equation}
\begin{figure}[ht]
\centering
\input{chapters/figures/sigmoid_softmax_gap.tex}
\caption{Logistic mapping from the gap $\Delta_H-\Delta_A$ to the weak agent probability $f(\tau')$. Markers indicate the contrasts $\Delta_H<\Delta_A$ and $\Delta_H>\Delta_A$.}
\label{fig:sigmoid_softmax_gap}
\end{figure}
This turns distinguishability into an operational control input in the engine. On a per-customer or use-case basis, a similar data collection and fitting process should be repeated to obtain domain-specific behavior kernels.
In implementation we keep an alternating game-history buffer, which we call the \textit{Limbo}, and advance it each epoch with two transitions: first the platform publishes a price vector (leader move), then the environment returns trajectory-derived demand (follower move).
To avoid notation drift, we separate two COI objects used for different purposes:
\begin{align}
\text{COI}_{\text{level}}(\pi) &= \mathbb{E}[P]-\underline{p}\\
\text{COI}_{\text{leak}}(p,\tau') &= f(\tau')\cdot \text{InfoValue}(p,\tau')
\end{align}
where $\text{COI}_{\text{level}}$ is evaluated at policy level and $\text{COI}_{\text{leak}}$ is evaluated per observed quote during training. The information-value term $\text{InfoValue}$ is explained further in the discussion of the reward structure.
% Mention discretized action space and the clipping and overshooting in continuous action spaces
% Also talk about catastrophic economics: we add termination on bankruptcy or zero demand so the market collapses
@@ -398,7 +444,7 @@ and we evaluate a small fixed grid in $\mathcal{A}_{\epsilon_\alpha}(\alpha_0)$
\subsubsection{Environment Setup for Dynamic Pricing}
The complete pricing-demand-trajectory loop is illustrated in Figure~\ref{fig:oracle_flow}. The Oracle maps historical price and demand state to a new price vector, which is exposed to a distribution of demand curves. Each product generates trajectories weighted by behavioral kernels $\tau_\theta$, producing a full transition matrix $\tau'$ over sessions. Sampled trajectories $\{\tau_k\}$ are aggregated through the demand proxy function $Q(\cdot)$ to yield the next demand vector, which feeds back into the Oracle.
The complete pricing-demand-trajectory loop is illustrated in Figure~\ref{fig:oracle_flow}. The Oracle maps historical price and demand state to a new price vector, which is exposed to a distribution of demand curves. Each product generates trajectories weighted by behavioral kernels $\tau_Y$, producing a full transition matrix $\tau'$ over sessions. Sampled trajectories $\{\tau_k\}$ are aggregated through the demand proxy function $Q(\cdot)$ to yield the next demand vector, which feeds back into the Oracle.
\begin{figure}[ht]
\centering
@@ -414,7 +460,7 @@ p_N
\end{pmatrix}
\underrightarrow{d_i \sim \mathcal{N}_{\vec{p}}}
\begin{pmatrix}d_0\\ d_1\\ \cdots \\ d_N\end{pmatrix}
\underrightarrow{\vec{d}\otimes \tau_\theta}
\underrightarrow{\vec{d}\otimes \tau_Y}
\begin{bmatrix}
0.01 & 0.02 & \cdots & 0.3 \\
0.41 & 0.24 & \cdots & 0.0 \\
@@ -434,7 +480,7 @@ p_N
\end{aligned}
$}%
}
\caption{Oracle-based pricing loop: historical price and demand state map to a new price vector; each product samples demand curves from $\mathcal{N}_{\vec{p}}$; trajectories are generated via the Kronecker product $\vec{d}\otimes\tau_\theta$ into transition matrix $\tau'$; sampled trajectories $\{\tau_k\}$ aggregate through proxy $Q(\cdot)$ to yield updated demand $\vec{\hat{q}}$, closing the feedback loop.}
\caption{Oracle-based pricing loop: historical price and demand state map to a new price vector; each product samples demand curves from $\mathcal{N}_{\vec{p}}$; trajectories are generated via the Kronecker product $\vec{d}\otimes\tau_Y$ into transition matrix $\tau'$; sampled trajectories $\{\tau_k\}$ aggregate through proxy $Q(\cdot)$ to yield updated demand $\vec{\hat{q}}$, closing the feedback loop.}
\label{fig:oracle_flow}
\end{figure}
@@ -442,39 +488,45 @@ $}%
The robust policy $\pi^*$ is obtained by solving the maximin problem:
\begin{equation}
\label{eq:robust_policy}
\pi^* = \arg \max_{\pi} \min_{Q \in \mathcal{U}_\epsilon} \mathbb{E}_{d \sim Q} \left[ R(p, d) - \lambda \cdot \text{COI}_{\text{leak}}(p,\tau') \right]
\pi^* = \arg \max_{\pi} \min_{Q \in \mathcal{U}_\epsilon} \mathbb{E}_{d \sim Q} \left[ R(p, d) - \lambda \cdot \text{COI}_{\text{leak}}(p,\tau') - \eta_{\text{ux}} \cdot \text{UX}(\tau', p) \right]
\end{equation}
where $R(p, d)$ is the revenue function and $\lambda$ weighs the information-leakage penalty.
where $R(p, d)$ is the revenue function, $\lambda$ weighs the information-leakage penalty, $\eta_{\text{ux}}$ weighs the user-experience penalty, and $\text{UX}(\tau', p)\in[0,1]$. We note that $p$ depends directly on $\pi$, because the price is the action that the policy selects.
Looking at the reward structure, note that we are not subtracting COI but rather the leakage of COI, which is as defined below.
In practice, we parameterize this with a session-level leakage term:
\begin{equation}
\text{COI}_{\text{leak}}(p,\tau') = f(\tau')\cdot \text{InfoValue}(p,\tau')
\end{equation}
where $f(\tau')$ is the weak agent probability and $\text{InfoValue}$ is implemented either as a constant query-tax surrogate or as a revelation surrogate $-\log\pi(p\mid\tau')$.
where $f(\tau')$ is the weak agent probability and $\text{InfoValue}$ is implemented either as a constant query-tax surrogate or as a revelation surrogate $-\log\pi(p\mid\tau')$. The latter is the surprisal of the price under the policy's price-setting distribution. Essentially, we proxy the leakage term as the surprisal of the price our policy sets, weighted by the contamination estimate. Appendix~\ref{app:revelation_log} expands on why the logarithm is used in the revelation surrogate.
The inner minimization selects the contamination candidate that makes the penalized reward smallest, so the outer policy update faces the worst plausible leakage scenario inside the ambiguity set rather than an average case.
For the baseline engine reported here, we intentionally use the constant query-tax surrogate to keep the mechanism minimal:
\begin{equation}
r_t = R(p_t,\tilde q_t) - \lambda\,f(\tau_t')\,c_{\text{info}}
\label{eq:baseline_step_reward}
r_t = R\!\left(p_t,\hat{Q}_t\right) - \lambda\,f(\tau_t')\,c_{\text{info}} - \eta_{\text{ux}}\,\text{UX}(\tau_t', p_t)
\end{equation}
with fixed $c_{\text{info}}>0$.
with fixed $c_{\text{info}}>0$, matching the leakage term $\text{COI}_{\text{leak}}=f(\tau_t')\,c_{\text{info}}$ and the user-experience penalty already introduced in~\eqref{eq:robust_policy}.
Another possible extension is to adapt the ambiguity radius online, e.g., $\epsilon(\Delta_H)$, so the Wasserstein ball changes with live divergence. We keep this as future work and retain a fixed-radius setup because Wasserstein ambiguity already handles heavy-tail and ``black swan'' behavior without absolute continuity assumptions \parencite{kuhn_wasserstein_2024}.
\subsubsection{Actor Implementation}
In our simulation, the ``follower'' is implemented as a set of Actors. Each Actor is initialized with a type $\theta$ which samples a specific demand curve $d(p; \theta)$ from the latent distribution. This formalization ensures that our DR-RL agent does not overfit to a single deterministic demand function but learns a policy robust to the distributional uncertainty defined by $\mathcal{U}_\epsilon$.
In our simulation, the ``follower'' is implemented as a set of Actors. Each Actor is initialized with a class $Y$ and a latent type $\theta \sim \mathcal{D}_Y$, which samples a specific demand curve $d\!\left(p\mid Y,\theta\right)$ from the latent distribution. This formalization ensures that our DR-RL agent does not overfit to a single deterministic demand function but learns a policy robust to the distributional uncertainty defined by $\mathcal{U}_\epsilon$.
Practical implementation of browser agents is a strongly evolving field with near-weekly releases of SOTA architectures. In this thesis implementation we abstract that layer into trajectory generators learned from observed human/agent transition kernels.
As part of reward engineering, we keep a UX factor ($UX\in[0,1]$) as an auxiliary evaluation axis. In the current baseline it is not injected into the core reward; it is tracked separately to compare policy trade-offs.
As part of reward engineering, we keep a UX factor ($UX\in[0,1]$) as an auxiliary evaluation axis. In code, the UX index is implemented as a volatility penalty on relative price changes, with an extra upward-volatility component weighted by $0.5$ and scaled by $\eta_{\text{ux}}$ and an information-budget term. We also keep a separate supra-competitive penalty tied to persistent price excess above a competitive anchor, which punishes high-price behavior even when volatility is low.
We measure volatility as mean absolute relative price movement, $v_t=\frac{1}{N}\sum_{i=1}^N\bigl|(p_{t,i}-p_{t-1,i})/\max(p_{t-1,i},1)\bigr|$.
\begin{figure}[ht]
\centering
\resizebox{0.5\columnwidth}{!}{%
\input{chapters/balance_figure.tex}
}
\caption{Introducing the UX index allows us to better distinguish the kind of impact different methods have and allows us to compare them on this Pareto-like scale.}
\caption{Introducing the UX index allows us to better distinguish the kind of impact different methods have and allows us to compare them on this Pareto-efficiency-like scale.}
\end{figure}
We also consider taxation-like overlays for agent traffic under strategy-proof mechanism design (e.g., Vickrey-Clarke-Groves style rules). This remains an extension path and is not part of the main implementation in this thesis.
@@ -511,13 +563,13 @@ We now present the complete pricing mechanism that integrates the behavioral dis
\end{algorithm}
The algorithm operates in discrete epochs indexed by $t$. At each epoch, the platform applies one discrete multiplicative price action, the environment samples a batch of sessions, and demand is recomputed from weighted events. Robustness is implemented as an inner minimization over a small local grid of contamination candidates around nominal $\alpha_0$, matching the current engine implementation. The history buffer $\mathcal{L}$ (``Limbo'' in our implementation) enforces the alternating Stackelberg structure by preserving the temporal sequence of price publications and demand observations.
The algorithm operates in discrete epochs indexed by $t$. At each epoch, the platform applies one discrete multiplicative price action, the environment samples a batch of sessions, and demand is recomputed from weighted events. Robustness is implemented as an inner minimization over a small local grid of contamination candidates around nominal $\alpha_0$, matching the current engine implementation. The history buffer $\mathcal{L}$ enforces the alternating Stackelberg structure by preserving the temporal sequence of price publications and demand observations.
%The defensive price update in Line 24 implements contamination-aware margin shrinkage: as estimated contamination $\hat{\alpha}_t$ rises, the margin $(p^{\mathrm{ref}} - c)$ is reduced by factor $\kappa\in[0,1]$, with projection $\Pi_{\mathcal{P}}$ ensuring feasibility. In subsequent experiments this heuristic rule is replaced by DR-RL policy $\pi^*$ from Eq.~\ref{eq:robust_policy}.
\subsection{Parallelization Strategy}
To avoid preemption of compute mid-training we settle on using a v4 generation, 40 chip compute node with 5 parallel workers. The login node creates an orchestration node with Ray \parencite{moritz_ray_2018} and we distribute ray compute nodes per each other worker.
To reduce mid-job preemption we standardize on a TPU v4 allocation with 40 chips and five workers. A head process launches Ray \parencite{moritz_ray_2018} and attaches workers across the remaining hosts.
\subsubsection{Computational Cost Analysis of the Simulation Step}
The per-step cost of Algorithm~\ref{alg:phantom_loop_clean} is not uniform across its components. To inform hardware provisioning and to identify where algorithmic improvements are most impactful, we profile the hot path of the engine using Python's \texttt{cProfile} instrumentation over 20 environment steps under two configurations: a baseline with the robustness inner loop disabled ($K=1$, $\epsilon_\alpha=0$) and a standard robust setting ($K=5$, $\epsilon_\alpha=0.2$). Both runs use $M=10$ sessions per market call and $N=3$ products.
@@ -526,7 +578,7 @@ The baseline achieves approximately 26 steps per second. Enabling the robustness
\begin{table}[ht]
\centering
\caption{Per-step profiling results (20 steps, $M=10$ sessions, $N=3$ products). Self-time measures time spent inside the function excluding callees; cumulative time includes the full call subtree.}
\caption{Per-step profiling results (20 steps, $M=10$ sessions, $N=3$ products). Self-time measures time spent inside the function excluding callees and cumulative time includes the full call subtree.}
\label{tab:profile_results}
\begingroup
\small

View File

@@ -1,8 +1,14 @@
\section{Results}
\label{sec:results}
% The gap we target is not detection for its own sake but whether behavioral signals can support pricing decisions once agent traffic is present. This section follows the supporting questions in \cref{sec:research_questions}: we first establish session-level distinguishability (behavioral evidence and a rank test), then estimate how contamination shifts revenue in a controlled sweep, and finally compare robust and baseline policies under factorial training with COI and revenue readouts. The ordering is deliberate---each stage feeds the next so that separability, contamination effects, and policy outcomes form one connected line of evidence.
In our work, the gap we target is not detection for its own sake. Our aim is to understand which behavioral signals can support pricing decisions once agent traffic is present. We now piece together the path we laid out in \cref{sec:research_questions}. We first establish distinguishability (behavioral evidence and a rank test), then estimate how contamination shifts revenue in an adversarial environment, and finally compare robust and baseline pricing under factorial training.
\begin{figure}[ht]
\centering
\input{chapters/figures/supra/supra.tex}
\caption{Evolution of price distributions over experiment steps. The heatmap illustrates the density of price offerings. This is an early baseline simulation which demonstrates supra-competitive price-setting in deep learning agents such as SAC as can be clearly seen by the high density at the highest available price.}
\caption{Evolution of price distributions over experiment steps. The heatmap illustrates the density of price offerings. This is an early baseline simulation which demonstrates supra-competitive price-setting in deep learning agents such as Soft Actor-Critic, as can be clearly seen from the high density at the highest available price.}
\label{fig:supra_heatmap}
\end{figure}
@@ -40,7 +46,26 @@ We report two preliminary stages before the full factorial interpretation. First
\subsubsection{The Impact of Contamination on Revenue}
A linear fit test on run-level data ($n=95$) shows a strong negative association between contamination and mean revenue. The fitted model mapping $\alpha \to \text{revenue}$ result in $t(93)=-8.2148$, $p=1.20\times 10^{-12}$, $R^2=0.4205$, and a 95\% confidence interval for the slope of $[-75{,}288.76,\,-45{,}975.13]$. In practical terms, a $+0.1$ increase in $\alpha$ corresponds to an average decrease of about $6{,}063$ revenue units within our environment.
The contamination--revenue slope is estimated on a controlled cohort (single sweep, baseline policy, $n_{\text{products}}=100$, $n=95$). In this setting, contamination $\alpha$ is set exogenously by the experiment, so the slope identifies the within-sweep causal effect of contamination on revenue under fixed policy and environment settings. These results are in favor of our second research question \hyperlink{sq2}{\textbf{SQ2}} (\textit{Theoretical Impact}) from \cref{sec:research_questions}.
\begin{table}[ht]
\centering
\caption{Slope verification table for contamination versus revenue.}
\label{tab:contamination_slope_table}
\begin{tabular}{@{}lrrrrr@{}}
\toprule
Term & Coef. & Std. Err. & $t$ & $p>|t|$ & 95\% CI \\
\midrule
Intercept & 348,823.41 & 784.29 & 444.77 & $<10^{-99}$ & $[347,264.96,\,350,381.86]$ \\
$\alpha$ & $-90,140.53$ & 1,466.90 & $-61.45$ & $4.27\times10^{-77}$ & $[-93,053.38,\,-87,227.68]$ \\
\midrule
HC1 robust check ($\alpha$) & $-90,140.53$ & 2,185.22 & $-41.25$ & $1.42\times10^{-61}$ & -- \\
\bottomrule
\end{tabular}
\end{table}
Interpreted on the contamination grid, a $+0.1$ increase in $\alpha$ corresponds to an average revenue decrease of about $9{,}014$ units, and the robust check preserves both direction and significance.
% TODO: add a compact proposal note for re-running tests with statsmodels in the appendix methodology notes.
\subsubsection{Large Scale Factorial Training}
@@ -54,32 +79,33 @@ In our complete training runs we logged $\approx 180$ days of net compute time.
\begin{figure}[ht]
\centering
\input{chapters/figures/results/includes/final/final_focus_revenue_by_alpha.tex}
\input{chapters/figures/results/includes/final_focus_revenue_by_alpha.tex}
\caption{Revenue curves by contamination for the final cohort. The baseline remains above the defended curve in most cells, but the gap narrows in the high-contamination region.}
\label{fig:final_focus_revenue_by_alpha}
\end{figure}
% TODO: we need a similar plot which shows the COI preserved (what we gain across the multiple contamination levels), showing that the robust method has better COI optimization.
\begin{figure}[ht]
\centering
\input{chapters/figures/results/includes/final/final_focus_revenue_delta.tex}
\caption{Defended-minus-baseline revenue delta over contamination for the final cohort. The strongest high-contamination deviation begins at $\alpha=0.7$, followed by recovery toward near parity by $\alpha=1.0$.}
\label{fig:final_focus_revenue_delta}
\input{chapters/figures/results/includes/final_focus_coi_by_alpha.tex}
\caption{COI level curves by contamination for the final cohort. The shaded band marks the per-$\alpha$ gap between defended and baseline policies.}
\label{fig:final_focus_coi_by_alpha}
\end{figure}
\begin{figure}[ht]
\centering
\input{chapters/figures/results/includes/final/final_focus_risk_deltas.tex}
\caption{Defended-minus-baseline leakage and volatility deltas for the final cohort. Leakage remains lower for the defended policy across the full contamination range.}
\label{fig:final_focus_risk_deltas}
\input{chapters/figures/results/includes/final_focus_coi_preservation_grid.tex}
\caption{COI preservation by product count at the contamination endpoints ($\alpha=0.0$ and $\alpha=1.0$). Bars report defended-minus-baseline mean COI level, with the zero line separating preservation from erosion.}
\label{fig:final_focus_coi_preservation_grid}
\end{figure}
\subsection{Interpretation and Insights}
The Mann-Whitney result ($p<0.001$) confirms that per-session divergence gaps distinguish the two actor classes with near-zero overlap in rank ordering. This is the condition required for distinguishability to act as a useful control signal in the pricing loop rather than just an auxiliary classifier score.
The Mann-Whitney result ($p<0.001$) confirms that per-session divergence gaps distinguish the two actor classes with near-zero overlap in rank ordering. This is the condition required for distinguishability to act as a useful control signal in the pricing loop rather than just an auxiliary classifier score. This is a direct result relevant to our first pillar \hyperlink{sq1}{\textbf{SQ1}} (\textit{Distinguishability}) from \cref{sec:research_questions}.
The first calibration and paired benchmark runs additionally confirm three practical points aligned with the thesis. First, the control loop is reproducible end-to-end (training, evaluation, artifact generation) across algorithms and contamination levels. Second, policy class materially changes price trajectories and resulting COI/revenue profiles under identical environment settings. Third, objective improvements from robustness are regime-dependent in the current baseline, which is consistent with the thesis claim that contamination-aware pricing needs explicit calibration rather than a one-size-fits-all penalty.
We also note that maximizing revenue in isolation can favor aggressive high-price behavior; even in these early runs, the non-robust aggregate shows slightly higher mean COI and margin. For this reason, all subsequent reporting in this thesis is interpreted on a multi-metric basis (objective, revenue, COI, and stability), and not by revenue alone.
We also note that maximizing revenue in isolation can favor aggressive high-price behavior; even in our early runs, the non-robust aggregate shows slightly higher mean COI and margin. For this reason, all subsequent reporting in this thesis is interpreted on a multi-metric basis (objective, revenue, COI, and stability), and not by revenue alone. This is another direct answer to our third pillar \hyperlink{sq3}{\textbf{SQ3}} (\textit{Robust Mitigation}) from \cref{sec:research_questions}.
\subsection{Anomalies}

View File

@@ -1,19 +1,26 @@
\section{Discussion}
\label{sec:discussion}
% TODO: GDPR here
\subsection{Transition to Agentic Market Microstructure}
Our analysis of the interaction dynamics between the platform and non-human actors suggests that the current static pricing models are insufficient for an agent-mediated economy. If we assume a transition toward a direct revelation mechanism, where actors must reveal their true valuation of a good through bidding dynamics, we inevitably introduce significant stochasticity into the pricing system. Unlike traditional e-commerce where prices are relatively sticky, such a mechanism implies a high volatility characteristic of financial equity markets (without the fungability however).
However, ecommerce commodities differ fundamentally from financial securities: they possess a hard floor defined by unit economics and reservation prices. The market might react enthusiastically to an iPhone priced at \$1, such a transaction is not permissible. The platform must establish an initial valuation anchor ($P_{0}$) defined by the marginal cost plus a target margin, around which the market price is permitted to fluctuate. We float the introduction of GenAI Agents as Institutional Market Makers. As the arms race for greater autonomy of agnetic systems grows, the commercial viability of AI agents has the potential to disseminate into every-day users directly interacting with them rather than e-commerce platforms. This is also under the assumption of expected transactional capabilities being given to AI Agents.
Our analysis of the interaction dynamics between the platform and non-human actors suggests that the current static pricing models are insufficient for an agent-mediated economy. If we assume a transition toward a direct revelation mechanism, where actors must reveal their true valuation of a good through bidding dynamics, we inevitably introduce significant stochasticity into the pricing system. Unlike traditional e-commerce where prices are relatively sticky, such a mechanism implies a high volatility characteristic of financial equity markets (without the fungibility however).
However, e-commerce commodities differ fundamentally from financial securities: they possess a hard floor defined by unit economics and reservation prices. The market might react enthusiastically to an iPhone priced at \$1. Such a transaction is not permissible. The platform must establish an initial valuation anchor ($P_0$) defined by the marginal cost plus a target margin, around which the market price is permitted to fluctuate.
We float the introduction of GenAI Agents as Institutional Market Makers. As the arms race for greater autonomy of agentic systems grows, the commercial viability of AI agents has the potential to disseminate into everyday users directly interacting with them rather than e-commerce platforms. This is also under the assumption of expected transactional capabilities being given to AI Agents.
\subsection{Risk Assessment and Limitations}
\label{sec:limitations_risks}
This technology does not come without a more bitter side, ethical concerns do arise from the idea of deploying black-box like solutions to set prices based on a behavioral attributes. Approaches like universal behavioral profile modeling (UBPM) used in recommendation systems is very broadly utilized.
Behavior-based pricing raises predictable ethics questions when models are opaque: a behavioral profile can become a basis for price discrimination or exclusion if deployed without governance. Universal behavioral profile modeling (UBPM) in recommendation already shows how fine-grained traces enable strong personalization. The same machinery applied to prices needs guardrails.
With a system like this there is potential for strong drift given the rapid advance of agentic systems and user preference. Our intent behind adding the UX term into the reward shaping process was to further address the risk of degraded user experience. Looking deeper at the underlying methodology, reinforcement learning does not come without it's complications such as reward hacking and often the lack of intepretability which is quite critical in systems that have a strong impact on the revenue of a company.
We balance human and agent sessions near one-to-one so cohorts are comparable despite different population sizes. The row-level dataset still contains thousands of events.
% Rapid change in agent capabilities and user expectations induces model drift; the UX term in reward shaping was included partly to penalize policies that sacrifice legitimate users for short-run revenue. Reinforcement learning adds its own risks---reward hacking and limited interpretability---which matter when policies touch live revenue; deployment would require monitoring and constraints beyond what we exercised here.
With the exponential growth in the capability of agents as well as in user expectations, a degree of model drift is expected in this setting. The computational requirements for continuous extraction of margin, as demonstrated by our work, are dictated by the persistent speed of the market. Reinforcement learning that sacrifices legitimate user experience for short-run revenue does not hold up in the long run. Reward hacking, to which pricing algorithms are not impervious due to their limited interpretability, is a significant risk for a company if live revenue is in play. Deployment requires consistent monitoring and constraints beyond what was done as an exercise in this work.
% \subsection{Implications of Findings} Interpretation of results and alternative scenarios with broader market implications.

View File

@@ -1,24 +1,27 @@
\section{Conclusion}
\label{sec:conclusion}
Our research has explored how reinforcement learning works within pricing systems and environments which are substantially disrupted by an adversarial participant. Our findings include the optimization for our newly introduced metrics.
This thesis examined reinforcement-learning policies for dynamic pricing when a fraction of traffic is orchestrated by non-human agents intent on extracting information before purchase. We introduced COI-oriented metrics, a behavioral distinguishability layer, and a distributionally robust training loop; empirical runs show where robustness helps and where it must be tuned.
\subsection{Summary of contributions}
The contribution was not without the advice of many experienced experts in the field. We thank Marco Casalaina VP Products, Core AI and AI Futurist at Microsoft for the initial critical discussion on the topic of dynamic pricing systems and the spark which has lead to this work. Eugene Bykovets, PhD pointing out the parallels in blockchain systems and the complexity of anonymous interaction and understanding of intent. Importantly, the contributions of Alberto Martín Izquierdo, my academic advisor for the support over and for taking on the challenge of this ambitious work. Many breakthroughs were thanks to numerous discussions with my peers on the topics covered here.
A thanks to the head of innovation at Amadeus for insight into the industry split on the topic of collapsing margins. Finally we acknowledge the power and use of generative AI technologies for in depth research, rapid prototyping and surfacing of key topics and niches.
Our work has yielded a broad set of dependencies which we carefully orchestrated to give us measurable results. To give a clear picture we outline the specific contributions of each stage of our work. The theoretical component formalizes why agent-mediated reconnaissance erodes pricing power, the behavioral component establishes that such contamination is detectable from interaction traces alone, the control component translates that distinguishability into a robust pricing mechanism, and the systems component provides the controlled experimental environment required to observe, test, and reproduce these effects.
We now state explicitly what we contribute in this work:
\begin{itemize}
\item TPU-accelerated parallelization of the behavioral simulation and reinforcement learning pipeline, making large-scale factorial sweeps tractable.
\item TPU-accelerated parallelization of the behavioral simulation and reinforcement learning pipeline, making large factorial sweeps tractable.
\item Formalization of non-human transaction orchestration in e-commerce as a distinct source of contamination in dynamic pricing systems.
\item Definition of the Cost of Information (COI) as a mechanism-level quantity for pricing power, together with a theorem showing its erosion under increasing agent saturation.
\item Design and implementation of a controlled e-commerce research platform, built on a hybrid Kappa-Lambda architecture, for collecting and replaying high-fidelity interaction trajectories.
\item Construction and empirical validation of a behavioral distinguishability framework that distinguishes human and agent sessions from interaction signals alone using transition kernels and KL-based divergence.
\item Development of a generative contamination mechanism that injects learned agent behavior into the pricing environment for controlled robustness experiments.
\item Translation of behavioral distinguishability into a defensive pricing mechanism through a distributionally robust reinforcement learning formulation of pricing under non-stationary contamination.
\item Empirical evidence that agent contamination reduces revenue and that robustness is condition-dependent, requiring explicit calibration rather than a one-size-fits-all penalty.
\item Release of a reusable public experimental artifact for reproducing and extending research on dynamic pricing under agent-mediated traffic.
\item Definition of the Cost of Information (COI) as a mechanism-level quantity for pricing power, together with a theorem on its erosion under increasing agent saturation.
\item Design and implementation of a controlled e-commerce research platform on a hybrid Kappa--Lambda architecture for collecting and replaying high-fidelity interaction trajectories.
\item Construction and empirical validation of a behavioral distinguishability framework that separates human and agent sessions from interaction signals alone using transition kernels and KL-based divergence.
\item A generative contamination mechanism that injects learned agent behavior into the pricing environment for controlled robustness experiments.
\item Translation of distinguishability scores into defensive pricing via distributionally robust reinforcement learning under non-stationary contamination.
\item Evidence that contamination depresses revenue and that robustness gains are regime-dependent, so penalties and radii need calibration rather than a single default.
\item Release of a public experimental artifact (code and dataset) for reproducing and extending work on agent-mediated traffic.
\end{itemize}
\subsection{Future Works and Next Steps}
\subsection{Limitations and future work}
During the eights months of research dedicated to this work, a plethora of opportunities and industry gaps was identified, sadly a majority of which could not be addressed directly.
Several constraints are intentional and could be relaxed later. Action weights in the demand proxy are currently derived from simple divergence rankings; learning them from data is an obvious next step. We propose to jointly learn the demand proxy, policy, and simulator parameters instead of treating them modularly. Another avenue we could not cover in this work is incorporating Bayesian methods to better capture demand uncertainty and to propagate that uncertainty into reward systems.
The Stackelberg interface assumes a clean alternation between platform move and market response. Richer histories (multi-agent, multi-platform) would need a less rigid state definition. Non-perishable catalog supply in the simulator widens the sim-to-real gap for inventory-constrained domains. Within-session contamination is modeled as stable; a time-varying $\alpha$ inside a session would better match some attack patterns.
Before any deployment, human baselines should grow beyond the convenience sample used here, catalog scaling laws should be re-checked when transition matrices grow with SKU count, and the full pipeline should be re-validated under production traffic volumes, governance constraints, and product mixes.
We conclude our work with enthusiasm for future developments in the field of agent-mediated commerce. We are excited to provide the foundations for these developments and hope to see future work in a similar spirit.

View File

@@ -1,3 +1,7 @@
\section{Acknowledgements}
\section*{Acknowledgements}
Eugene Bykovets, PhD - ETH
This research was supported by the TPU Research Cloud program, which provided access to Google Cloud Tensor Processing Unit (TPU) accelerators, including TPU v4, v5e, and v6e.
I am grateful to Marco Casalaina (VP of Product, Core AI, Microsoft) for an early conversation on dynamic pricing that helped frame the problem. Eugene Bykovets (Ph.D.) pointed out useful parallels with blockchain systems and the difficulty of inferring intent under pseudonymity. Alberto Mart\'{i}n Izquierdo supervised this work and accepted an unusually wide brief. Several peers contributed through discussion of the topics covered here. The head of innovation at Amadeus offered industry perspective on margin compression under automation.
Generative tools were used for literature search, prototyping, and drafting support; all claims, experiments, and final wording remain the author's responsibility.

View File

@@ -0,0 +1,36 @@
% Horizontal tree: level distance must exceed ~half parent + half child width or nodes overlap (resizebox does not fix that).
% Participant assignment tree: a root "Participant" node branches into two
% platform nodes (Hotel, Airline), each followed by a single leaf naming the
% task pool that the task is sampled from.
\begin{tikzpicture}[
% Tree layout: children are placed to the right of their parent.
grow=right,
level distance=30mm,
sibling distance=23mm,
% Style for the root and the platform nodes: drawn rectangle with slightly
% rounded corners, centred text, and minimum dimensions.
decision/.style={
rectangle,
draw,
rounded corners=1.5pt,
align=center,
inner sep=1.2pt,
minimum width=14mm,
minimum height=4.8mm,
font=\scriptsize,
},
% Style for the terminal nodes: drawn rectangle with a fixed text width so the
% two-line labels (split with \\) wrap inside a 19mm box.
leaf/.style={
rectangle,
draw,
align=center,
inner sep=1.2pt,
text width=19mm,
minimum height=4mm,
font=\scriptsize,
},
% Parent-to-child edges are drawn with a small Latex arrow tip.
% NOTE(review): the -{Latex[...]} tip syntax presumably relies on the
% arrows.meta library being loaded in the preamble -- confirm.
edge from parent/.style={draw, -{Latex[length=1.2mm]}},
]
% Root node.
\node[decision] {Participant}
% Branch 1: hotel platform and its task pool.
child {
node[decision] {Platform: Hotel}
child {node[leaf] {Task sampled\\from hotel pool}}
}
% Branch 2: airline platform and its task pool.
child {
node[decision] {Platform: Airline}
child {node[leaf] {Task sampled\\from airline pool}}
};
\end{tikzpicture}

View File

@@ -3,7 +3,7 @@ alpha,revenue_delta,revenue_delta_pct,reward_delta,reward_delta_pct,volatility_d
0.1,-14962.041501283413,-4.410637208586118,-14303.760282736213,-4.531344436782669,0.0011858665298920962,0.0,-0.004133727080174038
0.2,-16153.416666167905,-4.826514761457546,-15398.621298776357,-4.9418165571901715,0.00200624274016295,0.0,-0.0033201883450373615
0.3,-17294.9275360335,-5.382423616385397,-16544.91845114401,-5.533399709364953,-0.0011022484400295268,0.0,-0.0029151149203366505
0.4,-19661.294346174283,-6.250307313590199,-18728.35578200908,-6.3953153560217535,3.582812967113658e-05,0.0,-0.0038123361988749577
0.4,-19543.8750398212,-6.215299839915013,-18613.487687777204,-6.35858461426586,-2.7530592947980215e-05,0.0,-0.0038561140856475523
0.5,-16411.03168918495,-5.3630681206030015,-15638.77510066732,-5.4888928630525315,0.00015428950526953644,0.0,-0.00439661338956944
0.6,-14729.668247641937,-5.069964928178309,-13912.22417824401,-5.148827377884945,-0.002735776807082743,0.0,-0.004310129386364658
0.7,-21160.81910514756,-7.351404104505076,-20171.762105623755,-7.525169314210056,-0.0008903632602569461,0.0,-0.0026198461183787186
1 alpha revenue_delta revenue_delta_pct reward_delta reward_delta_pct volatility_delta supra_delta coi_leakage_delta
3 0.1 -14962.041501283413 -4.410637208586118 -14303.760282736213 -4.531344436782669 0.0011858665298920962 0.0 -0.004133727080174038
4 0.2 -16153.416666167905 -4.826514761457546 -15398.621298776357 -4.9418165571901715 0.00200624274016295 0.0 -0.0033201883450373615
5 0.3 -17294.9275360335 -5.382423616385397 -16544.91845114401 -5.533399709364953 -0.0011022484400295268 0.0 -0.0029151149203366505
6 0.4 -19661.294346174283 -19543.8750398212 -6.250307313590199 -6.215299839915013 -18728.35578200908 -18613.487687777204 -6.3953153560217535 -6.35858461426586 3.582812967113658e-05 -2.7530592947980215e-05 0.0 -0.0038123361988749577 -0.0038561140856475523
7 0.5 -16411.03168918495 -5.3630681206030015 -15638.77510066732 -5.4888928630525315 0.00015428950526953644 0.0 -0.00439661338956944
8 0.6 -14729.668247641937 -5.069964928178309 -13912.22417824401 -5.148827377884945 -0.002735776807082743 0.0 -0.004310129386364658
9 0.7 -21160.81910514756 -7.351404104505076 -20171.762105623755 -7.525169314210056 -0.0008903632602569461 0.0 -0.0026198461183787186

View File

@@ -7,7 +7,7 @@ alpha,mode,runs,revenue_mean,reward_mean,supra_mean,volatility_mean,coi_leakage_
0.2,defended,35,318527.35122792586,296199.77820822067,0.0,0.07048630468445288,0.11265850300394666,137.2758153292305
0.3,baseline,30,321322.30327214615,299000.9636054795,0.0,0.07085669473747759,0.11527347603412934,136.4452630715689
0.3,defended,44,304027.37573611265,282456.0451543355,0.0,0.06975444629744806,0.11235836111379269,136.4704115371568
0.4,baseline,33,314565.2423109539,292844.914432166,0.0,0.07031811881503117,0.11300307992768284,136.72547178046122
0.4,baseline,33,314447.8230046008,292730.04633793415,0.0,0.07038147753765028,0.11304685781445543,136.70817144219887
0.4,defended,38,294903.9479647796,274116.55865015695,0.0,0.0703539469447023,0.10919074372880788,136.75671002806396
0.5,baseline,33,306000.80625751516,284916.7489847879,0.0,0.06938663916591635,0.11118137138243217,136.9528780620641
0.5,defended,35,289589.7745683302,269277.9738841206,0.0,0.06954092867118589,0.10678475799286273,136.65018588845163
1 alpha mode runs revenue_mean reward_mean supra_mean volatility_mean coi_leakage_mean coi_level_mean
7 0.2 defended 35 318527.35122792586 296199.77820822067 0.0 0.07048630468445288 0.11265850300394666 137.2758153292305
8 0.3 baseline 30 321322.30327214615 299000.9636054795 0.0 0.07085669473747759 0.11527347603412934 136.4452630715689
9 0.3 defended 44 304027.37573611265 282456.0451543355 0.0 0.06975444629744806 0.11235836111379269 136.4704115371568
10 0.4 baseline 33 314565.2423109539 314447.8230046008 292844.914432166 292730.04633793415 0.0 0.07031811881503117 0.07038147753765028 0.11300307992768284 0.11304685781445543 136.72547178046122 136.70817144219887
11 0.4 defended 38 294903.9479647796 274116.55865015695 0.0 0.0703539469447023 0.10919074372880788 136.75671002806396
12 0.5 baseline 33 306000.80625751516 284916.7489847879 0.0 0.06938663916591635 0.11118137138243217 136.9528780620641
13 0.5 defended 35 289589.7745683302 269277.9738841206 0.0 0.06954092867118589 0.10678475799286273 136.65018588845163

View File

@@ -0,0 +1,45 @@
alpha,n_products,baseline_runs,defended_runs,baseline_coi_level_mean,defended_coi_level_mean,coi_preserved,coi_preserved_pct
0.0,5.0,9,10,137.060822623968,136.18680853180368,-0.874014092164316,-0.6376833842316922
0.0,25.0,9,2,137.114858903596,136.13793579187393,-0.9769231117220727,-0.7124852255501622
0.0,50.0,9,11,137.16224858153575,136.92415566181484,-0.23809291972091273,-0.17358487643878118
0.0,100.0,9,12,135.86629045322655,137.3609873086303,1.4946968554037596,1.1001234010420895
0.1,5.0,3,6,136.59581715538818,135.6308466787041,-0.9649704766840728,-0.7064421859904723
0.1,25.0,11,8,135.9860669350444,136.43616365263273,0.45009671758833747,0.33098737814318313
0.1,50.0,10,11,136.28362874897243,136.92880179422633,0.6451730452538982,0.4734046570203046
0.1,100.0,8,8,137.35578496752095,137.53394777402949,0.17816280650853855,0.12970899372797937
0.2,5.0,8,9,135.55116314329388,137.30311388107864,1.7519507377847674,1.2924645551973204
0.2,25.0,10,9,137.01587649612287,137.22137163685403,0.20549514073115915,0.1499790724887083
0.2,50.0,4,8,137.45096138958434,137.1307018163465,-0.32025957323784837,-0.2329991511155169
0.2,100.0,9,9,137.50780776750915,137.43195025898902,-0.07585750852013007,-0.0551659645744523
0.3,5.0,6,6,134.95569459599133,134.21855668602896,-0.7371379099623709,-0.5462073402453271
0.3,25.0,9,16,136.38346021911525,136.32131251342705,-0.06214770568820427,-0.04556835967378819
0.3,50.0,8,6,136.97414077213367,136.88041560990786,-0.09372516222580884,-0.06842544271310845
0.3,100.0,7,16,137.19706520314455,137.31020460277784,0.11313939963329744,0.08246488324351146
0.4,5.0,8,11,135.6494813257779,136.5487738152141,0.899292489436192,0.6629531352769695
0.4,25.0,7,9,136.38451372914378,136.10614648175604,-0.27836724738773455,-0.20410473284420322
0.4,50.0,7,10,137.12976275807247,136.98838321468799,-0.14137954338448822,-0.10309909427460566
0.4,100.0,11,8,137.4158065068933,137.4849148270489,0.06910832015560686,0.050291390715769026
0.5,5.0,7,19,135.91101413475477,136.145621134976,0.2346070002212457,0.1726180925915501
0.5,25.0,8,7,137.0972914279529,137.35620682163616,0.2589153936832531,0.18885522170896996
0.5,50.0,8,1,137.0714841014652,135.66696334266234,-1.404520758802846,-1.0246629837050352
0.5,100.0,10,8,137.4717672869487,137.35366167964338,-0.11810560730532416,-0.08591262746975456
0.6,5.0,8,13,133.13626070539635,136.09936023073067,2.9630995253343144,2.225614201296411
0.6,25.0,5,10,136.0741624588533,136.26219778039936,0.18803532154606728,0.13818591137970535
0.6,50.0,8,10,135.09036188289087,136.05846380616936,0.968101923278482,0.7166328595060871
0.6,100.0,7,8,137.29304001584052,137.07512338179083,-0.2179166340496863,-0.15872372993164377
0.7,5.0,7,7,136.0533783988379,135.14350016006424,-0.9098782387736719,-0.6687656341075052
0.7,25.0,8,11,137.12781750399415,136.8176582131797,-0.3101592908144539,-0.2261826203172962
0.7,50.0,14,11,137.06965735909125,136.7028634119364,-0.3667939471548607,-0.26759674914335285
0.7,100.0,11,11,137.48279078937205,137.09121810549402,-0.39157268387802446,-0.28481578067317975
0.8,5.0,4,7,135.3095773096514,136.59715728802078,1.2875799783693935,0.9515808148766959
0.8,25.0,12,13,136.93488398652164,135.73319876476054,-1.201685221761096,-0.8775596011600497
0.8,50.0,6,8,136.4704324290659,136.86568018140107,0.39524775233516607,0.289621528487943
0.8,100.0,4,11,137.519864039095,137.4763376137669,-0.04352642532811046,-0.03165100957032396
0.9,5.0,5,5,134.77024204025943,136.6651608019597,1.8949187617002679,1.4060364758669837
0.9,25.0,9,13,136.7554042236364,136.06108143100832,-0.6943227926280713,-0.507711411164888
0.9,50.0,10,12,136.08715955450202,137.07569864767092,0.988539093168896,0.7264014447836223
0.9,100.0,11,9,137.57053132642514,137.30115968842037,-0.2693716380047704,-0.19580620602940735
1.0,5.0,5,7,136.43177888041947,135.92674388998284,-0.5050349904366271,-0.37017401266847305
1.0,25.0,11,9,136.7037183889911,136.22617845471228,-0.47753993427880914,-0.34932475861407586
1.0,50.0,11,5,136.93074105866745,137.05826644845806,0.12752538979060546,0.09313130769953819
1.0,100.0,8,9,136.4880191421812,137.41913068956546,0.9311115473842619,0.682192879079234
1 alpha n_products baseline_runs defended_runs baseline_coi_level_mean defended_coi_level_mean coi_preserved coi_preserved_pct
2 0.0 5.0 9 10 137.060822623968 136.18680853180368 -0.874014092164316 -0.6376833842316922
3 0.0 25.0 9 2 137.114858903596 136.13793579187393 -0.9769231117220727 -0.7124852255501622
4 0.0 50.0 9 11 137.16224858153575 136.92415566181484 -0.23809291972091273 -0.17358487643878118
5 0.0 100.0 9 12 135.86629045322655 137.3609873086303 1.4946968554037596 1.1001234010420895
6 0.1 5.0 3 6 136.59581715538818 135.6308466787041 -0.9649704766840728 -0.7064421859904723
7 0.1 25.0 11 8 135.9860669350444 136.43616365263273 0.45009671758833747 0.33098737814318313
8 0.1 50.0 10 11 136.28362874897243 136.92880179422633 0.6451730452538982 0.4734046570203046
9 0.1 100.0 8 8 137.35578496752095 137.53394777402949 0.17816280650853855 0.12970899372797937
10 0.2 5.0 8 9 135.55116314329388 137.30311388107864 1.7519507377847674 1.2924645551973204
11 0.2 25.0 10 9 137.01587649612287 137.22137163685403 0.20549514073115915 0.1499790724887083
12 0.2 50.0 4 8 137.45096138958434 137.1307018163465 -0.32025957323784837 -0.2329991511155169
13 0.2 100.0 9 9 137.50780776750915 137.43195025898902 -0.07585750852013007 -0.0551659645744523
14 0.3 5.0 6 6 134.95569459599133 134.21855668602896 -0.7371379099623709 -0.5462073402453271
15 0.3 25.0 9 16 136.38346021911525 136.32131251342705 -0.06214770568820427 -0.04556835967378819
16 0.3 50.0 8 6 136.97414077213367 136.88041560990786 -0.09372516222580884 -0.06842544271310845
17 0.3 100.0 7 16 137.19706520314455 137.31020460277784 0.11313939963329744 0.08246488324351146
18 0.4 5.0 8 11 135.6494813257779 136.5487738152141 0.899292489436192 0.6629531352769695
19 0.4 25.0 7 9 136.38451372914378 136.10614648175604 -0.27836724738773455 -0.20410473284420322
20 0.4 50.0 7 10 137.12976275807247 136.98838321468799 -0.14137954338448822 -0.10309909427460566
21 0.4 100.0 11 8 137.4158065068933 137.4849148270489 0.06910832015560686 0.050291390715769026
22 0.5 5.0 7 19 135.91101413475477 136.145621134976 0.2346070002212457 0.1726180925915501
23 0.5 25.0 8 7 137.0972914279529 137.35620682163616 0.2589153936832531 0.18885522170896996
24 0.5 50.0 8 1 137.0714841014652 135.66696334266234 -1.404520758802846 -1.0246629837050352
25 0.5 100.0 10 8 137.4717672869487 137.35366167964338 -0.11810560730532416 -0.08591262746975456
26 0.6 5.0 8 13 133.13626070539635 136.09936023073067 2.9630995253343144 2.225614201296411
27 0.6 25.0 5 10 136.0741624588533 136.26219778039936 0.18803532154606728 0.13818591137970535
28 0.6 50.0 8 10 135.09036188289087 136.05846380616936 0.968101923278482 0.7166328595060871
29 0.6 100.0 7 8 137.29304001584052 137.07512338179083 -0.2179166340496863 -0.15872372993164377
30 0.7 5.0 7 7 136.0533783988379 135.14350016006424 -0.9098782387736719 -0.6687656341075052
31 0.7 25.0 8 11 137.12781750399415 136.8176582131797 -0.3101592908144539 -0.2261826203172962
32 0.7 50.0 14 11 137.06965735909125 136.7028634119364 -0.3667939471548607 -0.26759674914335285
33 0.7 100.0 11 11 137.48279078937205 137.09121810549402 -0.39157268387802446 -0.28481578067317975
34 0.8 5.0 4 7 135.3095773096514 136.59715728802078 1.2875799783693935 0.9515808148766959
35 0.8 25.0 12 13 136.93488398652164 135.73319876476054 -1.201685221761096 -0.8775596011600497
36 0.8 50.0 6 8 136.4704324290659 136.86568018140107 0.39524775233516607 0.289621528487943
37 0.8 100.0 4 11 137.519864039095 137.4763376137669 -0.04352642532811046 -0.03165100957032396
38 0.9 5.0 5 5 134.77024204025943 136.6651608019597 1.8949187617002679 1.4060364758669837
39 0.9 25.0 9 13 136.7554042236364 136.06108143100832 -0.6943227926280713 -0.507711411164888
40 0.9 50.0 10 12 136.08715955450202 137.07569864767092 0.988539093168896 0.7264014447836223
41 0.9 100.0 11 9 137.57053132642514 137.30115968842037 -0.2693716380047704 -0.19580620602940735
42 1.0 5.0 5 7 136.43177888041947 135.92674388998284 -0.5050349904366271 -0.37017401266847305
43 1.0 25.0 11 9 136.7037183889911 136.22617845471228 -0.47753993427880914 -0.34932475861407586
44 1.0 50.0 11 5 136.93074105866745 137.05826644845806 0.12752538979060546 0.09313130769953819
45 1.0 100.0 8 9 136.4880191421812 137.41913068956546 0.9311115473842619 0.682192879079234

View File

@@ -1,11 +1,14 @@
{
"bundle": "engine/studies/results/wandb_sweep_bundles/bundle_20260317_093826",
"bundle": "/home/velocitatem/Documents/Projects/PHANTOM/engine/studies/results/wandb_sweep_bundles/bundle_20260317_122818",
"focus_cohort": "max_alpha_coverage",
"focus_sweep_id": "i88nw811",
"focus_run_count": 768,
"git_commit": "ace52e8e14e0f7fa96ab5eb113c0c898b0bce1a0",
"alpha_cells": 11,
"alpha_min": 0.0,
"alpha_max": 1.0,
"mean_revenue_delta_pct": -4.787221975639986,
"mean_reward_delta_pct": -4.91730667541704,
"mean_revenue_delta_pct": -4.784039478033151,
"mean_reward_delta_pct": -4.913967517075595,
"zone_summary": [
{
"zone": "high_alpha_0_7_plus",
@@ -18,10 +21,10 @@
{
"zone": "low_alpha_below_0_7",
"alpha_cells": 7,
"revenue_delta_pct_mean": -5.201949225367208,
"reward_delta_pct_mean": -5.324947138914036,
"coi_leakage_delta_mean": -0.0037041938968711296,
"volatility_delta_mean": 0.00011102505536893832
"revenue_delta_pct_mean": -5.196948157699325,
"reward_delta_pct_mean": -5.319699890091765,
"coi_leakage_delta_mean": -0.003710447880695786,
"volatility_delta_mean": 0.00010197380928049306
}
]
}

View File

@@ -1,3 +1,3 @@
zone,alpha_cells,revenue_delta_pct_mean,reward_delta_pct_mean,coi_leakage_delta_mean,volatility_delta_mean
high_alpha_0_7_plus,4,-4.0614492886173466,-4.2039358642972955,-0.0018236753956396637,0.00026289072427068336
low_alpha_below_0_7,7,-5.201949225367208,-5.324947138914036,-0.0037041938968711296,0.00011102505536893832
low_alpha_below_0_7,7,-5.196948157699325,-5.319699890091765,-0.003710447880695786,0.00010197380928049306
1 zone alpha_cells revenue_delta_pct_mean reward_delta_pct_mean coi_leakage_delta_mean volatility_delta_mean
2 high_alpha_0_7_plus 4 -4.0614492886173466 -4.2039358642972955 -0.0018236753956396637 0.00026289072427068336
3 low_alpha_below_0_7 7 -5.201949225367208 -5.196948157699325 -5.324947138914036 -5.319699890091765 -0.0037041938968711296 -0.003710447880695786 0.00011102505536893832 0.00010197380928049306

View File

@@ -0,0 +1,24 @@
{
"normality": {
"test": "jarque_bera",
"available": true,
"statistic": 362.38850707984324,
"p_value": 2.0339278125496517e-79
},
"heteroskedasticity": {
"test": "breusch_pagan",
"available": true,
"lm_stat": 6.0366025380616275,
"df": 1,
"p_value": 0.014012224810767138
},
"influence": {
"max_leverage": 0.03769234230180875,
"mean_leverage": 0.021052631578947392,
"high_leverage_threshold": 0.042105263157894736,
"high_leverage_count": 0,
"max_cooks_distance": 0.29121755538277183,
"high_cooks_threshold": 0.042105263157894736,
"high_cooks_count": 6
}
}

View File

@@ -0,0 +1,96 @@
sweep_id,sweep_full_id,run_id,run_name,state,run_url,created_at,runtime,downloaded_files,history_rows,selected_for_clone,download_error,alpha,n_products,eta_ux,lambda_coi,baseline_mode,no_robust,study_mode,eval_revenue_mean,eval_reward_mean,eval_stress_revenue_worst,eval_stress_reward_worst,eval_supra_share_mean,eval_supra_penalty_mean,eval_volatility_mean,eval_upward_volatility_mean,eval_coi_level_mean,eval_coi_leakage_mean,objective_score,mode
i88nw811,lusiana/capstone_tpu/i88nw811,0yph6ddt,sweep/ppo/sb3/cpu/default/a0.7/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/0yph6ddt,2026-03-15T13:48:47Z,7579.766959963,0,0,0,,0.7,100.0,0.0,0.05,True,True,baseline,285875.15518050164,266287.2051805016,274356.50146499986,255620.24146499988,0.0,0.0,0.0711188680417482,0.0,137.42722406640746,0.1099719716550294,255620.24146499988,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,bjwmxlf4,sweep/ppo/sb3/cpu/default/a0.9/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/bjwmxlf4,2026-03-15T13:48:49Z,7514.003863569,0,0,0,,0.9,100.0,0.0,0.05,True,True,baseline,267194.6114143838,248902.78141438385,258791.60782635584,241079.0878263559,0.0,0.0,0.0706779448814682,0.0,137.4716591479769,0.1060063717489262,241079.0878263559,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,afod7srx,sweep/ppo/sb3/cpu/default/a0/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/afod7srx,2026-03-15T13:48:55Z,8428.923550896,0,0,0,,0.0,100.0,0.0,0.15,True,True,baseline,331626.71399641165,307929.2839964116,301903.22363424243,278909.22363424255,0.0,0.0,0.0699106903089938,0.0,134.44341240328637,0.1239456985672444,278909.22363424255,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,czbwbw4o,sweep/ppo/sb3/cpu/default/a0.3/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/czbwbw4o,2026-03-15T13:48:55Z,8019.834460958,0,0,0,,0.3,100.0,0.0,0.05,True,True,baseline,325062.60932028474,302657.9893202848,313580.73955351143,292103.1195535114,0.0,0.0,0.0700934793925504,0.0,137.30226556155992,0.1156304945350146,292103.1195535114,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,spncr5i5,sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/spncr5i5,2026-03-15T13:48:57Z,7984.536208498,0,0,0,,0.4,100.0,0.0,0.3,True,True,baseline,313890.156459866,292317.566459866,301905.6061551721,281189.2661551722,0.0,0.0,0.0700585666613017,0.0,137.27393385978286,0.1140225013120235,281189.2661551722,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,9utcbgal,sweep/ppo/sb3/cpu/default/a0.6/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/9utcbgal,2026-03-15T13:48:58Z,7794.573495005,0,0,0,,0.6,100.0,0.0,0.3,True,True,baseline,296881.4938150014,276559.4338150014,282693.0664052287,263321.0864052287,0.0,0.0,0.0689497793839256,0.0,137.65459475595475,0.1116745762120893,263321.0864052287,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,6uhc0zfi,sweep/ppo/sb3/cpu/default/a0.1/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/6uhc0zfi,2026-03-15T13:48:59Z,8739.343652451,5,5000,1,,0.1,100.0,0.0,0.3,True,True,baseline,345607.36851277394,321934.388512774,330271.9018417394,307619.2418417394,0.0,0.0,0.0688978199434404,0.0,137.65927138408344,0.1180576040723697,307619.2418417394,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,mid9h16o,sweep/ppo/sb3/cpu/default/a0.3/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/mid9h16o,2026-03-15T13:48:59Z,7934.709025792,0,0,0,,0.3,100.0,0.0,0.15,True,True,baseline,321120.1030044527,298922.9430044526,312002.2572538445,290604.6972538445,0.0,0.0,0.0725338635316591,0.0,136.9642983472208,0.1152504371251349,290604.6972538445,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,hm8geh95,sweep/ppo/sb3/cpu/default/a0.3/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/hm8geh95,2026-03-15T13:49:01Z,8324.170881475,0,0,0,,0.3,100.0,0.0,0.05,True,True,baseline,321120.1030044527,298922.9430044526,312002.2572538445,290604.6972538445,0.0,0.0,0.0725338635316591,0.0,136.9642983472208,0.1152504371251349,290604.6972538445,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,2k3bx48e,sweep/ppo/sb3/cpu/default/a0.7/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/2k3bx48e,2026-03-15T13:49:03Z,7579.046562713,0,0,0,,0.7,100.0,0.0,0.3,True,True,baseline,288003.5379862045,268208.7279862045,274205.49798255006,255466.81798255,0.0,0.0,0.0732015803628115,0.0,137.25851714050424,0.1065894678006264,255466.81798255,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,mlcllxuf,sweep/ppo/sb3/cpu/default/a0.3/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/mlcllxuf,2026-03-15T15:28:13Z,8048.447950291,0,0,0,,0.3,100.0,0.0,0.05,True,True,baseline,325062.60932028474,302657.9893202848,313580.73955351143,292103.1195535114,0.0,0.0,0.0700934793925504,0.0,137.30226556155992,0.1156304945350146,292103.1195535114,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,gsx5p3xl,sweep/ppo/sb3/cpu/default/a0.7/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/gsx5p3xl,2026-03-15T15:29:00Z,7666.062008427,0,0,0,,0.7,100.0,0.0,0.3,True,True,baseline,286859.8032779717,267231.9932779717,273198.5349293896,254530.3349293896,0.0,0.0,0.0694378534785247,0.0,137.6169536272908,0.1086813731317916,254530.3349293896,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,dh2sidg0,sweep/ppo/sb3/cpu/default/a0.8/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/dh2sidg0,2026-03-15T15:31:51Z,7450.114589126,0,0,0,,0.8,100.0,0.0,0.3,True,True,baseline,277537.1135308166,258574.23353081665,260525.6140973399,242761.4740973399,0.0,0.0,0.0691119185711536,0.0,137.63850710873982,0.1055234893030045,242761.4740973399,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,izb1xfjn,sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/izb1xfjn,2026-03-15T15:38:35Z,8138.431632101,0,0,0,,0.4,100.0,0.0,0.05,True,True,baseline,313890.156459866,292317.566459866,301905.6061551721,281189.2661551722,0.0,0.0,0.0700585666613017,0.0,137.27393385978286,0.1140225013120235,281189.2661551722,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,h5v0bjkk,sweep/ppo/sb3/cpu/default/a1/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/h5v0bjkk,2026-03-15T15:53:08Z,7430.137394885,0,0,0,,1.0,100.0,0.0,0.05,True,True,baseline,258250.4083985968,240558.37839859675,257579.27605596423,239906.35605596425,0.0,0.0,0.0710781742010645,0.0,137.43891114039735,0.1034797519569495,239906.35605596425,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,oo9x7mtj,sweep/ppo/sb3/cpu/default/a0/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/oo9x7mtj,2026-03-15T17:08:57Z,8434.676111878,0,0,0,,0.0,100.0,0.0,0.15,True,True,baseline,331626.71399641165,307929.2839964116,301903.22363424243,278909.22363424255,0.0,0.0,0.0699106903089938,0.0,134.44341240328637,0.1239456985672444,278909.22363424255,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,2tnqjvsr,sweep/ppo/sb3/cpu/default/a0.2/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/2tnqjvsr,2026-03-15T17:10:41Z,8326.316856098,0,0,0,,0.2,100.0,0.0,0.3,True,True,baseline,333463.32883383776,310606.38883383776,322375.37087837915,300349.6308783791,0.0,0.0,0.0694238399850746,0.0,137.6206723870474,0.1176551945750585,300349.6308783791,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,uwl4b1t4,sweep/ppo/sb3/cpu/default/a0.6/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/uwl4b1t4,2026-03-15T17:11:41Z,7730.138244902,0,0,0,,0.6,100.0,0.0,0.15,True,True,baseline,293934.0132863448,273673.5532863448,278235.2158621181,259045.3158621181,0.0,0.0,0.0702286844227449,0.0,137.02187396075487,0.1108792101893818,259045.3158621181,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,mq08631s,sweep/ppo/sb3/cpu/default/a0.7/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/mq08631s,2026-03-15T17:11:46Z,7830.903683379,0,0,0,,0.7,100.0,0.0,0.3,True,True,baseline,286859.8032779717,267231.9932779717,273198.5349293896,254530.3349293896,0.0,0.0,0.0694378534785247,0.0,137.6169536272908,0.1086813731317916,254530.3349293896,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,oenf81vs,sweep/ppo/sb3/cpu/default/a0.9/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/oenf81vs,2026-03-15T17:14:03Z,7571.420325966,0,0,0,,0.9,100.0,0.0,0.15,True,True,baseline,268129.28805568966,249777.98805568964,259354.03651639624,241657.8165163962,0.0,0.0,0.0692141212557269,0.0,137.56737533812094,0.1028102128114812,241657.8165163962,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,imvig8ea,sweep/ppo/sb3/cpu/default/a0.9/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/imvig8ea,2026-03-15T17:26:17Z,7548.356923917,0,0,0,,0.9,100.0,0.0,0.05,True,True,baseline,269095.26288012683,250709.3028801269,257985.06236888352,240343.2023688835,0.0,0.0,0.0687681637998595,0.0,137.63174822647662,0.1040919495927453,240343.2023688835,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,kc46mwot,sweep/ppo/sb3/cpu/default/a0.9/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/kc46mwot,2026-03-15T17:36:54Z,7402.437478922,0,0,0,,0.9,100.0,0.0,0.3,True,True,baseline,269095.26288012683,250709.3028801269,257985.06236888352,240343.2023688835,0.0,0.0,0.0687681637998595,0.0,137.63174822647662,0.1040919495927453,240343.2023688835,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,6c5g20m0,sweep/ppo/sb3/cpu/default/a0.4/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/6c5g20m0,2026-03-15T17:39:15Z,7987.751960449,0,0,0,,0.4,100.0,0.0,0.05,True,True,baseline,314792.9405088838,293199.96050888376,304000.02795477153,283160.5079547715,0.0,0.0,0.0706474903672308,0.0,137.54347765167836,0.1134114537317883,283160.5079547715,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,zmfirgme,sweep/ppo/sb3/cpu/default/a0.6/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/zmfirgme,2026-03-15T17:39:38Z,7729.43292327,0,0,0,,0.6,100.0,0.0,0.3,True,True,baseline,296881.4938150014,276559.4338150014,282693.0664052287,263321.0864052287,0.0,0.0,0.0689497793839256,0.0,137.65459475595475,0.1116745762120893,263321.0864052287,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,5w978f6n,sweep/ppo/sb3/cpu/default/a0.2/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/5w978f6n,2026-03-15T17:42:23Z,8196.563842857,0,0,0,,0.2,100.0,0.0,0.3,True,True,baseline,328662.28105387173,305848.95105387166,316489.4913151873,294621.8913151873,0.0,0.0,0.0726481757500429,0.0,136.60489081120323,0.115056283050696,294621.8913151873,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,v6yuq532,sweep/ppo/sb3/cpu/default/a0.3/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/v6yuq532,2026-03-15T18:27:32Z,8171.524047551,0,0,0,,0.3,100.0,0.0,0.3,True,True,baseline,325536.3728999571,303203.77289995714,311530.19009115506,290169.93009115505,0.0,0.0,0.0690101249418158,0.0,137.57976469566975,0.115140125484157,290169.93009115505,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,wzs4h708,sweep/ppo/sb3/cpu/default/a1/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/wzs4h708,2026-03-15T18:44:40Z,7213.500579862,0,0,0,,1.0,100.0,0.0,0.3,True,True,baseline,258250.4083985968,240558.37839859675,257579.27605596423,239906.35605596425,0.0,0.0,0.0710781742010645,0.0,137.43891114039735,0.1034797519569495,239906.35605596425,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,drjegsa8,sweep/ppo/sb3/cpu/default/a0.8/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/drjegsa8,2026-03-15T18:53:51Z,7642.750902648,0,0,0,,0.8,100.0,0.0,0.05,True,True,baseline,278042.9708277731,258987.21082777312,265119.53279206343,246979.39279206347,0.0,0.0,0.069699479796535,0.0,137.47635104131075,0.1063946886684759,246979.39279206347,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,np3fvzwt,sweep/ppo/sb3/cpu/default/a0.9/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/np3fvzwt,2026-03-15T18:57:50Z,7300.325366337,0,0,0,,0.9,100.0,0.0,0.3,True,True,baseline,269095.26288012683,250709.3028801269,257985.06236888352,240343.2023688835,0.0,0.0,0.0687681637998595,0.0,137.63174822647662,0.1040919495927453,240343.2023688835,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,kk0sqa97,sweep/ppo/sb3/cpu/default/a0.1/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/kk0sqa97,2026-03-15T19:06:17Z,8525.177181009,0,0,0,,0.1,100.0,0.0,0.3,True,True,baseline,341404.1205957663,317885.0305957663,329505.50925893825,306817.3492589383,0.0,0.0,0.0685274095002656,0.0,137.33021724658855,0.1206998447923596,306817.3492589383,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,i0rpx1kf,sweep/ppo/sb3/cpu/default/a0.2/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/i0rpx1kf,2026-03-15T19:20:36Z,8356.73493734,0,0,0,,0.2,100.0,0.0,0.05,True,True,baseline,333463.32883383776,310606.38883383776,322375.37087837915,300349.6308783791,0.0,0.0,0.0694238399850746,0.0,137.6206723870474,0.1176551945750585,300349.6308783791,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,lqmaq5g2,sweep/ppo/sb3/cpu/default/a1/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/lqmaq5g2,2026-03-15T20:02:28Z,7470.274064026,0,0,0,,1.0,100.0,0.0,0.05,True,True,baseline,246584.29279154172,229303.12279154177,244564.78814724492,227386.888147245,0.0,0.0,0.0692074374069363,0.0,135.2844805658817,0.1093837602765936,227386.888147245,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,2umearxm,sweep/ppo/sb3/cpu/default/a0.5/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/2umearxm,2026-03-15T20:09:56Z,7829.406313163,0,0,0,,0.5,100.0,0.0,0.3,True,True,baseline,303325.5596877454,282520.29968774534,291965.65710567136,271937.69710567134,0.0,0.0,0.0686525035124021,0.0,137.57073544790862,0.1132342695408356,271937.69710567134,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,k7pirqxy,sweep/ppo/sb3/cpu/default/a1/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/k7pirqxy,2026-03-15T20:33:53Z,7216.626889631,0,0,0,,1.0,100.0,0.0,0.15,True,True,baseline,254537.24517731377,236935.99517731369,254471.2696855663,236912.16968556636,0.0,0.0,0.0703905833083271,0.0,136.6143424312229,0.1038838810036006,236912.16968556636,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,algnjce4,sweep/ppo/sb3/cpu/default/a0.6/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/algnjce4,2026-03-15T20:54:24Z,7739.30650029,0,0,0,,0.6,100.0,0.0,0.05,True,True,baseline,296881.4938150014,276559.4338150014,282693.0664052287,263321.0864052287,0.0,0.0,0.0689497793839256,0.0,137.65459475595475,0.1116745762120893,263321.0864052287,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,vqe2dmcq,sweep/ppo/sb3/cpu/default/a0.4/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/vqe2dmcq,2026-03-15T21:08:22Z,7815.774646473,0,0,0,,0.4,100.0,0.0,0.05,True,True,baseline,316543.04043212667,294899.01043212664,299980.59649797506,279386.7564979751,0.0,0.0,0.067603468946279,0.0,137.7846896269947,0.1128739206843639,279386.7564979751,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,0xlvpawh,sweep/ppo/sb3/cpu/default/a0.3/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/0xlvpawh,2026-03-15T21:16:04Z,7997.68392245,0,0,0,,0.3,100.0,0.0,0.15,True,True,baseline,325062.60932028474,302657.9893202848,313580.73955351143,292103.1195535114,0.0,0.0,0.0700934793925504,0.0,137.30226556155992,0.1156304945350146,292103.1195535114,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,bofuxayn,sweep/ppo/sb3/cpu/default/a0.7/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/bofuxayn,2026-03-15T21:18:05Z,7486.102336723,0,0,0,,0.7,100.0,0.0,0.05,True,True,baseline,285875.15518050164,266287.2051805016,274356.50146499986,255620.24146499988,0.0,0.0,0.0711188680417482,0.0,137.42722406640746,0.1099719716550294,255620.24146499988,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,rujnezt7,sweep/ppo/sb3/cpu/default/a0.5/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/rujnezt7,2026-03-15T21:20:23Z,7936.01356938,0,0,0,,0.5,100.0,0.0,0.15,True,True,baseline,305342.590984541,284402.02098454104,287794.11179162114,267934.8717916211,0.0,0.0,0.0698329564541014,0.0,137.34875112178105,0.1110975441706762,267934.8717916211,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,f9e6wtv0,sweep/ppo/sb3/cpu/default/a0.7/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/f9e6wtv0,2026-03-15T22:07:04Z,8030.825365422,0,0,0,,0.7,100.0,0.0,0.05,True,True,baseline,288003.5379862045,268208.7279862045,274205.49798255006,255466.81798255,0.0,0.0,0.0732015803628115,0.0,137.25851714050424,0.1065894678006264,255466.81798255,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,r8hsz3ko,sweep/ppo/sb3/cpu/default/a0.7/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/r8hsz3ko,2026-03-15T22:13:06Z,7691.998775531,0,0,0,,0.7,100.0,0.0,0.3,True,True,baseline,286859.8032779717,267231.9932779717,273198.5349293896,254530.3349293896,0.0,0.0,0.0694378534785247,0.0,137.6169536272908,0.1086813731317916,254530.3349293896,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,yukg46hv,sweep/ppo/sb3/cpu/default/a1/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/yukg46hv,2026-03-15T23:03:27Z,7094.861108483,0,0,0,,1.0,100.0,0.0,0.15,True,True,baseline,254537.24517731377,236935.99517731369,254471.2696855663,236912.16968556636,0.0,0.0,0.0703905833083271,0.0,136.6143424312229,0.1038838810036006,236912.16968556636,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,e5tciezz,sweep/ppo/sb3/cpu/default/a0.7/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/e5tciezz,2026-03-16T00:16:08Z,7569.145925588,0,0,0,,0.7,100.0,0.0,0.05,True,True,baseline,285875.15518050164,266287.2051805016,274356.50146499986,255620.24146499988,0.0,0.0,0.0711188680417482,0.0,137.42722406640746,0.1099719716550294,255620.24146499988,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,1rop5sf9,sweep/ppo/sb3/cpu/default/a0.3/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/1rop5sf9,2026-03-16T00:21:00Z,8354.617713686,0,0,0,,0.3,100.0,0.0,0.05,True,True,baseline,321120.1030044527,298922.9430044526,312002.2572538445,290604.6972538445,0.0,0.0,0.0725338635316591,0.0,136.9642983472208,0.1152504371251349,290604.6972538445,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,7muxpseb,sweep/ppo/sb3/cpu/default/a0.2/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/7muxpseb,2026-03-16T00:21:21Z,8514.602541985,0,0,0,,0.2,100.0,0.0,0.05,True,True,baseline,333463.32883383776,310606.38883383776,322375.37087837915,300349.6308783791,0.0,0.0,0.0694238399850746,0.0,137.6206723870474,0.1176551945750585,300349.6308783791,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,304dyypp,sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/304dyypp,2026-03-16T00:37:04Z,7949.736292204,0,0,0,,0.4,100.0,0.0,0.3,True,True,baseline,313890.156459866,292317.566459866,301905.6061551721,281189.2661551722,0.0,0.0,0.0700585666613017,0.0,137.27393385978286,0.1140225013120235,281189.2661551722,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,zbw7nmeo,sweep/ppo/sb3/cpu/default/a0.1/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/zbw7nmeo,2026-03-16T00:53:02Z,8423.598177489,0,0,0,,0.1,100.0,0.0,0.05,True,True,baseline,340941.7898046945,317438.6698046944,328185.5337341634,305593.15373416344,0.0,0.0,0.0709483560344898,0.0,137.21682561970587,0.1186714838821206,305593.15373416344,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,oxu7rm37,sweep/ppo/sb3/cpu/default/a0.9/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/oxu7rm37,2026-03-16T00:53:31Z,7464.830361968,0,0,0,,0.9,100.0,0.0,0.3,True,True,baseline,268129.28805568966,249777.98805568964,259354.03651639624,241657.8165163962,0.0,0.0,0.0692141212557269,0.0,137.56737533812094,0.1028102128114812,241657.8165163962,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,m78p26vk,sweep/ppo/sb3/cpu/default/a0/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/m78p26vk,2026-03-16T00:56:58Z,8717.289024041,5,1004,1,,0.0,100.0,0.0,0.15,True,True,baseline,348861.1454509751,324713.0754509751,335967.6160126648,312660.3160126648,0.0,0.0,0.0674835742466741,0.0,136.8813175598437,0.118985751213389,312660.3160126648,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,65zzmszh,sweep/ppo/sb3/cpu/default/a1/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/65zzmszh,2026-03-16T01:14:03Z,7326.553384609,0,0,0,,1.0,100.0,0.0,0.3,True,True,baseline,246584.29279154172,229303.12279154177,244564.78814724492,227386.888147245,0.0,0.0,0.0692074374069363,0.0,135.2844805658817,0.1093837602765936,227386.888147245,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,47xraqt6,sweep/ppo/sb3/cpu/default/a0.9/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/47xraqt6,2026-03-16T01:22:01Z,7299.814264453,0,0,0,,0.9,100.0,0.0,0.3,True,True,baseline,269095.26288012683,250709.3028801269,257985.06236888352,240343.2023688835,0.0,0.0,0.0687681637998595,0.0,137.63174822647662,0.1040919495927453,240343.2023688835,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,mibyt0bf,sweep/ppo/sb3/cpu/default/a0.9/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/mibyt0bf,2026-03-16T01:34:44Z,7541.153639959,0,0,0,,0.9,100.0,0.0,0.3,True,True,baseline,267194.6114143838,248902.78141438385,258791.60782635584,241079.0878263559,0.0,0.0,0.0706779448814682,0.0,137.4716591479769,0.1060063717489262,241079.0878263559,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,8ww25eu1,sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/8ww25eu1,2026-03-16T01:45:51Z,8003.812511886,0,0,0,,0.4,100.0,0.0,0.3,True,True,baseline,313890.156459866,292317.566459866,301905.6061551721,281189.2661551722,0.0,0.0,0.0700585666613017,0.0,137.27393385978286,0.1140225013120235,281189.2661551722,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,cxdz0iyj,sweep/ppo/sb3/cpu/default/a0.6/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/cxdz0iyj,2026-03-16T01:50:19Z,7623.493600288,0,0,0,,0.6,100.0,0.0,0.3,True,True,baseline,293934.0132863448,273673.5532863448,278235.2158621181,259045.3158621181,0.0,0.0,0.0702286844227449,0.0,137.02187396075487,0.1108792101893818,259045.3158621181,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,1aeqr4sw,sweep/ppo/sb3/cpu/default/a1/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/1aeqr4sw,2026-03-16T01:58:10Z,7156.375097998,0,0,0,,1.0,100.0,0.0,0.3,True,True,baseline,254537.24517731377,236935.99517731369,254471.2696855663,236912.16968556636,0.0,0.0,0.0703905833083271,0.0,136.6143424312229,0.1038838810036006,236912.16968556636,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,7sgqchvk,sweep/ppo/sb3/cpu/default/a0.9/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/7sgqchvk,2026-03-16T02:09:14Z,7268.202978965,0,0,0,,0.9,100.0,0.0,0.15,True,True,baseline,267194.6114143838,248902.78141438385,258791.60782635584,241079.0878263559,0.0,0.0,0.0706779448814682,0.0,137.4716591479769,0.1060063717489262,241079.0878263559,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,3s777ena,sweep/ppo/sb3/cpu/default/a0.5/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/3s777ena,2026-03-16T02:14:54Z,7762.769931002,0,0,0,,0.5,100.0,0.0,0.05,True,True,baseline,303325.5596877454,282520.29968774534,291965.65710567136,271937.69710567134,0.0,0.0,0.0686525035124021,0.0,137.57073544790862,0.1132342695408356,271937.69710567134,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,oxsvuh5p,sweep/ppo/sb3/cpu/default/a0.1/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/oxsvuh5p,2026-03-16T02:27:01Z,8529.692612353,0,0,0,,0.1,100.0,0.0,0.15,True,True,baseline,340941.7898046945,317438.6698046944,328185.5337341634,305593.15373416344,0.0,0.0,0.0709483560344898,0.0,137.21682561970587,0.1186714838821206,305593.15373416344,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,4unnwl9l,sweep/ppo/sb3/cpu/default/a0.7/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/4unnwl9l,2026-03-16T02:34:01Z,7780.065361146,0,0,0,,0.7,100.0,0.0,0.15,True,True,baseline,286859.8032779717,267231.9932779717,273198.5349293896,254530.3349293896,0.0,0.0,0.0694378534785247,0.0,137.6169536272908,0.1086813731317916,254530.3349293896,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,qlfu6ts4,sweep/ppo/sb3/cpu/default/a0.1/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/qlfu6ts4,2026-03-16T02:46:52Z,8357.276406226,0,0,0,,0.1,100.0,0.0,0.3,True,True,baseline,340941.7898046945,317438.6698046944,328185.5337341634,305593.15373416344,0.0,0.0,0.0709483560344898,0.0,137.21682561970587,0.1186714838821206,305593.15373416344,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,ya2bb56z,sweep/ppo/sb3/cpu/default/a1/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/ya2bb56z,2026-03-16T03:04:37Z,7161.126998896,0,0,0,,1.0,100.0,0.0,0.15,True,True,baseline,254537.24517731377,236935.99517731369,254471.2696855663,236912.16968556636,0.0,0.0,0.0703905833083271,0.0,136.6143424312229,0.1038838810036006,236912.16968556636,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,9hrjmcaf,sweep/ppo/sb3/cpu/default/a0.1/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/9hrjmcaf,2026-03-16T03:13:29Z,8543.819880598,5,1004,1,,0.1,100.0,0.0,0.15,True,True,baseline,345607.36851277394,321934.388512774,330271.9018417394,307619.2418417394,0.0,0.0,0.0688978199434404,0.0,137.65927138408344,0.1180576040723697,307619.2418417394,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,bdz7jpg9,sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/bdz7jpg9,2026-03-16T03:19:29Z,8156.512730959,0,0,0,,0.4,100.0,0.0,0.15,True,True,baseline,313890.156459866,292317.566459866,301905.6061551721,281189.2661551722,0.0,0.0,0.0700585666613017,0.0,137.27393385978286,0.1140225013120235,281189.2661551722,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,4e8bw9fr,sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/4e8bw9fr,2026-03-16T03:23:44Z,7900.988162577,0,0,0,,0.4,100.0,0.0,0.3,True,True,baseline,313890.156459866,292317.566459866,301905.6061551721,281189.2661551722,0.0,0.0,0.0700585666613017,0.0,137.27393385978286,0.1140225013120235,281189.2661551722,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,rudposqg,sweep/ppo/sb3/cpu/default/a0.8/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/rudposqg,2026-03-16T04:16:36Z,7803.944972672,0,0,0,,0.8,100.0,0.0,0.15,True,True,baseline,277186.5585556976,258169.5585556976,260819.58418764165,242908.9641876417,0.0,0.0,0.0684627361221973,0.0,137.3260908975896,0.1077409453905398,242908.9641876417,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,r24xwwl9,sweep/ppo/sb3/cpu/default/a0.1/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/r24xwwl9,2026-03-16T04:43:43Z,8571.635566955,0,0,0,,0.1,100.0,0.0,0.15,True,True,baseline,340941.7898046945,317438.6698046944,328185.5337341634,305593.15373416344,0.0,0.0,0.0709483560344898,0.0,137.21682561970587,0.1186714838821206,305593.15373416344,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,34c0wzgt,sweep/ppo/sb3/cpu/default/a0.5/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/34c0wzgt,2026-03-16T04:43:54Z,7912.776898111,0,0,0,,0.5,100.0,0.0,0.05,True,True,baseline,306631.1127310434,285624.6727310434,292140.0218133485,272205.32181334845,0.0,0.0,0.0706121906603894,0.0,137.48236407441985,0.112886126809283,272205.32181334845,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,7bvonhab,sweep/ppo/sb3/cpu/default/a0.2/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/7bvonhab,2026-03-16T04:59:24Z,8276.510250338,0,0,0,,0.2,100.0,0.0,0.15,True,True,baseline,333463.32883383776,310606.38883383776,322375.37087837915,300349.6308783791,0.0,0.0,0.0694238399850746,0.0,137.6206723870474,0.1176551945750585,300349.6308783791,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,4f7j1z4p,sweep/ppo/sb3/cpu/default/a0/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/4f7j1z4p,2026-03-16T05:37:06Z,8672.519975981,5,1004,1,,0.0,100.0,0.0,0.3,True,True,baseline,352771.72255003714,328513.3625500371,337718.8770159761,314393.4970159762,0.0,0.0,0.0709252720738168,0.0,137.49769422651883,0.1192149910017191,314393.4970159762,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,c33cyjv9,sweep/ppo/sb3/cpu/default/a0.4/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/c33cyjv9,2026-03-16T05:38:08Z,8164.154912737,0,0,0,,0.4,100.0,0.0,0.15,True,True,baseline,314792.9405088838,293199.96050888376,304000.02795477153,283160.5079547715,0.0,0.0,0.0706474903672308,0.0,137.54347765167836,0.1134114537317883,283160.5079547715,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,i0pylqm1,sweep/ppo/sb3/cpu/default/a0.6/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/i0pylqm1,2026-03-16T05:54:46Z,7692.357589996,0,0,0,,0.6,100.0,0.0,0.15,True,True,baseline,293934.0132863448,273673.5532863448,278235.2158621181,259045.3158621181,0.0,0.0,0.0702286844227449,0.0,137.02187396075487,0.1108792101893818,259045.3158621181,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,p1lrhc1t,sweep/ppo/sb3/cpu/default/a0.5/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/p1lrhc1t,2026-03-16T06:06:24Z,7906.656203638,0,0,0,,0.5,100.0,0.0,0.15,True,True,baseline,304711.516143744,283789.716143744,290536.18598250934,270609.3259825093,0.0,0.0,0.0700712626186499,0.0,137.43043602946972,0.1112796769387625,270609.3259825093,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,lkhtnobk,sweep/ppo/sb3/cpu/default/a0.9/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/lkhtnobk,2026-03-16T06:25:11Z,7304.77470818,0,0,0,,0.9,100.0,0.0,0.3,True,True,baseline,269095.26288012683,250709.3028801269,257985.06236888352,240343.2023688835,0.0,0.0,0.0687681637998595,0.0,137.63174822647662,0.1040919495927453,240343.2023688835,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,dvf0av6p,sweep/ppo/sb3/cpu/default/a0/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/dvf0av6p,2026-03-16T06:34:22Z,8568.236301103,0,0,0,,0.0,100.0,0.0,0.3,True,True,baseline,331626.71399641165,307929.2839964116,301903.22363424243,278909.22363424255,0.0,0.0,0.0699106903089938,0.0,134.44341240328637,0.1239456985672444,278909.22363424255,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,k6dz4he1,sweep/ppo/sb3/cpu/default/a0/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/k6dz4he1,2026-03-16T06:38:33Z,8384.405275426,0,0,0,,0.0,100.0,0.0,0.05,True,True,baseline,331626.71399641165,307929.2839964116,301903.22363424243,278909.22363424255,0.0,0.0,0.0699106903089938,0.0,134.44341240328637,0.1239456985672444,278909.22363424255,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,3afj9zm5,sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/3afj9zm5,2026-03-16T06:51:33Z,7947.433015786,0,0,0,,0.4,100.0,0.0,0.3,True,True,baseline,313890.156459866,292317.566459866,301905.6061551721,281189.2661551722,0.0,0.0,0.0700585666613017,0.0,137.27393385978286,0.1140225013120235,281189.2661551722,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,lvlojvjv,sweep/ppo/sb3/cpu/default/a0.5/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/lvlojvjv,2026-03-16T07:17:09Z,8072.460782252,0,0,0,,0.5,100.0,0.0,0.05,True,True,baseline,305342.590984541,284402.02098454104,287794.11179162114,267934.8717916211,0.0,0.0,0.0698329564541014,0.0,137.34875112178105,0.1110975441706762,267934.8717916211,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,e6xtq7h5,sweep/ppo/sb3/cpu/default/a0.5/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/e6xtq7h5,2026-03-16T07:20:29Z,8062.476629606,0,0,0,,0.5,100.0,0.0,0.05,True,True,baseline,306631.1127310434,285624.6727310434,292140.0218133485,272205.32181334845,0.0,0.0,0.0706121906603894,0.0,137.48236407441985,0.112886126809283,272205.32181334845,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,6yrs8xci,sweep/ppo/sb3/cpu/default/a0.6/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/6yrs8xci,2026-03-16T07:50:01Z,7609.609823102,0,0,0,,0.6,100.0,0.0,0.15,True,True,baseline,293934.0132863448,273673.5532863448,278235.2158621181,259045.3158621181,0.0,0.0,0.0702286844227449,0.0,137.02187396075487,0.1108792101893818,259045.3158621181,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,16l3qjpm,sweep/ppo/sb3/cpu/default/a0/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/16l3qjpm,2026-03-16T07:50:41Z,8443.503878801,5,1004,1,,0.0,100.0,0.0,0.15,True,True,baseline,348861.1454509751,324713.0754509751,335967.6160126648,312660.3160126648,0.0,0.0,0.0674835742466741,0.0,136.8813175598437,0.118985751213389,312660.3160126648,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,rg98ht1b,sweep/ppo/sb3/cpu/default/a0/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/rg98ht1b,2026-03-16T07:55:36Z,8843.938343818,5,1004,1,,0.0,100.0,0.0,0.05,True,True,baseline,348861.1454509751,324713.0754509751,335967.6160126648,312660.3160126648,0.0,0.0,0.0674835742466741,0.0,136.8813175598437,0.118985751213389,312660.3160126648,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,mxd3i6wr,sweep/ppo/sb3/cpu/default/a0.2/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/mxd3i6wr,2026-03-16T07:58:03Z,8393.28184472,0,0,0,,0.2,100.0,0.0,0.15,True,True,baseline,333463.32883383776,310606.38883383776,322375.37087837915,300349.6308783791,0.0,0.0,0.0694238399850746,0.0,137.6206723870474,0.1176551945750585,300349.6308783791,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,0xvyhpg2,sweep/ppo/sb3/cpu/default/a0.9/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/0xvyhpg2,2026-03-16T08:01:43Z,7441.092473369,0,0,0,,0.9,100.0,0.0,0.05,True,True,baseline,268129.28805568966,249777.98805568964,259354.03651639624,241657.8165163962,0.0,0.0,0.0692141212557269,0.0,137.56737533812094,0.1028102128114812,241657.8165163962,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,eull6lat,sweep/ppo/sb3/cpu/default/a0.2/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/eull6lat,2026-03-16T08:03:08Z,8338.76018915,0,0,0,,0.2,100.0,0.0,0.05,True,True,baseline,333463.32883383776,310606.38883383776,322375.37087837915,300349.6308783791,0.0,0.0,0.0694238399850746,0.0,137.6206723870474,0.1176551945750585,300349.6308783791,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,5zekml75,sweep/ppo/sb3/cpu/default/a0.8/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/5zekml75,2026-03-16T08:06:29Z,7265.4990034,0,0,0,,0.8,100.0,0.0,0.15,True,True,baseline,277537.1135308166,258574.23353081665,260525.6140973399,242761.4740973399,0.0,0.0,0.0691119185711536,0.0,137.63850710873982,0.1055234893030045,242761.4740973399,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,fed0y4px,sweep/ppo/sb3/cpu/default/a0.7/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/fed0y4px,2026-03-16T08:13:55Z,7800.555020283,0,0,0,,0.7,100.0,0.0,0.05,True,True,baseline,286859.8032779717,267231.9932779717,273198.5349293896,254530.3349293896,0.0,0.0,0.0694378534785247,0.0,137.6169536272908,0.1086813731317916,254530.3349293896,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,byifn20j,sweep/ppo/sb3/cpu/default/a0.4/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/byifn20j,2026-03-16T08:20:55Z,8108.199462596,0,0,0,,0.4,100.0,0.0,0.3,True,True,baseline,316543.04043212667,294899.01043212664,299980.59649797506,279386.7564979751,0.0,0.0,0.067603468946279,0.0,137.7846896269947,0.1128739206843639,279386.7564979751,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,35rb8529,sweep/ppo/sb3/cpu/default/a0.5/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/35rb8529,2026-03-16T08:24:52Z,7749.649896228,0,0,0,,0.5,100.0,0.0,0.05,True,True,baseline,304711.516143744,283789.716143744,290536.18598250934,270609.3259825093,0.0,0.0,0.0700712626186499,0.0,137.43043602946972,0.1112796769387625,270609.3259825093,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,foinu2r1,sweep/ppo/sb3/cpu/default/a0.5/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/foinu2r1,2026-03-16T08:51:50Z,7924.351691656,0,0,0,,0.5,100.0,0.0,0.05,True,True,baseline,306631.1127310434,285624.6727310434,292140.0218133485,272205.32181334845,0.0,0.0,0.0706121906603894,0.0,137.48236407441985,0.112886126809283,272205.32181334845,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,nsg7m2ud,sweep/ppo/sb3/cpu/default/a0.5/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/nsg7m2ud,2026-03-16T09:06:10Z,7732.794663489,0,0,0,,0.5,100.0,0.0,0.3,True,True,baseline,303325.5596877454,282520.29968774534,291965.65710567136,271937.69710567134,0.0,0.0,0.0686525035124021,0.0,137.57073544790862,0.1132342695408356,271937.69710567134,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,gpririem,sweep/ppo/sb3/cpu/default/a0.2/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/gpririem,2026-03-16T09:20:57Z,8532.119121611,0,0,0,,0.2,100.0,0.0,0.3,True,True,baseline,333463.32883383776,310606.38883383776,322375.37087837915,300349.6308783791,0.0,0.0,0.0694238399850746,0.0,137.6206723870474,0.1176551945750585,300349.6308783791,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,9bmbalnk,sweep/ppo/sb3/cpu/default/a0.7/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/9bmbalnk,2026-03-16T10:05:49Z,7576.93090345,0,0,0,,0.7,100.0,0.0,0.15,True,True,baseline,285875.15518050164,266287.2051805016,274356.50146499986,255620.24146499988,0.0,0.0,0.0711188680417482,0.0,137.42722406640746,0.1099719716550294,255620.24146499988,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,9ma76sch,sweep/ppo/sb3/cpu/default/a0.1/baseline/s1337,finished,https://wandb.ai/lusiana/capstone_tpu/runs/9ma76sch,2026-03-16T10:23:59Z,8544.8427845,0,0,0,,0.1,100.0,0.0,0.3,True,True,baseline,341404.1205957663,317885.0305957663,329505.50925893825,306817.3492589383,0.0,0.0,0.0685274095002656,0.0,137.33021724658855,0.1206998447923596,306817.3492589383,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,cvrztiyb,sweep/ppo/sb3/cpu/default/a0.2/baseline/s42,finished,https://wandb.ai/lusiana/capstone_tpu/runs/cvrztiyb,2026-03-16T10:27:26Z,8353.396268583,0,0,0,,0.2,100.0,0.0,0.3,True,True,baseline,333463.32883383776,310606.38883383776,322375.37087837915,300349.6308783791,0.0,0.0,0.0694238399850746,0.0,137.6206723870474,0.1176551945750585,300349.6308783791,baseline
i88nw811,lusiana/capstone_tpu/i88nw811,7z9spcc6,sweep/ppo/sb3/cpu/default/a0/baseline/s7777,finished,https://wandb.ai/lusiana/capstone_tpu/runs/7z9spcc6,2026-03-16T10:29:46Z,8444.449882423,5,1004,1,,0.0,100.0,0.0,0.3,True,True,baseline,348861.1454509751,324713.0754509751,335967.6160126648,312660.3160126648,0.0,0.0,0.0674835742466741,0.0,136.8813175598437,0.118985751213389,312660.3160126648,baseline
1 sweep_id sweep_full_id run_id run_name state run_url created_at runtime downloaded_files history_rows selected_for_clone download_error alpha n_products eta_ux lambda_coi baseline_mode no_robust study_mode eval_revenue_mean eval_reward_mean eval_stress_revenue_worst eval_stress_reward_worst eval_supra_share_mean eval_supra_penalty_mean eval_volatility_mean eval_upward_volatility_mean eval_coi_level_mean eval_coi_leakage_mean objective_score mode
2 i88nw811 lusiana/capstone_tpu/i88nw811 0yph6ddt sweep/ppo/sb3/cpu/default/a0.7/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/0yph6ddt 2026-03-15T13:48:47Z 7579.766959963 0 0 0 0.7 100.0 0.0 0.05 True True baseline 285875.15518050164 266287.2051805016 274356.50146499986 255620.24146499988 0.0 0.0 0.0711188680417482 0.0 137.42722406640746 0.1099719716550294 255620.24146499988 baseline
3 i88nw811 lusiana/capstone_tpu/i88nw811 bjwmxlf4 sweep/ppo/sb3/cpu/default/a0.9/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/bjwmxlf4 2026-03-15T13:48:49Z 7514.003863569 0 0 0 0.9 100.0 0.0 0.05 True True baseline 267194.6114143838 248902.78141438385 258791.60782635584 241079.0878263559 0.0 0.0 0.0706779448814682 0.0 137.4716591479769 0.1060063717489262 241079.0878263559 baseline
4 i88nw811 lusiana/capstone_tpu/i88nw811 afod7srx sweep/ppo/sb3/cpu/default/a0/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/afod7srx 2026-03-15T13:48:55Z 8428.923550896 0 0 0 0.0 100.0 0.0 0.15 True True baseline 331626.71399641165 307929.2839964116 301903.22363424243 278909.22363424255 0.0 0.0 0.0699106903089938 0.0 134.44341240328637 0.1239456985672444 278909.22363424255 baseline
5 i88nw811 lusiana/capstone_tpu/i88nw811 czbwbw4o sweep/ppo/sb3/cpu/default/a0.3/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/czbwbw4o 2026-03-15T13:48:55Z 8019.834460958 0 0 0 0.3 100.0 0.0 0.05 True True baseline 325062.60932028474 302657.9893202848 313580.73955351143 292103.1195535114 0.0 0.0 0.0700934793925504 0.0 137.30226556155992 0.1156304945350146 292103.1195535114 baseline
6 i88nw811 lusiana/capstone_tpu/i88nw811 spncr5i5 sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/spncr5i5 2026-03-15T13:48:57Z 7984.536208498 0 0 0 0.4 100.0 0.0 0.3 True True baseline 313890.156459866 292317.566459866 301905.6061551721 281189.2661551722 0.0 0.0 0.0700585666613017 0.0 137.27393385978286 0.1140225013120235 281189.2661551722 baseline
7 i88nw811 lusiana/capstone_tpu/i88nw811 9utcbgal sweep/ppo/sb3/cpu/default/a0.6/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/9utcbgal 2026-03-15T13:48:58Z 7794.573495005 0 0 0 0.6 100.0 0.0 0.3 True True baseline 296881.4938150014 276559.4338150014 282693.0664052287 263321.0864052287 0.0 0.0 0.0689497793839256 0.0 137.65459475595475 0.1116745762120893 263321.0864052287 baseline
8 i88nw811 lusiana/capstone_tpu/i88nw811 6uhc0zfi sweep/ppo/sb3/cpu/default/a0.1/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/6uhc0zfi 2026-03-15T13:48:59Z 8739.343652451 5 5000 1 0.1 100.0 0.0 0.3 True True baseline 345607.36851277394 321934.388512774 330271.9018417394 307619.2418417394 0.0 0.0 0.0688978199434404 0.0 137.65927138408344 0.1180576040723697 307619.2418417394 baseline
9 i88nw811 lusiana/capstone_tpu/i88nw811 mid9h16o sweep/ppo/sb3/cpu/default/a0.3/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/mid9h16o 2026-03-15T13:48:59Z 7934.709025792 0 0 0 0.3 100.0 0.0 0.15 True True baseline 321120.1030044527 298922.9430044526 312002.2572538445 290604.6972538445 0.0 0.0 0.0725338635316591 0.0 136.9642983472208 0.1152504371251349 290604.6972538445 baseline
10 i88nw811 lusiana/capstone_tpu/i88nw811 hm8geh95 sweep/ppo/sb3/cpu/default/a0.3/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/hm8geh95 2026-03-15T13:49:01Z 8324.170881475 0 0 0 0.3 100.0 0.0 0.05 True True baseline 321120.1030044527 298922.9430044526 312002.2572538445 290604.6972538445 0.0 0.0 0.0725338635316591 0.0 136.9642983472208 0.1152504371251349 290604.6972538445 baseline
11 i88nw811 lusiana/capstone_tpu/i88nw811 2k3bx48e sweep/ppo/sb3/cpu/default/a0.7/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/2k3bx48e 2026-03-15T13:49:03Z 7579.046562713 0 0 0 0.7 100.0 0.0 0.3 True True baseline 288003.5379862045 268208.7279862045 274205.49798255006 255466.81798255 0.0 0.0 0.0732015803628115 0.0 137.25851714050424 0.1065894678006264 255466.81798255 baseline
12 i88nw811 lusiana/capstone_tpu/i88nw811 mlcllxuf sweep/ppo/sb3/cpu/default/a0.3/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/mlcllxuf 2026-03-15T15:28:13Z 8048.447950291 0 0 0 0.3 100.0 0.0 0.05 True True baseline 325062.60932028474 302657.9893202848 313580.73955351143 292103.1195535114 0.0 0.0 0.0700934793925504 0.0 137.30226556155992 0.1156304945350146 292103.1195535114 baseline
13 i88nw811 lusiana/capstone_tpu/i88nw811 gsx5p3xl sweep/ppo/sb3/cpu/default/a0.7/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/gsx5p3xl 2026-03-15T15:29:00Z 7666.062008427 0 0 0 0.7 100.0 0.0 0.3 True True baseline 286859.8032779717 267231.9932779717 273198.5349293896 254530.3349293896 0.0 0.0 0.0694378534785247 0.0 137.6169536272908 0.1086813731317916 254530.3349293896 baseline
14 i88nw811 lusiana/capstone_tpu/i88nw811 dh2sidg0 sweep/ppo/sb3/cpu/default/a0.8/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/dh2sidg0 2026-03-15T15:31:51Z 7450.114589126 0 0 0 0.8 100.0 0.0 0.3 True True baseline 277537.1135308166 258574.23353081665 260525.6140973399 242761.4740973399 0.0 0.0 0.0691119185711536 0.0 137.63850710873982 0.1055234893030045 242761.4740973399 baseline
15 i88nw811 lusiana/capstone_tpu/i88nw811 izb1xfjn sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/izb1xfjn 2026-03-15T15:38:35Z 8138.431632101 0 0 0 0.4 100.0 0.0 0.05 True True baseline 313890.156459866 292317.566459866 301905.6061551721 281189.2661551722 0.0 0.0 0.0700585666613017 0.0 137.27393385978286 0.1140225013120235 281189.2661551722 baseline
16 i88nw811 lusiana/capstone_tpu/i88nw811 h5v0bjkk sweep/ppo/sb3/cpu/default/a1/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/h5v0bjkk 2026-03-15T15:53:08Z 7430.137394885 0 0 0 1.0 100.0 0.0 0.05 True True baseline 258250.4083985968 240558.37839859675 257579.27605596423 239906.35605596425 0.0 0.0 0.0710781742010645 0.0 137.43891114039735 0.1034797519569495 239906.35605596425 baseline
17 i88nw811 lusiana/capstone_tpu/i88nw811 oo9x7mtj sweep/ppo/sb3/cpu/default/a0/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/oo9x7mtj 2026-03-15T17:08:57Z 8434.676111878 0 0 0 0.0 100.0 0.0 0.15 True True baseline 331626.71399641165 307929.2839964116 301903.22363424243 278909.22363424255 0.0 0.0 0.0699106903089938 0.0 134.44341240328637 0.1239456985672444 278909.22363424255 baseline
18 i88nw811 lusiana/capstone_tpu/i88nw811 2tnqjvsr sweep/ppo/sb3/cpu/default/a0.2/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/2tnqjvsr 2026-03-15T17:10:41Z 8326.316856098 0 0 0 0.2 100.0 0.0 0.3 True True baseline 333463.32883383776 310606.38883383776 322375.37087837915 300349.6308783791 0.0 0.0 0.0694238399850746 0.0 137.6206723870474 0.1176551945750585 300349.6308783791 baseline
19 i88nw811 lusiana/capstone_tpu/i88nw811 uwl4b1t4 sweep/ppo/sb3/cpu/default/a0.6/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/uwl4b1t4 2026-03-15T17:11:41Z 7730.138244902 0 0 0 0.6 100.0 0.0 0.15 True True baseline 293934.0132863448 273673.5532863448 278235.2158621181 259045.3158621181 0.0 0.0 0.0702286844227449 0.0 137.02187396075487 0.1108792101893818 259045.3158621181 baseline
20 i88nw811 lusiana/capstone_tpu/i88nw811 mq08631s sweep/ppo/sb3/cpu/default/a0.7/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/mq08631s 2026-03-15T17:11:46Z 7830.903683379 0 0 0 0.7 100.0 0.0 0.3 True True baseline 286859.8032779717 267231.9932779717 273198.5349293896 254530.3349293896 0.0 0.0 0.0694378534785247 0.0 137.6169536272908 0.1086813731317916 254530.3349293896 baseline
21 i88nw811 lusiana/capstone_tpu/i88nw811 oenf81vs sweep/ppo/sb3/cpu/default/a0.9/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/oenf81vs 2026-03-15T17:14:03Z 7571.420325966 0 0 0 0.9 100.0 0.0 0.15 True True baseline 268129.28805568966 249777.98805568964 259354.03651639624 241657.8165163962 0.0 0.0 0.0692141212557269 0.0 137.56737533812094 0.1028102128114812 241657.8165163962 baseline
22 i88nw811 lusiana/capstone_tpu/i88nw811 imvig8ea sweep/ppo/sb3/cpu/default/a0.9/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/imvig8ea 2026-03-15T17:26:17Z 7548.356923917 0 0 0 0.9 100.0 0.0 0.05 True True baseline 269095.26288012683 250709.3028801269 257985.06236888352 240343.2023688835 0.0 0.0 0.0687681637998595 0.0 137.63174822647662 0.1040919495927453 240343.2023688835 baseline
23 i88nw811 lusiana/capstone_tpu/i88nw811 kc46mwot sweep/ppo/sb3/cpu/default/a0.9/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/kc46mwot 2026-03-15T17:36:54Z 7402.437478922 0 0 0 0.9 100.0 0.0 0.3 True True baseline 269095.26288012683 250709.3028801269 257985.06236888352 240343.2023688835 0.0 0.0 0.0687681637998595 0.0 137.63174822647662 0.1040919495927453 240343.2023688835 baseline
24 i88nw811 lusiana/capstone_tpu/i88nw811 6c5g20m0 sweep/ppo/sb3/cpu/default/a0.4/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/6c5g20m0 2026-03-15T17:39:15Z 7987.751960449 0 0 0 0.4 100.0 0.0 0.05 True True baseline 314792.9405088838 293199.96050888376 304000.02795477153 283160.5079547715 0.0 0.0 0.0706474903672308 0.0 137.54347765167836 0.1134114537317883 283160.5079547715 baseline
25 i88nw811 lusiana/capstone_tpu/i88nw811 zmfirgme sweep/ppo/sb3/cpu/default/a0.6/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/zmfirgme 2026-03-15T17:39:38Z 7729.43292327 0 0 0 0.6 100.0 0.0 0.3 True True baseline 296881.4938150014 276559.4338150014 282693.0664052287 263321.0864052287 0.0 0.0 0.0689497793839256 0.0 137.65459475595475 0.1116745762120893 263321.0864052287 baseline
26 i88nw811 lusiana/capstone_tpu/i88nw811 5w978f6n sweep/ppo/sb3/cpu/default/a0.2/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/5w978f6n 2026-03-15T17:42:23Z 8196.563842857 0 0 0 0.2 100.0 0.0 0.3 True True baseline 328662.28105387173 305848.95105387166 316489.4913151873 294621.8913151873 0.0 0.0 0.0726481757500429 0.0 136.60489081120323 0.115056283050696 294621.8913151873 baseline
27 i88nw811 lusiana/capstone_tpu/i88nw811 v6yuq532 sweep/ppo/sb3/cpu/default/a0.3/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/v6yuq532 2026-03-15T18:27:32Z 8171.524047551 0 0 0 0.3 100.0 0.0 0.3 True True baseline 325536.3728999571 303203.77289995714 311530.19009115506 290169.93009115505 0.0 0.0 0.0690101249418158 0.0 137.57976469566975 0.115140125484157 290169.93009115505 baseline
28 i88nw811 lusiana/capstone_tpu/i88nw811 wzs4h708 sweep/ppo/sb3/cpu/default/a1/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/wzs4h708 2026-03-15T18:44:40Z 7213.500579862 0 0 0 1.0 100.0 0.0 0.3 True True baseline 258250.4083985968 240558.37839859675 257579.27605596423 239906.35605596425 0.0 0.0 0.0710781742010645 0.0 137.43891114039735 0.1034797519569495 239906.35605596425 baseline
29 i88nw811 lusiana/capstone_tpu/i88nw811 drjegsa8 sweep/ppo/sb3/cpu/default/a0.8/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/drjegsa8 2026-03-15T18:53:51Z 7642.750902648 0 0 0 0.8 100.0 0.0 0.05 True True baseline 278042.9708277731 258987.21082777312 265119.53279206343 246979.39279206347 0.0 0.0 0.069699479796535 0.0 137.47635104131075 0.1063946886684759 246979.39279206347 baseline
30 i88nw811 lusiana/capstone_tpu/i88nw811 np3fvzwt sweep/ppo/sb3/cpu/default/a0.9/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/np3fvzwt 2026-03-15T18:57:50Z 7300.325366337 0 0 0 0.9 100.0 0.0 0.3 True True baseline 269095.26288012683 250709.3028801269 257985.06236888352 240343.2023688835 0.0 0.0 0.0687681637998595 0.0 137.63174822647662 0.1040919495927453 240343.2023688835 baseline
31 i88nw811 lusiana/capstone_tpu/i88nw811 kk0sqa97 sweep/ppo/sb3/cpu/default/a0.1/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/kk0sqa97 2026-03-15T19:06:17Z 8525.177181009 0 0 0 0.1 100.0 0.0 0.3 True True baseline 341404.1205957663 317885.0305957663 329505.50925893825 306817.3492589383 0.0 0.0 0.0685274095002656 0.0 137.33021724658855 0.1206998447923596 306817.3492589383 baseline
32 i88nw811 lusiana/capstone_tpu/i88nw811 i0rpx1kf sweep/ppo/sb3/cpu/default/a0.2/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/i0rpx1kf 2026-03-15T19:20:36Z 8356.73493734 0 0 0 0.2 100.0 0.0 0.05 True True baseline 333463.32883383776 310606.38883383776 322375.37087837915 300349.6308783791 0.0 0.0 0.0694238399850746 0.0 137.6206723870474 0.1176551945750585 300349.6308783791 baseline
33 i88nw811 lusiana/capstone_tpu/i88nw811 lqmaq5g2 sweep/ppo/sb3/cpu/default/a1/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/lqmaq5g2 2026-03-15T20:02:28Z 7470.274064026 0 0 0 1.0 100.0 0.0 0.05 True True baseline 246584.29279154172 229303.12279154177 244564.78814724492 227386.888147245 0.0 0.0 0.0692074374069363 0.0 135.2844805658817 0.1093837602765936 227386.888147245 baseline
34 i88nw811 lusiana/capstone_tpu/i88nw811 2umearxm sweep/ppo/sb3/cpu/default/a0.5/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/2umearxm 2026-03-15T20:09:56Z 7829.406313163 0 0 0 0.5 100.0 0.0 0.3 True True baseline 303325.5596877454 282520.29968774534 291965.65710567136 271937.69710567134 0.0 0.0 0.0686525035124021 0.0 137.57073544790862 0.1132342695408356 271937.69710567134 baseline
35 i88nw811 lusiana/capstone_tpu/i88nw811 k7pirqxy sweep/ppo/sb3/cpu/default/a1/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/k7pirqxy 2026-03-15T20:33:53Z 7216.626889631 0 0 0 1.0 100.0 0.0 0.15 True True baseline 254537.24517731377 236935.99517731369 254471.2696855663 236912.16968556636 0.0 0.0 0.0703905833083271 0.0 136.6143424312229 0.1038838810036006 236912.16968556636 baseline
36 i88nw811 lusiana/capstone_tpu/i88nw811 algnjce4 sweep/ppo/sb3/cpu/default/a0.6/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/algnjce4 2026-03-15T20:54:24Z 7739.30650029 0 0 0 0.6 100.0 0.0 0.05 True True baseline 296881.4938150014 276559.4338150014 282693.0664052287 263321.0864052287 0.0 0.0 0.0689497793839256 0.0 137.65459475595475 0.1116745762120893 263321.0864052287 baseline
37 i88nw811 lusiana/capstone_tpu/i88nw811 vqe2dmcq sweep/ppo/sb3/cpu/default/a0.4/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/vqe2dmcq 2026-03-15T21:08:22Z 7815.774646473 0 0 0 0.4 100.0 0.0 0.05 True True baseline 316543.04043212667 294899.01043212664 299980.59649797506 279386.7564979751 0.0 0.0 0.067603468946279 0.0 137.7846896269947 0.1128739206843639 279386.7564979751 baseline
38 i88nw811 lusiana/capstone_tpu/i88nw811 0xlvpawh sweep/ppo/sb3/cpu/default/a0.3/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/0xlvpawh 2026-03-15T21:16:04Z 7997.68392245 0 0 0 0.3 100.0 0.0 0.15 True True baseline 325062.60932028474 302657.9893202848 313580.73955351143 292103.1195535114 0.0 0.0 0.0700934793925504 0.0 137.30226556155992 0.1156304945350146 292103.1195535114 baseline
39 i88nw811 lusiana/capstone_tpu/i88nw811 bofuxayn sweep/ppo/sb3/cpu/default/a0.7/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/bofuxayn 2026-03-15T21:18:05Z 7486.102336723 0 0 0 0.7 100.0 0.0 0.05 True True baseline 285875.15518050164 266287.2051805016 274356.50146499986 255620.24146499988 0.0 0.0 0.0711188680417482 0.0 137.42722406640746 0.1099719716550294 255620.24146499988 baseline
40 i88nw811 lusiana/capstone_tpu/i88nw811 rujnezt7 sweep/ppo/sb3/cpu/default/a0.5/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/rujnezt7 2026-03-15T21:20:23Z 7936.01356938 0 0 0 0.5 100.0 0.0 0.15 True True baseline 305342.590984541 284402.02098454104 287794.11179162114 267934.8717916211 0.0 0.0 0.0698329564541014 0.0 137.34875112178105 0.1110975441706762 267934.8717916211 baseline
41 i88nw811 lusiana/capstone_tpu/i88nw811 f9e6wtv0 sweep/ppo/sb3/cpu/default/a0.7/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/f9e6wtv0 2026-03-15T22:07:04Z 8030.825365422 0 0 0 0.7 100.0 0.0 0.05 True True baseline 288003.5379862045 268208.7279862045 274205.49798255006 255466.81798255 0.0 0.0 0.0732015803628115 0.0 137.25851714050424 0.1065894678006264 255466.81798255 baseline
42 i88nw811 lusiana/capstone_tpu/i88nw811 r8hsz3ko sweep/ppo/sb3/cpu/default/a0.7/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/r8hsz3ko 2026-03-15T22:13:06Z 7691.998775531 0 0 0 0.7 100.0 0.0 0.3 True True baseline 286859.8032779717 267231.9932779717 273198.5349293896 254530.3349293896 0.0 0.0 0.0694378534785247 0.0 137.6169536272908 0.1086813731317916 254530.3349293896 baseline
43 i88nw811 lusiana/capstone_tpu/i88nw811 yukg46hv sweep/ppo/sb3/cpu/default/a1/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/yukg46hv 2026-03-15T23:03:27Z 7094.861108483 0 0 0 1.0 100.0 0.0 0.15 True True baseline 254537.24517731377 236935.99517731369 254471.2696855663 236912.16968556636 0.0 0.0 0.0703905833083271 0.0 136.6143424312229 0.1038838810036006 236912.16968556636 baseline
44 i88nw811 lusiana/capstone_tpu/i88nw811 e5tciezz sweep/ppo/sb3/cpu/default/a0.7/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/e5tciezz 2026-03-16T00:16:08Z 7569.145925588 0 0 0 0.7 100.0 0.0 0.05 True True baseline 285875.15518050164 266287.2051805016 274356.50146499986 255620.24146499988 0.0 0.0 0.0711188680417482 0.0 137.42722406640746 0.1099719716550294 255620.24146499988 baseline
45 i88nw811 lusiana/capstone_tpu/i88nw811 1rop5sf9 sweep/ppo/sb3/cpu/default/a0.3/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/1rop5sf9 2026-03-16T00:21:00Z 8354.617713686 0 0 0 0.3 100.0 0.0 0.05 True True baseline 321120.1030044527 298922.9430044526 312002.2572538445 290604.6972538445 0.0 0.0 0.0725338635316591 0.0 136.9642983472208 0.1152504371251349 290604.6972538445 baseline
46 i88nw811 lusiana/capstone_tpu/i88nw811 7muxpseb sweep/ppo/sb3/cpu/default/a0.2/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/7muxpseb 2026-03-16T00:21:21Z 8514.602541985 0 0 0 0.2 100.0 0.0 0.05 True True baseline 333463.32883383776 310606.38883383776 322375.37087837915 300349.6308783791 0.0 0.0 0.0694238399850746 0.0 137.6206723870474 0.1176551945750585 300349.6308783791 baseline
47 i88nw811 lusiana/capstone_tpu/i88nw811 304dyypp sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/304dyypp 2026-03-16T00:37:04Z 7949.736292204 0 0 0 0.4 100.0 0.0 0.3 True True baseline 313890.156459866 292317.566459866 301905.6061551721 281189.2661551722 0.0 0.0 0.0700585666613017 0.0 137.27393385978286 0.1140225013120235 281189.2661551722 baseline
48 i88nw811 lusiana/capstone_tpu/i88nw811 zbw7nmeo sweep/ppo/sb3/cpu/default/a0.1/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/zbw7nmeo 2026-03-16T00:53:02Z 8423.598177489 0 0 0 0.1 100.0 0.0 0.05 True True baseline 340941.7898046945 317438.6698046944 328185.5337341634 305593.15373416344 0.0 0.0 0.0709483560344898 0.0 137.21682561970587 0.1186714838821206 305593.15373416344 baseline
49 i88nw811 lusiana/capstone_tpu/i88nw811 oxu7rm37 sweep/ppo/sb3/cpu/default/a0.9/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/oxu7rm37 2026-03-16T00:53:31Z 7464.830361968 0 0 0 0.9 100.0 0.0 0.3 True True baseline 268129.28805568966 249777.98805568964 259354.03651639624 241657.8165163962 0.0 0.0 0.0692141212557269 0.0 137.56737533812094 0.1028102128114812 241657.8165163962 baseline
50 i88nw811 lusiana/capstone_tpu/i88nw811 m78p26vk sweep/ppo/sb3/cpu/default/a0/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/m78p26vk 2026-03-16T00:56:58Z 8717.289024041 5 1004 1 0.0 100.0 0.0 0.15 True True baseline 348861.1454509751 324713.0754509751 335967.6160126648 312660.3160126648 0.0 0.0 0.0674835742466741 0.0 136.8813175598437 0.118985751213389 312660.3160126648 baseline
51 i88nw811 lusiana/capstone_tpu/i88nw811 65zzmszh sweep/ppo/sb3/cpu/default/a1/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/65zzmszh 2026-03-16T01:14:03Z 7326.553384609 0 0 0 1.0 100.0 0.0 0.3 True True baseline 246584.29279154172 229303.12279154177 244564.78814724492 227386.888147245 0.0 0.0 0.0692074374069363 0.0 135.2844805658817 0.1093837602765936 227386.888147245 baseline
52 i88nw811 lusiana/capstone_tpu/i88nw811 47xraqt6 sweep/ppo/sb3/cpu/default/a0.9/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/47xraqt6 2026-03-16T01:22:01Z 7299.814264453 0 0 0 0.9 100.0 0.0 0.3 True True baseline 269095.26288012683 250709.3028801269 257985.06236888352 240343.2023688835 0.0 0.0 0.0687681637998595 0.0 137.63174822647662 0.1040919495927453 240343.2023688835 baseline
53 i88nw811 lusiana/capstone_tpu/i88nw811 mibyt0bf sweep/ppo/sb3/cpu/default/a0.9/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/mibyt0bf 2026-03-16T01:34:44Z 7541.153639959 0 0 0 0.9 100.0 0.0 0.3 True True baseline 267194.6114143838 248902.78141438385 258791.60782635584 241079.0878263559 0.0 0.0 0.0706779448814682 0.0 137.4716591479769 0.1060063717489262 241079.0878263559 baseline
54 i88nw811 lusiana/capstone_tpu/i88nw811 8ww25eu1 sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/8ww25eu1 2026-03-16T01:45:51Z 8003.812511886 0 0 0 0.4 100.0 0.0 0.3 True True baseline 313890.156459866 292317.566459866 301905.6061551721 281189.2661551722 0.0 0.0 0.0700585666613017 0.0 137.27393385978286 0.1140225013120235 281189.2661551722 baseline
55 i88nw811 lusiana/capstone_tpu/i88nw811 cxdz0iyj sweep/ppo/sb3/cpu/default/a0.6/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/cxdz0iyj 2026-03-16T01:50:19Z 7623.493600288 0 0 0 0.6 100.0 0.0 0.3 True True baseline 293934.0132863448 273673.5532863448 278235.2158621181 259045.3158621181 0.0 0.0 0.0702286844227449 0.0 137.02187396075487 0.1108792101893818 259045.3158621181 baseline
56 i88nw811 lusiana/capstone_tpu/i88nw811 1aeqr4sw sweep/ppo/sb3/cpu/default/a1/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/1aeqr4sw 2026-03-16T01:58:10Z 7156.375097998 0 0 0 1.0 100.0 0.0 0.3 True True baseline 254537.24517731377 236935.99517731369 254471.2696855663 236912.16968556636 0.0 0.0 0.0703905833083271 0.0 136.6143424312229 0.1038838810036006 236912.16968556636 baseline
57 i88nw811 lusiana/capstone_tpu/i88nw811 7sgqchvk sweep/ppo/sb3/cpu/default/a0.9/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/7sgqchvk 2026-03-16T02:09:14Z 7268.202978965 0 0 0 0.9 100.0 0.0 0.15 True True baseline 267194.6114143838 248902.78141438385 258791.60782635584 241079.0878263559 0.0 0.0 0.0706779448814682 0.0 137.4716591479769 0.1060063717489262 241079.0878263559 baseline
58 i88nw811 lusiana/capstone_tpu/i88nw811 3s777ena sweep/ppo/sb3/cpu/default/a0.5/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/3s777ena 2026-03-16T02:14:54Z 7762.769931002 0 0 0 0.5 100.0 0.0 0.05 True True baseline 303325.5596877454 282520.29968774534 291965.65710567136 271937.69710567134 0.0 0.0 0.0686525035124021 0.0 137.57073544790862 0.1132342695408356 271937.69710567134 baseline
59 i88nw811 lusiana/capstone_tpu/i88nw811 oxsvuh5p sweep/ppo/sb3/cpu/default/a0.1/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/oxsvuh5p 2026-03-16T02:27:01Z 8529.692612353 0 0 0 0.1 100.0 0.0 0.15 True True baseline 340941.7898046945 317438.6698046944 328185.5337341634 305593.15373416344 0.0 0.0 0.0709483560344898 0.0 137.21682561970587 0.1186714838821206 305593.15373416344 baseline
60 i88nw811 lusiana/capstone_tpu/i88nw811 4unnwl9l sweep/ppo/sb3/cpu/default/a0.7/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/4unnwl9l 2026-03-16T02:34:01Z 7780.065361146 0 0 0 0.7 100.0 0.0 0.15 True True baseline 286859.8032779717 267231.9932779717 273198.5349293896 254530.3349293896 0.0 0.0 0.0694378534785247 0.0 137.6169536272908 0.1086813731317916 254530.3349293896 baseline
61 i88nw811 lusiana/capstone_tpu/i88nw811 qlfu6ts4 sweep/ppo/sb3/cpu/default/a0.1/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/qlfu6ts4 2026-03-16T02:46:52Z 8357.276406226 0 0 0 0.1 100.0 0.0 0.3 True True baseline 340941.7898046945 317438.6698046944 328185.5337341634 305593.15373416344 0.0 0.0 0.0709483560344898 0.0 137.21682561970587 0.1186714838821206 305593.15373416344 baseline
62 i88nw811 lusiana/capstone_tpu/i88nw811 ya2bb56z sweep/ppo/sb3/cpu/default/a1/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/ya2bb56z 2026-03-16T03:04:37Z 7161.126998896 0 0 0 1.0 100.0 0.0 0.15 True True baseline 254537.24517731377 236935.99517731369 254471.2696855663 236912.16968556636 0.0 0.0 0.0703905833083271 0.0 136.6143424312229 0.1038838810036006 236912.16968556636 baseline
63 i88nw811 lusiana/capstone_tpu/i88nw811 9hrjmcaf sweep/ppo/sb3/cpu/default/a0.1/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/9hrjmcaf 2026-03-16T03:13:29Z 8543.819880598 5 1004 1 0.1 100.0 0.0 0.15 True True baseline 345607.36851277394 321934.388512774 330271.9018417394 307619.2418417394 0.0 0.0 0.0688978199434404 0.0 137.65927138408344 0.1180576040723697 307619.2418417394 baseline
64 i88nw811 lusiana/capstone_tpu/i88nw811 bdz7jpg9 sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/bdz7jpg9 2026-03-16T03:19:29Z 8156.512730959 0 0 0 0.4 100.0 0.0 0.15 True True baseline 313890.156459866 292317.566459866 301905.6061551721 281189.2661551722 0.0 0.0 0.0700585666613017 0.0 137.27393385978286 0.1140225013120235 281189.2661551722 baseline
65 i88nw811 lusiana/capstone_tpu/i88nw811 4e8bw9fr sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/4e8bw9fr 2026-03-16T03:23:44Z 7900.988162577 0 0 0 0.4 100.0 0.0 0.3 True True baseline 313890.156459866 292317.566459866 301905.6061551721 281189.2661551722 0.0 0.0 0.0700585666613017 0.0 137.27393385978286 0.1140225013120235 281189.2661551722 baseline
66 i88nw811 lusiana/capstone_tpu/i88nw811 rudposqg sweep/ppo/sb3/cpu/default/a0.8/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/rudposqg 2026-03-16T04:16:36Z 7803.944972672 0 0 0 0.8 100.0 0.0 0.15 True True baseline 277186.5585556976 258169.5585556976 260819.58418764165 242908.9641876417 0.0 0.0 0.0684627361221973 0.0 137.3260908975896 0.1077409453905398 242908.9641876417 baseline
67 i88nw811 lusiana/capstone_tpu/i88nw811 r24xwwl9 sweep/ppo/sb3/cpu/default/a0.1/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/r24xwwl9 2026-03-16T04:43:43Z 8571.635566955 0 0 0 0.1 100.0 0.0 0.15 True True baseline 340941.7898046945 317438.6698046944 328185.5337341634 305593.15373416344 0.0 0.0 0.0709483560344898 0.0 137.21682561970587 0.1186714838821206 305593.15373416344 baseline
68 i88nw811 lusiana/capstone_tpu/i88nw811 34c0wzgt sweep/ppo/sb3/cpu/default/a0.5/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/34c0wzgt 2026-03-16T04:43:54Z 7912.776898111 0 0 0 0.5 100.0 0.0 0.05 True True baseline 306631.1127310434 285624.6727310434 292140.0218133485 272205.32181334845 0.0 0.0 0.0706121906603894 0.0 137.48236407441985 0.112886126809283 272205.32181334845 baseline
69 i88nw811 lusiana/capstone_tpu/i88nw811 7bvonhab sweep/ppo/sb3/cpu/default/a0.2/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/7bvonhab 2026-03-16T04:59:24Z 8276.510250338 0 0 0 0.2 100.0 0.0 0.15 True True baseline 333463.32883383776 310606.38883383776 322375.37087837915 300349.6308783791 0.0 0.0 0.0694238399850746 0.0 137.6206723870474 0.1176551945750585 300349.6308783791 baseline
70 i88nw811 lusiana/capstone_tpu/i88nw811 4f7j1z4p sweep/ppo/sb3/cpu/default/a0/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/4f7j1z4p 2026-03-16T05:37:06Z 8672.519975981 5 1004 1 0.0 100.0 0.0 0.3 True True baseline 352771.72255003714 328513.3625500371 337718.8770159761 314393.4970159762 0.0 0.0 0.0709252720738168 0.0 137.49769422651883 0.1192149910017191 314393.4970159762 baseline
71 i88nw811 lusiana/capstone_tpu/i88nw811 c33cyjv9 sweep/ppo/sb3/cpu/default/a0.4/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/c33cyjv9 2026-03-16T05:38:08Z 8164.154912737 0 0 0 0.4 100.0 0.0 0.15 True True baseline 314792.9405088838 293199.96050888376 304000.02795477153 283160.5079547715 0.0 0.0 0.0706474903672308 0.0 137.54347765167836 0.1134114537317883 283160.5079547715 baseline
72 i88nw811 lusiana/capstone_tpu/i88nw811 i0pylqm1 sweep/ppo/sb3/cpu/default/a0.6/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/i0pylqm1 2026-03-16T05:54:46Z 7692.357589996 0 0 0 0.6 100.0 0.0 0.15 True True baseline 293934.0132863448 273673.5532863448 278235.2158621181 259045.3158621181 0.0 0.0 0.0702286844227449 0.0 137.02187396075487 0.1108792101893818 259045.3158621181 baseline
73 i88nw811 lusiana/capstone_tpu/i88nw811 p1lrhc1t sweep/ppo/sb3/cpu/default/a0.5/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/p1lrhc1t 2026-03-16T06:06:24Z 7906.656203638 0 0 0 0.5 100.0 0.0 0.15 True True baseline 304711.516143744 283789.716143744 290536.18598250934 270609.3259825093 0.0 0.0 0.0700712626186499 0.0 137.43043602946972 0.1112796769387625 270609.3259825093 baseline
74 i88nw811 lusiana/capstone_tpu/i88nw811 lkhtnobk sweep/ppo/sb3/cpu/default/a0.9/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/lkhtnobk 2026-03-16T06:25:11Z 7304.77470818 0 0 0 0.9 100.0 0.0 0.3 True True baseline 269095.26288012683 250709.3028801269 257985.06236888352 240343.2023688835 0.0 0.0 0.0687681637998595 0.0 137.63174822647662 0.1040919495927453 240343.2023688835 baseline
75 i88nw811 lusiana/capstone_tpu/i88nw811 dvf0av6p sweep/ppo/sb3/cpu/default/a0/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/dvf0av6p 2026-03-16T06:34:22Z 8568.236301103 0 0 0 0.0 100.0 0.0 0.3 True True baseline 331626.71399641165 307929.2839964116 301903.22363424243 278909.22363424255 0.0 0.0 0.0699106903089938 0.0 134.44341240328637 0.1239456985672444 278909.22363424255 baseline
76 i88nw811 lusiana/capstone_tpu/i88nw811 k6dz4he1 sweep/ppo/sb3/cpu/default/a0/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/k6dz4he1 2026-03-16T06:38:33Z 8384.405275426 0 0 0 0.0 100.0 0.0 0.05 True True baseline 331626.71399641165 307929.2839964116 301903.22363424243 278909.22363424255 0.0 0.0 0.0699106903089938 0.0 134.44341240328637 0.1239456985672444 278909.22363424255 baseline
77 i88nw811 lusiana/capstone_tpu/i88nw811 3afj9zm5 sweep/ppo/sb3/cpu/default/a0.4/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/3afj9zm5 2026-03-16T06:51:33Z 7947.433015786 0 0 0 0.4 100.0 0.0 0.3 True True baseline 313890.156459866 292317.566459866 301905.6061551721 281189.2661551722 0.0 0.0 0.0700585666613017 0.0 137.27393385978286 0.1140225013120235 281189.2661551722 baseline
78 i88nw811 lusiana/capstone_tpu/i88nw811 lvlojvjv sweep/ppo/sb3/cpu/default/a0.5/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/lvlojvjv 2026-03-16T07:17:09Z 8072.460782252 0 0 0 0.5 100.0 0.0 0.05 True True baseline 305342.590984541 284402.02098454104 287794.11179162114 267934.8717916211 0.0 0.0 0.0698329564541014 0.0 137.34875112178105 0.1110975441706762 267934.8717916211 baseline
79 i88nw811 lusiana/capstone_tpu/i88nw811 e6xtq7h5 sweep/ppo/sb3/cpu/default/a0.5/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/e6xtq7h5 2026-03-16T07:20:29Z 8062.476629606 0 0 0 0.5 100.0 0.0 0.05 True True baseline 306631.1127310434 285624.6727310434 292140.0218133485 272205.32181334845 0.0 0.0 0.0706121906603894 0.0 137.48236407441985 0.112886126809283 272205.32181334845 baseline
80 i88nw811 lusiana/capstone_tpu/i88nw811 6yrs8xci sweep/ppo/sb3/cpu/default/a0.6/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/6yrs8xci 2026-03-16T07:50:01Z 7609.609823102 0 0 0 0.6 100.0 0.0 0.15 True True baseline 293934.0132863448 273673.5532863448 278235.2158621181 259045.3158621181 0.0 0.0 0.0702286844227449 0.0 137.02187396075487 0.1108792101893818 259045.3158621181 baseline
81 i88nw811 lusiana/capstone_tpu/i88nw811 16l3qjpm sweep/ppo/sb3/cpu/default/a0/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/16l3qjpm 2026-03-16T07:50:41Z 8443.503878801 5 1004 1 0.0 100.0 0.0 0.15 True True baseline 348861.1454509751 324713.0754509751 335967.6160126648 312660.3160126648 0.0 0.0 0.0674835742466741 0.0 136.8813175598437 0.118985751213389 312660.3160126648 baseline
82 i88nw811 lusiana/capstone_tpu/i88nw811 rg98ht1b sweep/ppo/sb3/cpu/default/a0/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/rg98ht1b 2026-03-16T07:55:36Z 8843.938343818 5 1004 1 0.0 100.0 0.0 0.05 True True baseline 348861.1454509751 324713.0754509751 335967.6160126648 312660.3160126648 0.0 0.0 0.0674835742466741 0.0 136.8813175598437 0.118985751213389 312660.3160126648 baseline
83 i88nw811 lusiana/capstone_tpu/i88nw811 mxd3i6wr sweep/ppo/sb3/cpu/default/a0.2/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/mxd3i6wr 2026-03-16T07:58:03Z 8393.28184472 0 0 0 0.2 100.0 0.0 0.15 True True baseline 333463.32883383776 310606.38883383776 322375.37087837915 300349.6308783791 0.0 0.0 0.0694238399850746 0.0 137.6206723870474 0.1176551945750585 300349.6308783791 baseline
84 i88nw811 lusiana/capstone_tpu/i88nw811 0xvyhpg2 sweep/ppo/sb3/cpu/default/a0.9/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/0xvyhpg2 2026-03-16T08:01:43Z 7441.092473369 0 0 0 0.9 100.0 0.0 0.05 True True baseline 268129.28805568966 249777.98805568964 259354.03651639624 241657.8165163962 0.0 0.0 0.0692141212557269 0.0 137.56737533812094 0.1028102128114812 241657.8165163962 baseline
85 i88nw811 lusiana/capstone_tpu/i88nw811 eull6lat sweep/ppo/sb3/cpu/default/a0.2/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/eull6lat 2026-03-16T08:03:08Z 8338.76018915 0 0 0 0.2 100.0 0.0 0.05 True True baseline 333463.32883383776 310606.38883383776 322375.37087837915 300349.6308783791 0.0 0.0 0.0694238399850746 0.0 137.6206723870474 0.1176551945750585 300349.6308783791 baseline
86 i88nw811 lusiana/capstone_tpu/i88nw811 5zekml75 sweep/ppo/sb3/cpu/default/a0.8/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/5zekml75 2026-03-16T08:06:29Z 7265.4990034 0 0 0 0.8 100.0 0.0 0.15 True True baseline 277537.1135308166 258574.23353081665 260525.6140973399 242761.4740973399 0.0 0.0 0.0691119185711536 0.0 137.63850710873982 0.1055234893030045 242761.4740973399 baseline
87 i88nw811 lusiana/capstone_tpu/i88nw811 fed0y4px sweep/ppo/sb3/cpu/default/a0.7/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/fed0y4px 2026-03-16T08:13:55Z 7800.555020283 0 0 0 0.7 100.0 0.0 0.05 True True baseline 286859.8032779717 267231.9932779717 273198.5349293896 254530.3349293896 0.0 0.0 0.0694378534785247 0.0 137.6169536272908 0.1086813731317916 254530.3349293896 baseline
88 i88nw811 lusiana/capstone_tpu/i88nw811 byifn20j sweep/ppo/sb3/cpu/default/a0.4/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/byifn20j 2026-03-16T08:20:55Z 8108.199462596 0 0 0 0.4 100.0 0.0 0.3 True True baseline 316543.04043212667 294899.01043212664 299980.59649797506 279386.7564979751 0.0 0.0 0.067603468946279 0.0 137.7846896269947 0.1128739206843639 279386.7564979751 baseline
89 i88nw811 lusiana/capstone_tpu/i88nw811 35rb8529 sweep/ppo/sb3/cpu/default/a0.5/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/35rb8529 2026-03-16T08:24:52Z 7749.649896228 0 0 0 0.5 100.0 0.0 0.05 True True baseline 304711.516143744 283789.716143744 290536.18598250934 270609.3259825093 0.0 0.0 0.0700712626186499 0.0 137.43043602946972 0.1112796769387625 270609.3259825093 baseline
90 i88nw811 lusiana/capstone_tpu/i88nw811 foinu2r1 sweep/ppo/sb3/cpu/default/a0.5/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/foinu2r1 2026-03-16T08:51:50Z 7924.351691656 0 0 0 0.5 100.0 0.0 0.05 True True baseline 306631.1127310434 285624.6727310434 292140.0218133485 272205.32181334845 0.0 0.0 0.0706121906603894 0.0 137.48236407441985 0.112886126809283 272205.32181334845 baseline
91 i88nw811 lusiana/capstone_tpu/i88nw811 nsg7m2ud sweep/ppo/sb3/cpu/default/a0.5/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/nsg7m2ud 2026-03-16T09:06:10Z 7732.794663489 0 0 0 0.5 100.0 0.0 0.3 True True baseline 303325.5596877454 282520.29968774534 291965.65710567136 271937.69710567134 0.0 0.0 0.0686525035124021 0.0 137.57073544790862 0.1132342695408356 271937.69710567134 baseline
92 i88nw811 lusiana/capstone_tpu/i88nw811 gpririem sweep/ppo/sb3/cpu/default/a0.2/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/gpririem 2026-03-16T09:20:57Z 8532.119121611 0 0 0 0.2 100.0 0.0 0.3 True True baseline 333463.32883383776 310606.38883383776 322375.37087837915 300349.6308783791 0.0 0.0 0.0694238399850746 0.0 137.6206723870474 0.1176551945750585 300349.6308783791 baseline
93 i88nw811 lusiana/capstone_tpu/i88nw811 9bmbalnk sweep/ppo/sb3/cpu/default/a0.7/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/9bmbalnk 2026-03-16T10:05:49Z 7576.93090345 0 0 0 0.7 100.0 0.0 0.15 True True baseline 285875.15518050164 266287.2051805016 274356.50146499986 255620.24146499988 0.0 0.0 0.0711188680417482 0.0 137.42722406640746 0.1099719716550294 255620.24146499988 baseline
94 i88nw811 lusiana/capstone_tpu/i88nw811 9ma76sch sweep/ppo/sb3/cpu/default/a0.1/baseline/s1337 finished https://wandb.ai/lusiana/capstone_tpu/runs/9ma76sch 2026-03-16T10:23:59Z 8544.8427845 0 0 0 0.1 100.0 0.0 0.3 True True baseline 341404.1205957663 317885.0305957663 329505.50925893825 306817.3492589383 0.0 0.0 0.0685274095002656 0.0 137.33021724658855 0.1206998447923596 306817.3492589383 baseline
95 i88nw811 lusiana/capstone_tpu/i88nw811 cvrztiyb sweep/ppo/sb3/cpu/default/a0.2/baseline/s42 finished https://wandb.ai/lusiana/capstone_tpu/runs/cvrztiyb 2026-03-16T10:27:26Z 8353.396268583 0 0 0 0.2 100.0 0.0 0.3 True True baseline 333463.32883383776 310606.38883383776 322375.37087837915 300349.6308783791 0.0 0.0 0.0694238399850746 0.0 137.6206723870474 0.1176551945750585 300349.6308783791 baseline
96 i88nw811 lusiana/capstone_tpu/i88nw811 7z9spcc6 sweep/ppo/sb3/cpu/default/a0/baseline/s7777 finished https://wandb.ai/lusiana/capstone_tpu/runs/7z9spcc6 2026-03-16T10:29:46Z 8444.449882423 5 1004 1 0.0 100.0 0.0 0.3 True True baseline 348861.1454509751 324713.0754509751 335967.6160126648 312660.3160126648 0.0 0.0 0.0674835742466741 0.0 136.8813175598437 0.118985751213389 312660.3160126648 baseline

View File

@@ -0,0 +1,31 @@
{
"n": 95,
"k": 2,
"dof": 93,
"df_t": 93,
"cov_type": "hc1",
"clusters": null,
"r2": 0.9759651432807543,
"adj_r2": 0.9757067039611925,
"sse": 1872600419.7223544,
"coefficients": [
{
"name": "intercept",
"coef": 348823.4131652292,
"std_error": 1383.3660823209932,
"t_stat": 252.15553397115096,
"p_value": 0.0,
"ci95_low": 346076.3222890517,
"ci95_high": 351570.5040414067
},
{
"name": "alpha",
"coef": -90140.52744561416,
"std_error": 2185.134882447838,
"t_stat": -41.25169945785529,
"p_value": 0.0,
"ci95_low": -94479.77225976942,
"ci95_high": -85801.2826314589
}
]
}

View File

@@ -0,0 +1,2 @@
sweep_id,n,alpha_coef,alpha_std_error,alpha_t_stat,alpha_p_value,alpha_ci95_low,alpha_ci95_high,r2
i88nw811,95,-90140.52744561416,2185.134882447838,-41.25169945785529,0.0,-94479.77225976942,-85801.2826314589,0.9759651432807543
1 sweep_id n alpha_coef alpha_std_error alpha_t_stat alpha_p_value alpha_ci95_low alpha_ci95_high r2
2 i88nw811 95 -90140.52744561416 2185.134882447838 -41.25169945785529 0.0 -94479.77225976942 -85801.2826314589 0.9759651432807543

View File

@@ -0,0 +1,37 @@
{
"bundle_dir": "/home/velocitatem/Documents/Projects/PHANTOM/engine/studies/results/wandb_sweep_bundles/bundle_20260317_122818",
"git_commit": "e62e842faad79b143f5555d187075e85c8926363",
"cohort_name": "original_n95_baseline_n100",
"filters": {
"sweep_id": [
"i88nw811"
],
"mode": "baseline",
"n_products": 100.0,
"eta_ux": 0.0,
"lambda_coi": null,
"alpha_min": 0.0,
"alpha_max": 1.0
},
"n_rows": 95,
"n_sweeps": 1,
"alpha_unique": [
0.0,
0.1,
0.2,
0.3,
0.4,
0.5,
0.6,
0.7,
0.8,
0.9,
1.0
],
"rows_by_sweep": {
"i88nw811": 95
},
"rows_by_mode": {
"baseline": 95
}
}

View File

@@ -0,0 +1,31 @@
{
"n": 95,
"k": 2,
"dof": 93,
"df_t": 93,
"cov_type": "iid",
"clusters": null,
"r2": 0.9759651432807543,
"adj_r2": 0.9757067039611925,
"sse": 1872600419.7223544,
"coefficients": [
{
"name": "intercept",
"coef": 348823.4131652292,
"std_error": 860.7176431608721,
"t_stat": 405.2704344298337,
"p_value": 0.0,
"ci95_low": 347114.1985078009,
"ci95_high": 350532.6278226575
},
{
"name": "alpha",
"coef": -90140.52744561416,
"std_error": 1466.838282353916,
"t_stat": -61.452259959401054,
"p_value": 0.0,
"ci95_low": -93053.37756806448,
"ci95_high": -87227.67732316385
}
]
}

View File

@@ -1,10 +0,0 @@
{
"runs": 340,
"tiers": 5,
"alphas": 6,
"status": "ok",
"mean_tier_revenue_robust": 190714.62212212436,
"mean_tier_revenue_no_robust": 197371.17216609977,
"mean_tier_revenue_delta": -6656.5500439754105,
"mean_tier_revenue_delta_pct": -3.3726050116242514
}

View File

@@ -1,31 +0,0 @@
tier,alpha,runs_robust,runs_no_robust,eval_revenue_mean_delta,eval_revenue_mean_delta_pct,eval_reward_mean_delta,eval_reward_mean_delta_pct,eval_coi_level_mean_delta,eval_coi_level_mean_delta_pct,eval_margin_mean_delta,eval_margin_mean_delta_pct,objective_score_delta,objective_score_delta_pct,train_alpha_adv_delta,train_alpha_adv_delta_pct
dqn,0.0,5.0,2.0,-31308.987414117495,-8.73651226889534,-1909.7427407095092,-0.5742991901121623,-2.8982436567700063,-2.1108702433020436,-0.001972064237093285,-0.2116777198290971,-1909.7427407095092,-0.5742991901121623,,
dqn,0.1,8.0,4.0,-7723.542755668925,-2.2789188721535494,-74239.37371836061,-21.063854618469847,1.7435833801418141,1.2859365583872486,0.0011891962142838164,0.1278074871971924,-74239.37371836061,-21.063854618469847,0.17619791666666657,176.19791666666694
dqn,0.25,7.0,3.0,-12344.82818986749,-3.7035466052614323,93154.03627578515,36.06691230407512,0.03214544949867104,0.023426184113378143,1.763733457238459e-05,0.001893256490383175,93154.03627578515,36.06691230407512,0.14530952380952394,58.12380952380958
dqn,0.4,5.0,10.0,-7816.300706216833,-2.4694340725162824,-42362.74668471434,-13.411888482380219,0.6251272343707797,0.4579446603861758,0.0002750615520492605,0.02953644634355915,-42362.74668471434,-13.411888482380219,0.09856666666666747,24.64166666666691
dqn,0.6,5.0,4.0,-16150.011887742497,-5.347485987139731,-28508.74710866122,-10.151356300001888,-0.63306323164079,-0.46056970247177387,-0.00034537433455417155,-0.0370668515552649,-28508.74710866122,-10.151356300001888,0.1361999999999981,22.699999999999644
dqn,0.8,7.0,6.0,-18191.8826663699,-6.440527544692988,-55296.94441124235,-20.19273590083627,-0.796733634735034,-0.579832425016392,-0.0006423984775592029,-0.0689476165584585,-55296.94441124235,-20.19273590083627,0.1532857142857158,19.160714285714512
linear,0.0,9.0,8.0,-14967.67388588126,-4.273413942959129,-20107.23171681742,-6.60039931288617,-0.06127790826209889,-0.04564810574240612,-7.607744079518586e-05,-0.008177885913528719,-20107.23171681742,-6.60039931288617,,
linear,0.1,3.0,5.0,-24531.399901538738,-7.171831328305365,-96669.7835552101,-26.44920711447249,-0.3680976907859872,-0.2733723058172187,-0.0002515287835096469,-0.02702956778346356,-96669.7835552101,-26.44920711447249,,
linear,0.25,6.0,9.0,-14840.859479571285,-4.520682292638562,-26510.179456423968,-8.033117756667396,-0.13734776448131925,-0.10212641096230607,-9.41162442338328e-05,-0.010115001392981545,-26510.179456423968,-8.033117756667396,,
linear,0.4,4.0,11.0,-17196.7642560167,-5.486915251242723,-74520.10209817477,-25.042311510043184,0.12217076984330788,0.09098828726103136,0.00010713887099822461,0.011516865671259795,-74520.10209817477,-25.042311510043184,,
linear,0.6,5.0,3.0,-14284.06615788641,-4.854766876637072,38417.71856593515,14.088596762512362,0.24251461234271687,0.1806530855220358,0.0002606811969937395,0.028024824619509187,38417.71856593515,14.088596762512362,,
linear,0.8,4.0,11.0,-10840.488575784548,-3.933600919557566,15749.581078662042,6.447651726824251,0.028051260535562506,0.020876236575910773,5.361882659971062e-05,0.005763158099097226,15749.581078662042,6.447651726824251,,
qtable,0.0,9.0,8.0,-18644.457288398524,-8.15323701554329,32993.42568058451,20.675688115613053,10.369779227648095,10.682768960780463,0.018566897519637582,2.0803084179092814,32993.42568058451,20.675688115613053,0.11839814814814797,
qtable,0.1,6.0,5.0,-12549.400855549495,-4.616991193742389,-37207.79701261924,-15.336047254435487,0.0884057957559321,0.07703761042583206,-0.01127789819771663,-1.2272540823820444,-37207.79701261924,-15.336047254435487,0.07577777777777787,75.77777777777803
qtable,0.25,6.0,5.0,-1534.3527429780224,-0.5456640130847226,18433.43663451099,7.304472653867784,-0.5776125938941306,-0.45734160960552755,-0.003316338490628068,-0.3584028328803385,18433.43663451099,7.304472653867784,0.1181458333333334,47.258333333333354
qtable,0.4,8.0,6.0,-15146.258176090778,-5.274860187729517,-37364.22587794208,-13.005651205148677,0.4611471727478005,0.3629050099230144,0.0071046453227539,0.7751478467862876,-37364.22587794208,-13.005651205148677,0.11010416666666772,27.52604166666698
qtable,0.6,6.0,6.0,-9577.578548656049,-3.9322693501816666,-19088.152339068736,-9.571307395166029,0.9081750157567683,0.7495917946306662,0.0015520804425310786,0.16838348372043557,-19088.152339068736,-9.571307395166029,0.16983333333333228,28.305555555555333
qtable,0.8,5.0,2.0,-52751.680936846446,-19.699089872409548,-16508.209313987172,-7.589601869470744,-15.022454081083623,-11.215398490282094,-0.007791824761087751,-0.8384414846099099,-16508.209313987172,-7.589601869470744,0.11120000000000174,13.900000000000245
static,0.0,5.0,6.0,-4782.871053113384,-5.233544525848519,14411.4689779756,25.538141347978577,1.307060701942973,1.8731997380823568,0.002537468952847566,0.2911381045328444,14411.4689779756,25.538141347978577,,
static,0.1,8.0,5.0,1629.4524528499896,1.880088900553112,-5347.078589385725,-8.14812684380662,0.3600324838305795,0.5019134064795009,-4.6492644957929485e-05,-0.005316014641356001,-5347.078589385725,-8.14812684380662,,
static,0.25,5.0,6.0,-9938.662276761897,-10.398087633377964,-23616.087243780566,-27.701108621456626,-3.0513860773271233,-4.099238223547561,-0.003519771479853273,-0.40113716461596144,-23616.087243780566,-27.701108621456626,,
static,0.4,3.0,4.0,1850.8400595222774,2.1912497828943436,15058.659457798465,23.67199439061036,3.669612467486587,5.430169778169349,0.006763447803564415,0.7804393835882188,15058.659457798465,23.67199439061036,,
static,0.6,6.0,5.0,1038.893948415236,1.2765037688226162,-6062.864079504681,-9.363144945348399,-1.712609061865976,-2.3996341009364213,-0.0042285583442709385,-0.48362088973179423,-6062.864079504681,-9.363144945348399,,
static,0.8,3.0,7.0,2696.6340631967323,3.6826150812750567,149.22406835677975,0.27280281303997084,0.8491716126507072,1.2427748744725668,0.0032786525965587954,0.3777595573932637,149.22406835677975,0.27280281303997084,,
surge,0.0,6.0,6.0,-606.73760243367,-5.066579306500225,-244.17585425326251,-5.525800641331023,0.014874931199557295,0.09186560988877175,0.0019308940532419272,0.4471794260021321,-244.17585425326251,-5.525800641331023,,
surge,0.1,2.0,5.0,169.78743573408792,1.446343107913299,-1012.7706974660168,-20.02053666691211,-0.14459518037699226,-0.864651254901582,-0.0018650458785858248,-0.4260349899970559,-1012.7706974660168,-20.02053666691211,,
surge,0.25,10.0,7.0,-128.20993816584632,-1.1276930411162496,-81.21373487263281,-1.7081453033360994,0.3008506477195141,1.839047728806548,0.0030750148302954305,0.7102446987902812,-81.21373487263281,-1.7081453033360994,,
surge,0.4,6.0,6.0,-473.03722764431404,-4.297928307550563,28.557452243338048,0.6755106104955642,-0.5027452173053764,-3.072002360121898,-0.005581380442163164,-1.288152985482699,28.557452243338048,0.6755106104955642,,
surge,0.6,2.0,5.0,307.79436325796996,3.0356727142643067,2060.57396030564,63.382050333909866,0.2339650444065704,1.438519400758399,0.001302270025389629,0.30077697380833807,2060.57396030564,63.382050333909866,,
surge,0.8,3.0,3.0,423.15386247993047,4.372210191290083,1117.0942083304312,34.86182570616373,0.8971464536957541,5.327339899805159,0.007068630716831503,1.6094191039618562,1117.0942083304312,34.86182570616373,,
1 tier alpha runs_robust runs_no_robust eval_revenue_mean_delta eval_revenue_mean_delta_pct eval_reward_mean_delta eval_reward_mean_delta_pct eval_coi_level_mean_delta eval_coi_level_mean_delta_pct eval_margin_mean_delta eval_margin_mean_delta_pct objective_score_delta objective_score_delta_pct train_alpha_adv_delta train_alpha_adv_delta_pct
2 dqn 0.0 5.0 2.0 -31308.987414117495 -8.73651226889534 -1909.7427407095092 -0.5742991901121623 -2.8982436567700063 -2.1108702433020436 -0.001972064237093285 -0.2116777198290971 -1909.7427407095092 -0.5742991901121623
3 dqn 0.1 8.0 4.0 -7723.542755668925 -2.2789188721535494 -74239.37371836061 -21.063854618469847 1.7435833801418141 1.2859365583872486 0.0011891962142838164 0.1278074871971924 -74239.37371836061 -21.063854618469847 0.17619791666666657 176.19791666666694
4 dqn 0.25 7.0 3.0 -12344.82818986749 -3.7035466052614323 93154.03627578515 36.06691230407512 0.03214544949867104 0.023426184113378143 1.763733457238459e-05 0.001893256490383175 93154.03627578515 36.06691230407512 0.14530952380952394 58.12380952380958
5 dqn 0.4 5.0 10.0 -7816.300706216833 -2.4694340725162824 -42362.74668471434 -13.411888482380219 0.6251272343707797 0.4579446603861758 0.0002750615520492605 0.02953644634355915 -42362.74668471434 -13.411888482380219 0.09856666666666747 24.64166666666691
6 dqn 0.6 5.0 4.0 -16150.011887742497 -5.347485987139731 -28508.74710866122 -10.151356300001888 -0.63306323164079 -0.46056970247177387 -0.00034537433455417155 -0.0370668515552649 -28508.74710866122 -10.151356300001888 0.1361999999999981 22.699999999999644
7 dqn 0.8 7.0 6.0 -18191.8826663699 -6.440527544692988 -55296.94441124235 -20.19273590083627 -0.796733634735034 -0.579832425016392 -0.0006423984775592029 -0.0689476165584585 -55296.94441124235 -20.19273590083627 0.1532857142857158 19.160714285714512
8 linear 0.0 9.0 8.0 -14967.67388588126 -4.273413942959129 -20107.23171681742 -6.60039931288617 -0.06127790826209889 -0.04564810574240612 -7.607744079518586e-05 -0.008177885913528719 -20107.23171681742 -6.60039931288617
9 linear 0.1 3.0 5.0 -24531.399901538738 -7.171831328305365 -96669.7835552101 -26.44920711447249 -0.3680976907859872 -0.2733723058172187 -0.0002515287835096469 -0.02702956778346356 -96669.7835552101 -26.44920711447249
10 linear 0.25 6.0 9.0 -14840.859479571285 -4.520682292638562 -26510.179456423968 -8.033117756667396 -0.13734776448131925 -0.10212641096230607 -9.41162442338328e-05 -0.010115001392981545 -26510.179456423968 -8.033117756667396
11 linear 0.4 4.0 11.0 -17196.7642560167 -5.486915251242723 -74520.10209817477 -25.042311510043184 0.12217076984330788 0.09098828726103136 0.00010713887099822461 0.011516865671259795 -74520.10209817477 -25.042311510043184
12 linear 0.6 5.0 3.0 -14284.06615788641 -4.854766876637072 38417.71856593515 14.088596762512362 0.24251461234271687 0.1806530855220358 0.0002606811969937395 0.028024824619509187 38417.71856593515 14.088596762512362
13 linear 0.8 4.0 11.0 -10840.488575784548 -3.933600919557566 15749.581078662042 6.447651726824251 0.028051260535562506 0.020876236575910773 5.361882659971062e-05 0.005763158099097226 15749.581078662042 6.447651726824251
14 qtable 0.0 9.0 8.0 -18644.457288398524 -8.15323701554329 32993.42568058451 20.675688115613053 10.369779227648095 10.682768960780463 0.018566897519637582 2.0803084179092814 32993.42568058451 20.675688115613053 0.11839814814814797
15 qtable 0.1 6.0 5.0 -12549.400855549495 -4.616991193742389 -37207.79701261924 -15.336047254435487 0.0884057957559321 0.07703761042583206 -0.01127789819771663 -1.2272540823820444 -37207.79701261924 -15.336047254435487 0.07577777777777787 75.77777777777803
16 qtable 0.25 6.0 5.0 -1534.3527429780224 -0.5456640130847226 18433.43663451099 7.304472653867784 -0.5776125938941306 -0.45734160960552755 -0.003316338490628068 -0.3584028328803385 18433.43663451099 7.304472653867784 0.1181458333333334 47.258333333333354
17 qtable 0.4 8.0 6.0 -15146.258176090778 -5.274860187729517 -37364.22587794208 -13.005651205148677 0.4611471727478005 0.3629050099230144 0.0071046453227539 0.7751478467862876 -37364.22587794208 -13.005651205148677 0.11010416666666772 27.52604166666698
18 qtable 0.6 6.0 6.0 -9577.578548656049 -3.9322693501816666 -19088.152339068736 -9.571307395166029 0.9081750157567683 0.7495917946306662 0.0015520804425310786 0.16838348372043557 -19088.152339068736 -9.571307395166029 0.16983333333333228 28.305555555555333
19 qtable 0.8 5.0 2.0 -52751.680936846446 -19.699089872409548 -16508.209313987172 -7.589601869470744 -15.022454081083623 -11.215398490282094 -0.007791824761087751 -0.8384414846099099 -16508.209313987172 -7.589601869470744 0.11120000000000174 13.900000000000245
20 static 0.0 5.0 6.0 -4782.871053113384 -5.233544525848519 14411.4689779756 25.538141347978577 1.307060701942973 1.8731997380823568 0.002537468952847566 0.2911381045328444 14411.4689779756 25.538141347978577
21 static 0.1 8.0 5.0 1629.4524528499896 1.880088900553112 -5347.078589385725 -8.14812684380662 0.3600324838305795 0.5019134064795009 -4.6492644957929485e-05 -0.005316014641356001 -5347.078589385725 -8.14812684380662
22 static 0.25 5.0 6.0 -9938.662276761897 -10.398087633377964 -23616.087243780566 -27.701108621456626 -3.0513860773271233 -4.099238223547561 -0.003519771479853273 -0.40113716461596144 -23616.087243780566 -27.701108621456626
23 static 0.4 3.0 4.0 1850.8400595222774 2.1912497828943436 15058.659457798465 23.67199439061036 3.669612467486587 5.430169778169349 0.006763447803564415 0.7804393835882188 15058.659457798465 23.67199439061036
24 static 0.6 6.0 5.0 1038.893948415236 1.2765037688226162 -6062.864079504681 -9.363144945348399 -1.712609061865976 -2.3996341009364213 -0.0042285583442709385 -0.48362088973179423 -6062.864079504681 -9.363144945348399
25 static 0.8 3.0 7.0 2696.6340631967323 3.6826150812750567 149.22406835677975 0.27280281303997084 0.8491716126507072 1.2427748744725668 0.0032786525965587954 0.3777595573932637 149.22406835677975 0.27280281303997084
26 surge 0.0 6.0 6.0 -606.73760243367 -5.066579306500225 -244.17585425326251 -5.525800641331023 0.014874931199557295 0.09186560988877175 0.0019308940532419272 0.4471794260021321 -244.17585425326251 -5.525800641331023
27 surge 0.1 2.0 5.0 169.78743573408792 1.446343107913299 -1012.7706974660168 -20.02053666691211 -0.14459518037699226 -0.864651254901582 -0.0018650458785858248 -0.4260349899970559 -1012.7706974660168 -20.02053666691211
28 surge 0.25 10.0 7.0 -128.20993816584632 -1.1276930411162496 -81.21373487263281 -1.7081453033360994 0.3008506477195141 1.839047728806548 0.0030750148302954305 0.7102446987902812 -81.21373487263281 -1.7081453033360994
29 surge 0.4 6.0 6.0 -473.03722764431404 -4.297928307550563 28.557452243338048 0.6755106104955642 -0.5027452173053764 -3.072002360121898 -0.005581380442163164 -1.288152985482699 28.557452243338048 0.6755106104955642
30 surge 0.6 2.0 5.0 307.79436325796996 3.0356727142643067 2060.57396030564 63.382050333909866 0.2339650444065704 1.438519400758399 0.001302270025389629 0.30077697380833807 2060.57396030564 63.382050333909866
31 surge 0.8 3.0 3.0 423.15386247993047 4.372210191290083 1117.0942083304312 34.86182570616373 0.8971464536957541 5.327339899805159 0.007068630716831503 1.6094191039618562 1117.0942083304312 34.86182570616373

View File

@@ -1,61 +0,0 @@
tier,alpha,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_margin_mean_mean,eval_margin_mean_std,objective_score_mean,objective_score_std,train_alpha_adv_mean,train_alpha_adv_std
dqn,0.0,no_robust,2,358369.40933039243,3531.782519351935,332534.46523867303,114183.5587841961,137.30089123035202,0.8184776440325546,0.9316352418598786,0.0006839003676302996,332534.46523867303,114183.5587841961,,
dqn,0.0,robust,5,327060.42191627494,24311.17412598574,330624.7224979635,62834.39223547943,134.40264757358202,6.160000643680792,0.9296631776227853,0.004262039730140749,330624.7224979635,62834.39223547943,0.17835000000000004,0.08829347371125472
dqn,0.1,no_robust,4,338912.58043645386,19584.736810155388,352449.13650924934,34076.74819101191,135.58860029055563,3.4055508991301524,0.9304589585186211,0.0023438665484978773,352449.13650924934,34076.74819101191,0.0999999999999998,0.0
dqn,0.1,robust,8,331189.03768078494,8060.912085646968,278209.7627908887,57861.69545853692,137.33218367069745,0.43113256118808096,0.931648154732905,0.000296560958972609,278209.7627908887,57861.69545853692,0.2761979166666664,0.09826648189130198
dqn,0.25,no_robust,3,333324.4996115304,6101.717861804452,258281.15112936878,46772.05216097596,137.2201692904545,0.9866477887862672,0.9315871706751672,0.0006356053229300815,258281.15112936878,46772.05216097596,0.25,0.0
dqn,0.25,robust,7,320979.6714216629,7345.8761269427705,351435.18740515393,40320.63699261721,137.25231473995316,0.3527287960309152,0.9316048080097395,0.0002575240668471541,351435.18740515393,40320.63699261721,0.39530952380952394,0.073021206240698
dqn,0.4,no_robust,10,316521.94295076875,3631.1820920182718,315859.66987697606,59129.03566963754,136.50715652926755,0.5085743959240285,0.931261495881483,0.00031280530251053175,315859.66987697606,59129.03566963754,0.3999999999999993,0.0
dqn,0.4,robust,5,308705.6422445519,10654.571556448245,273496.9231922617,68868.59270778317,137.13228376363833,0.9543108715306617,0.9315365574335323,0.0006302636717132419,273496.9231922617,68868.59270778317,0.49856666666666677,0.05745573175159429
dqn,0.6,no_robust,4,302011.2988903938,2354.1141598720183,280836.828756133,58683.00124997926,137.4522093492651,0.4692723362517602,0.9317606434396914,0.0003317518021682495,280836.828756133,58683.00124997926,0.600000000000001,0.0
dqn,0.6,robust,5,285861.2870026513,10386.571631344234,252328.08164747176,59388.56063758225,136.8191461176243,1.0629203361893034,0.9314152691051373,0.0005692783702932289,252328.08164747176,59388.56063758225,0.7361999999999991,0.07108625433623189
dqn,0.8,no_robust,6,282459.51189759385,2625.018247527438,273845.72691287595,66378.16690732416,137.4075681801531,0.29728950101826707,0.9317196295169007,0.00022799290978965786,273845.72691287595,66378.16690732416,0.7999999999999985,0.0
dqn,0.8,robust,7,264267.62923122395,6771.288971321149,218548.7825016336,50043.2009443344,136.61083454541807,1.2319662937254596,0.9310772310393415,0.0010118564779437284,218548.7825016336,50043.2009443344,0.9532857142857143,0.04709817507333055
linear,0.0,no_robust,8,350250.9723061577,3156.286820918861,304636.59490360576,71682.88027353655,134.2397614654424,0.32611787466946035,0.9302824910938235,0.00024020749661685483,304636.59490360576,71682.88027353655,,
linear,0.0,robust,9,335283.29842027643,7707.594869976611,284529.36318678834,55524.58819004573,134.1784835571803,0.4477314164684001,0.9302064136530284,0.00034781034181738526,284529.36318678834,55524.58819004573,,
linear,0.1,no_robust,5,342052.1032713031,2576.546352056584,365492.17954557994,44890.93522299766,134.65068807375954,0.2181027640393531,0.930569018064469,0.00014058935916940913,365492.17954557994,44890.93522299766,,
linear,0.1,robust,3,317520.7033697644,4796.580459456527,268822.39599036984,39256.421140635124,134.28259038297355,0.24570499109363475,0.9303174892809594,0.00018817899183709092,268822.39599036984,39256.421140635124,,
linear,0.25,no_robust,9,328288.0441241802,2178.525494145428,330011.0898339667,38591.36053388808,134.48799697074742,0.2199303973026469,0.9304619997297959,0.00015341642413402035,330011.0898339667,38591.36053388808,,
linear,0.25,robust,6,313447.18464460893,11811.426711620714,303500.9103775427,63358.917144214036,134.3506492062661,0.2947034403278951,0.9303678834855621,0.00021446628431268986,303500.9103775427,63358.917144214036,,
linear,0.4,no_robust,11,313414.0672597746,1982.9537556159262,297576.7714904776,69396.90446617964,134.2708754290745,0.3062093691351849,0.9302780292522507,0.00023067974755288992,297576.7714904776,69396.90446617964,,
linear,0.4,robust,4,296217.3030037579,5109.898340355844,223056.66939230284,38293.73688466607,134.3930461989178,0.12347753686382154,0.9303851681232489,7.324605809708878e-05,223056.66939230284,38293.73688466607,,
linear,0.6,no_robust,3,294227.64307441004,2081.9176570448135,272686.62176604365,66672.50905805513,134.24327165069943,0.30764332256042104,0.9301795837547151,0.00020453921786790446,272686.62176604365,66672.50905805513,,
linear,0.6,robust,5,279943.5769165236,9866.031719660255,311104.3403319788,28363.930707781863,134.48578626304214,0.21280262186464388,0.9304402649517088,0.00020533894868120649,311104.3403319788,28363.930707781863,,
linear,0.8,no_robust,11,275586.89347174135,1618.038877505867,244268.4832547461,56201.44465269986,134.36933631960773,0.2845660213184439,0.9303723007028001,0.00017640716421186918,244268.4832547461,56201.44465269986,,
linear,0.8,robust,4,264746.4048959568,7976.6279174956235,260018.06433340814,57942.49882730146,134.3973875801433,0.31511916357643405,0.9304259195293998,0.00023606570471334208,260018.06433340814,57942.49882730146,,
qtable,0.0,no_robust,8,228675.52179404112,103199.70453252994,159575.94976328663,95848.81008103945,97.07014413321637,33.0637115678536,0.8925069648229078,0.04890522141482132,159575.94976328663,95848.81008103945,0.0,0.0
qtable,0.0,robust,9,210031.0645056426,84361.3834579348,192569.37544387113,116824.7880426837,107.43992336086447,21.41128645838254,0.9110738623425454,0.019188350719133364,192569.37544387113,116824.7880426837,0.11839814814814797,0.061909456985161225
qtable,0.1,no_robust,5,271809.0706466638,14898.209045050968,242616.60384397948,49181.45526408063,114.75666919996793,3.461383158930426,0.9189538140159812,0.002294693249439748,242616.60384397948,49181.45526408063,0.0999999999999998,0.0
qtable,0.1,robust,6,259259.66979111428,102995.29934229614,205408.80683136024,94155.1845420674,114.84507499572386,36.206421837506966,0.9076759158182646,0.048591979839360346,205408.80683136024,94155.1845420674,0.17577777777777767,0.06720562696899951
qtable,0.25,no_robust,5,281190.01916657295,70274.10208723843,252358.2126733039,129868.46825082717,126.29784427276161,15.368804047323954,0.9253103453385114,0.009044883517550522,252358.2126733039,129868.46825082717,0.25,0.0
qtable,0.25,robust,6,279655.6664235949,93056.2549557545,270791.6493078149,116021.46257259768,125.72023167886748,26.760714047253796,0.9219940068478834,0.022785695882060884,270791.6493078149,116021.46257259768,0.3681458333333334,0.08845114686619042
qtable,0.4,no_robust,6,287140.4669895195,32698.16434426399,287292.23388022534,83855.95000252876,127.07104066863859,9.200301166154173,0.9165535777734913,0.01306001923887748,287292.23388022534,83855.95000252876,0.3999999999999993,0.0
qtable,0.4,robust,8,271994.2088134287,79259.3185780895,249928.00800228326,88265.30801790548,127.53218784138639,23.406428094683015,0.9236582230962452,0.020073747007871224,249928.00800228326,88265.30801790548,0.510104166666667,0.09294655989347765
qtable,0.6,no_robust,6,243563.64469828535,67006.60707045678,199430.98211127534,79119.52886604435,121.15594411011905,17.91243944823949,0.9217533740470492,0.011558797825966702,199430.98211127534,79119.52886604435,0.600000000000001,0.0
qtable,0.6,robust,6,233986.0661496293,43155.478617087436,180342.8297722066,48117.79957836251,122.06411912587582,12.160951090203252,0.9233054544895802,0.006840854872863436,180342.8297722066,48117.79957836251,0.7698333333333333,0.09107066853090896
qtable,0.8,no_robust,2,267787.4017455507,1552.038101264713,217510.87340156303,45358.788584678456,133.9448981157492,0.47346860040111405,0.9293224278749692,0.0002998116010539045,217510.87340156303,45358.788584678456,0.7999999999999985,0.0
qtable,0.8,robust,5,215035.72080870424,32869.73253165852,201002.66408757586,63247.67956376057,118.92244403466557,8.586916805142152,0.9215306031138815,0.004644709320891907,201002.66408757586,63247.67956376057,0.9112000000000002,0.07381653307732307
static,0.0,no_robust,6,91388.75248869567,13415.65534300268,56431.15832748852,8525.098185703384,69.77689967440658,3.670744870085874,0.8715688236409825,0.005831496806767582,56431.15832748852,8525.098185703384,,
static,0.0,robust,5,86605.88143558228,7614.909395960895,70842.62730546412,8033.737230392738,71.08396037634955,3.6802889678420283,0.8741062925938301,0.005083911544334936,70842.62730546412,8033.737230392738,,
static,0.1,no_robust,5,86668.90445290186,8037.955688932984,65623.40881389238,19329.448262530004,71.73199185012882,4.199046495412734,0.874577067494122,0.006610505646022198,65623.40881389238,19329.448262530004,,
static,0.1,robust,8,88298.35690575185,9576.838833058617,60276.33022450666,13359.490452744656,72.0920243339594,6.7706096714767865,0.8745305748491641,0.010083585815241344,60276.33022450666,13359.490452744656,,
static,0.25,no_robust,6,95581.63603909909,8345.698435455577,85253.22060752509,13111.526873622026,74.43788116042678,2.1078820386097368,0.8774483618896327,0.0037254791853004897,85253.22060752509,13111.526873622026,,
static,0.25,robust,5,85642.97376233719,9472.880627242153,61637.13336374452,15937.429780623212,71.38649508309966,4.0264905454627264,0.8739285904097794,0.005323853359397925,61637.13336374452,15937.429780623212,,
static,0.4,no_robust,4,84465.04245981346,12101.831388745604,63613.81812329075,7778.361846092061,67.5782271530322,3.9088888968092,0.8666205147756862,0.007149121199217965,63613.81812329075,7778.361846092061,,
static,0.4,robust,3,86315.88251933573,8642.748496122398,78672.47758108922,17823.74997200773,71.24783962051879,2.790416943786253,0.8733839625792507,0.005990544453538607,78672.47758108922,17823.74997200773,,
static,0.6,no_robust,5,81385.88962988024,12343.523894997037,64752.43216774836,23486.779472906223,71.36959177224794,5.100226704959064,0.874353948320141,0.007787250295491337,64752.43216774836,23486.779472906223,,
static,0.6,robust,6,82424.78357829548,9831.886701625144,58689.56808824368,12672.506035553573,69.65698271038197,3.484982360048201,0.8701253899758701,0.005917711231889304,58689.56808824368,12672.506035553573,,
static,0.8,no_robust,7,73226.06364450825,4447.877985963851,54700.340767716196,14406.881298569717,68.32867561883204,3.68262917356943,0.8679204886788817,0.007467501164611224,54700.340767716196,14406.881298569717,,
static,0.8,robust,3,75922.69770770498,5046.089536162847,54849.564836072976,22780.98012221352,69.17784723148274,1.5268167784698885,0.8711991412754405,0.0033278715575433297,54849.564836072976,22780.98012221352,,
surge,0.0,no_robust,6,11975.290738176132,411.4052900076416,4418.832131346071,896.5828048394391,16.192056219479124,0.8040364003224534,0.4317940274006973,0.008271862690929055,4418.832131346071,896.5828048394391,,
surge,0.0,robust,6,11368.553135742462,623.8217438159004,4174.6562770928085,639.9963040241264,16.20693115067868,0.9853827520149101,0.4337249214539392,0.010371668289035135,4174.6562770928085,639.9963040241264,,
surge,0.1,no_robust,5,11739.084232858655,332.778792718381,5058.659087494994,1110.8409258976824,16.722948073839394,0.6578121995950104,0.4377682402562083,0.005683401047550787,5058.659087494994,1110.8409258976824,,
surge,0.1,robust,2,11908.871668592743,81.41250285550258,4045.8883900289775,784.7169500268457,16.5783528934624,0.4088194924856508,0.4359031943776225,0.004531137621699143,4045.8883900289775,784.7169500268457,,
surge,0.25,no_robust,7,11369.223138855004,236.1121240061105,4754.4980344481255,1038.0550037539617,16.359045119223275,0.3945156775653057,0.4329514652531622,0.0038762110261952457,4754.4980344481255,1038.0550037539617,,
surge,0.25,robust,10,11241.013200689158,684.503587066406,4673.284299575493,1187.78635131025,16.65989576694279,1.0515950311117155,0.4360264800834576,0.009701952962125513,4673.284299575493,1187.78635131025,,
surge,0.4,no_robust,6,11006.168409400554,364.6584583108646,4227.535704048808,1414.7964077877168,16.365391636138824,0.9138430058543858,0.4332855262584901,0.008024003783434592,4227.535704048808,1414.7964077877168,,
surge,0.4,robust,6,10533.13118175624,526.0758051960169,4256.093156292146,783.7965507386594,15.862646418833448,0.7732699435426456,0.42770414581632693,0.008967505611725135,4256.093156292146,783.7965507386594,,
surge,0.6,no_robust,5,10139.2472848498,97.448078425168,3251.037082975553,742.2100315641153,16.26429537781848,0.4432465691073604,0.4329686574409998,0.004121820888165019,3251.037082975553,742.2100315641153,,
surge,0.6,robust,2,10447.04164810777,524.0029334247373,5311.611043281193,1808.6200710093085,16.49826042222505,0.6088756908260344,0.43427092746638946,0.007817511630542989,5311.611043281193,1808.6200710093085,,
surge,0.8,no_robust,3,9678.259826640971,272.83530913170915,3204.3479815026553,556.8799617962688,16.840420745981802,0.4589959822922529,0.43920385308157944,0.004953937449529005,3204.3479815026553,556.8799617962688,,
surge,0.8,robust,3,10101.413689120902,526.8318040489241,4321.442189833087,1284.166148011517,17.737567199677557,0.6586775330563983,0.44627248379841095,0.004644261847052545,4321.442189833087,1284.166148011517,,
1 tier alpha mode runs eval_revenue_mean_mean eval_revenue_mean_std eval_reward_mean_mean eval_reward_mean_std eval_coi_level_mean_mean eval_coi_level_mean_std eval_margin_mean_mean eval_margin_mean_std objective_score_mean objective_score_std train_alpha_adv_mean train_alpha_adv_std
2 dqn 0.0 no_robust 2 358369.40933039243 3531.782519351935 332534.46523867303 114183.5587841961 137.30089123035202 0.8184776440325546 0.9316352418598786 0.0006839003676302996 332534.46523867303 114183.5587841961
3 dqn 0.0 robust 5 327060.42191627494 24311.17412598574 330624.7224979635 62834.39223547943 134.40264757358202 6.160000643680792 0.9296631776227853 0.004262039730140749 330624.7224979635 62834.39223547943 0.17835000000000004 0.08829347371125472
4 dqn 0.1 no_robust 4 338912.58043645386 19584.736810155388 352449.13650924934 34076.74819101191 135.58860029055563 3.4055508991301524 0.9304589585186211 0.0023438665484978773 352449.13650924934 34076.74819101191 0.0999999999999998 0.0
5 dqn 0.1 robust 8 331189.03768078494 8060.912085646968 278209.7627908887 57861.69545853692 137.33218367069745 0.43113256118808096 0.931648154732905 0.000296560958972609 278209.7627908887 57861.69545853692 0.2761979166666664 0.09826648189130198
6 dqn 0.25 no_robust 3 333324.4996115304 6101.717861804452 258281.15112936878 46772.05216097596 137.2201692904545 0.9866477887862672 0.9315871706751672 0.0006356053229300815 258281.15112936878 46772.05216097596 0.25 0.0
7 dqn 0.25 robust 7 320979.6714216629 7345.8761269427705 351435.18740515393 40320.63699261721 137.25231473995316 0.3527287960309152 0.9316048080097395 0.0002575240668471541 351435.18740515393 40320.63699261721 0.39530952380952394 0.073021206240698
8 dqn 0.4 no_robust 10 316521.94295076875 3631.1820920182718 315859.66987697606 59129.03566963754 136.50715652926755 0.5085743959240285 0.931261495881483 0.00031280530251053175 315859.66987697606 59129.03566963754 0.3999999999999993 0.0
9 dqn 0.4 robust 5 308705.6422445519 10654.571556448245 273496.9231922617 68868.59270778317 137.13228376363833 0.9543108715306617 0.9315365574335323 0.0006302636717132419 273496.9231922617 68868.59270778317 0.49856666666666677 0.05745573175159429
10 dqn 0.6 no_robust 4 302011.2988903938 2354.1141598720183 280836.828756133 58683.00124997926 137.4522093492651 0.4692723362517602 0.9317606434396914 0.0003317518021682495 280836.828756133 58683.00124997926 0.600000000000001 0.0
11 dqn 0.6 robust 5 285861.2870026513 10386.571631344234 252328.08164747176 59388.56063758225 136.8191461176243 1.0629203361893034 0.9314152691051373 0.0005692783702932289 252328.08164747176 59388.56063758225 0.7361999999999991 0.07108625433623189
12 dqn 0.8 no_robust 6 282459.51189759385 2625.018247527438 273845.72691287595 66378.16690732416 137.4075681801531 0.29728950101826707 0.9317196295169007 0.00022799290978965786 273845.72691287595 66378.16690732416 0.7999999999999985 0.0
13 dqn 0.8 robust 7 264267.62923122395 6771.288971321149 218548.7825016336 50043.2009443344 136.61083454541807 1.2319662937254596 0.9310772310393415 0.0010118564779437284 218548.7825016336 50043.2009443344 0.9532857142857143 0.04709817507333055
14 linear 0.0 no_robust 8 350250.9723061577 3156.286820918861 304636.59490360576 71682.88027353655 134.2397614654424 0.32611787466946035 0.9302824910938235 0.00024020749661685483 304636.59490360576 71682.88027353655
15 linear 0.0 robust 9 335283.29842027643 7707.594869976611 284529.36318678834 55524.58819004573 134.1784835571803 0.4477314164684001 0.9302064136530284 0.00034781034181738526 284529.36318678834 55524.58819004573
16 linear 0.1 no_robust 5 342052.1032713031 2576.546352056584 365492.17954557994 44890.93522299766 134.65068807375954 0.2181027640393531 0.930569018064469 0.00014058935916940913 365492.17954557994 44890.93522299766
17 linear 0.1 robust 3 317520.7033697644 4796.580459456527 268822.39599036984 39256.421140635124 134.28259038297355 0.24570499109363475 0.9303174892809594 0.00018817899183709092 268822.39599036984 39256.421140635124
18 linear 0.25 no_robust 9 328288.0441241802 2178.525494145428 330011.0898339667 38591.36053388808 134.48799697074742 0.2199303973026469 0.9304619997297959 0.00015341642413402035 330011.0898339667 38591.36053388808
19 linear 0.25 robust 6 313447.18464460893 11811.426711620714 303500.9103775427 63358.917144214036 134.3506492062661 0.2947034403278951 0.9303678834855621 0.00021446628431268986 303500.9103775427 63358.917144214036
20 linear 0.4 no_robust 11 313414.0672597746 1982.9537556159262 297576.7714904776 69396.90446617964 134.2708754290745 0.3062093691351849 0.9302780292522507 0.00023067974755288992 297576.7714904776 69396.90446617964
21 linear 0.4 robust 4 296217.3030037579 5109.898340355844 223056.66939230284 38293.73688466607 134.3930461989178 0.12347753686382154 0.9303851681232489 7.324605809708878e-05 223056.66939230284 38293.73688466607
22 linear 0.6 no_robust 3 294227.64307441004 2081.9176570448135 272686.62176604365 66672.50905805513 134.24327165069943 0.30764332256042104 0.9301795837547151 0.00020453921786790446 272686.62176604365 66672.50905805513
23 linear 0.6 robust 5 279943.5769165236 9866.031719660255 311104.3403319788 28363.930707781863 134.48578626304214 0.21280262186464388 0.9304402649517088 0.00020533894868120649 311104.3403319788 28363.930707781863
24 linear 0.8 no_robust 11 275586.89347174135 1618.038877505867 244268.4832547461 56201.44465269986 134.36933631960773 0.2845660213184439 0.9303723007028001 0.00017640716421186918 244268.4832547461 56201.44465269986
25 linear 0.8 robust 4 264746.4048959568 7976.6279174956235 260018.06433340814 57942.49882730146 134.3973875801433 0.31511916357643405 0.9304259195293998 0.00023606570471334208 260018.06433340814 57942.49882730146
26 qtable 0.0 no_robust 8 228675.52179404112 103199.70453252994 159575.94976328663 95848.81008103945 97.07014413321637 33.0637115678536 0.8925069648229078 0.04890522141482132 159575.94976328663 95848.81008103945 0.0 0.0
27 qtable 0.0 robust 9 210031.0645056426 84361.3834579348 192569.37544387113 116824.7880426837 107.43992336086447 21.41128645838254 0.9110738623425454 0.019188350719133364 192569.37544387113 116824.7880426837 0.11839814814814797 0.061909456985161225
28 qtable 0.1 no_robust 5 271809.0706466638 14898.209045050968 242616.60384397948 49181.45526408063 114.75666919996793 3.461383158930426 0.9189538140159812 0.002294693249439748 242616.60384397948 49181.45526408063 0.0999999999999998 0.0
29 qtable 0.1 robust 6 259259.66979111428 102995.29934229614 205408.80683136024 94155.1845420674 114.84507499572386 36.206421837506966 0.9076759158182646 0.048591979839360346 205408.80683136024 94155.1845420674 0.17577777777777767 0.06720562696899951
30 qtable 0.25 no_robust 5 281190.01916657295 70274.10208723843 252358.2126733039 129868.46825082717 126.29784427276161 15.368804047323954 0.9253103453385114 0.009044883517550522 252358.2126733039 129868.46825082717 0.25 0.0
31 qtable 0.25 robust 6 279655.6664235949 93056.2549557545 270791.6493078149 116021.46257259768 125.72023167886748 26.760714047253796 0.9219940068478834 0.022785695882060884 270791.6493078149 116021.46257259768 0.3681458333333334 0.08845114686619042
32 qtable 0.4 no_robust 6 287140.4669895195 32698.16434426399 287292.23388022534 83855.95000252876 127.07104066863859 9.200301166154173 0.9165535777734913 0.01306001923887748 287292.23388022534 83855.95000252876 0.3999999999999993 0.0
33 qtable 0.4 robust 8 271994.2088134287 79259.3185780895 249928.00800228326 88265.30801790548 127.53218784138639 23.406428094683015 0.9236582230962452 0.020073747007871224 249928.00800228326 88265.30801790548 0.510104166666667 0.09294655989347765
34 qtable 0.6 no_robust 6 243563.64469828535 67006.60707045678 199430.98211127534 79119.52886604435 121.15594411011905 17.91243944823949 0.9217533740470492 0.011558797825966702 199430.98211127534 79119.52886604435 0.600000000000001 0.0
35 qtable 0.6 robust 6 233986.0661496293 43155.478617087436 180342.8297722066 48117.79957836251 122.06411912587582 12.160951090203252 0.9233054544895802 0.006840854872863436 180342.8297722066 48117.79957836251 0.7698333333333333 0.09107066853090896
36 qtable 0.8 no_robust 2 267787.4017455507 1552.038101264713 217510.87340156303 45358.788584678456 133.9448981157492 0.47346860040111405 0.9293224278749692 0.0002998116010539045 217510.87340156303 45358.788584678456 0.7999999999999985 0.0
37 qtable 0.8 robust 5 215035.72080870424 32869.73253165852 201002.66408757586 63247.67956376057 118.92244403466557 8.586916805142152 0.9215306031138815 0.004644709320891907 201002.66408757586 63247.67956376057 0.9112000000000002 0.07381653307732307
38 static 0.0 no_robust 6 91388.75248869567 13415.65534300268 56431.15832748852 8525.098185703384 69.77689967440658 3.670744870085874 0.8715688236409825 0.005831496806767582 56431.15832748852 8525.098185703384
39 static 0.0 robust 5 86605.88143558228 7614.909395960895 70842.62730546412 8033.737230392738 71.08396037634955 3.6802889678420283 0.8741062925938301 0.005083911544334936 70842.62730546412 8033.737230392738
40 static 0.1 no_robust 5 86668.90445290186 8037.955688932984 65623.40881389238 19329.448262530004 71.73199185012882 4.199046495412734 0.874577067494122 0.006610505646022198 65623.40881389238 19329.448262530004
41 static 0.1 robust 8 88298.35690575185 9576.838833058617 60276.33022450666 13359.490452744656 72.0920243339594 6.7706096714767865 0.8745305748491641 0.010083585815241344 60276.33022450666 13359.490452744656
42 static 0.25 no_robust 6 95581.63603909909 8345.698435455577 85253.22060752509 13111.526873622026 74.43788116042678 2.1078820386097368 0.8774483618896327 0.0037254791853004897 85253.22060752509 13111.526873622026
43 static 0.25 robust 5 85642.97376233719 9472.880627242153 61637.13336374452 15937.429780623212 71.38649508309966 4.0264905454627264 0.8739285904097794 0.005323853359397925 61637.13336374452 15937.429780623212
44 static 0.4 no_robust 4 84465.04245981346 12101.831388745604 63613.81812329075 7778.361846092061 67.5782271530322 3.9088888968092 0.8666205147756862 0.007149121199217965 63613.81812329075 7778.361846092061
45 static 0.4 robust 3 86315.88251933573 8642.748496122398 78672.47758108922 17823.74997200773 71.24783962051879 2.790416943786253 0.8733839625792507 0.005990544453538607 78672.47758108922 17823.74997200773
46 static 0.6 no_robust 5 81385.88962988024 12343.523894997037 64752.43216774836 23486.779472906223 71.36959177224794 5.100226704959064 0.874353948320141 0.007787250295491337 64752.43216774836 23486.779472906223
47 static 0.6 robust 6 82424.78357829548 9831.886701625144 58689.56808824368 12672.506035553573 69.65698271038197 3.484982360048201 0.8701253899758701 0.005917711231889304 58689.56808824368 12672.506035553573
48 static 0.8 no_robust 7 73226.06364450825 4447.877985963851 54700.340767716196 14406.881298569717 68.32867561883204 3.68262917356943 0.8679204886788817 0.007467501164611224 54700.340767716196 14406.881298569717
49 static 0.8 robust 3 75922.69770770498 5046.089536162847 54849.564836072976 22780.98012221352 69.17784723148274 1.5268167784698885 0.8711991412754405 0.0033278715575433297 54849.564836072976 22780.98012221352
50 surge 0.0 no_robust 6 11975.290738176132 411.4052900076416 4418.832131346071 896.5828048394391 16.192056219479124 0.8040364003224534 0.4317940274006973 0.008271862690929055 4418.832131346071 896.5828048394391
51 surge 0.0 robust 6 11368.553135742462 623.8217438159004 4174.6562770928085 639.9963040241264 16.20693115067868 0.9853827520149101 0.4337249214539392 0.010371668289035135 4174.6562770928085 639.9963040241264
52 surge 0.1 no_robust 5 11739.084232858655 332.778792718381 5058.659087494994 1110.8409258976824 16.722948073839394 0.6578121995950104 0.4377682402562083 0.005683401047550787 5058.659087494994 1110.8409258976824
53 surge 0.1 robust 2 11908.871668592743 81.41250285550258 4045.8883900289775 784.7169500268457 16.5783528934624 0.4088194924856508 0.4359031943776225 0.004531137621699143 4045.8883900289775 784.7169500268457
54 surge 0.25 no_robust 7 11369.223138855004 236.1121240061105 4754.4980344481255 1038.0550037539617 16.359045119223275 0.3945156775653057 0.4329514652531622 0.0038762110261952457 4754.4980344481255 1038.0550037539617
55 surge 0.25 robust 10 11241.013200689158 684.503587066406 4673.284299575493 1187.78635131025 16.65989576694279 1.0515950311117155 0.4360264800834576 0.009701952962125513 4673.284299575493 1187.78635131025
56 surge 0.4 no_robust 6 11006.168409400554 364.6584583108646 4227.535704048808 1414.7964077877168 16.365391636138824 0.9138430058543858 0.4332855262584901 0.008024003783434592 4227.535704048808 1414.7964077877168
57 surge 0.4 robust 6 10533.13118175624 526.0758051960169 4256.093156292146 783.7965507386594 15.862646418833448 0.7732699435426456 0.42770414581632693 0.008967505611725135 4256.093156292146 783.7965507386594
58 surge 0.6 no_robust 5 10139.2472848498 97.448078425168 3251.037082975553 742.2100315641153 16.26429537781848 0.4432465691073604 0.4329686574409998 0.004121820888165019 3251.037082975553 742.2100315641153
59 surge 0.6 robust 2 10447.04164810777 524.0029334247373 5311.611043281193 1808.6200710093085 16.49826042222505 0.6088756908260344 0.43427092746638946 0.007817511630542989 5311.611043281193 1808.6200710093085
60 surge 0.8 no_robust 3 9678.259826640971 272.83530913170915 3204.3479815026553 556.8799617962688 16.840420745981802 0.4589959822922529 0.43920385308157944 0.004953937449529005 3204.3479815026553 556.8799617962688
61 surge 0.8 robust 3 10101.413689120902 526.8318040489241 4321.442189833087 1284.166148011517 17.737567199677557 0.6586775330563983 0.44627248379841095 0.004644261847052545 4321.442189833087 1284.166148011517

View File

@@ -1,11 +0,0 @@
tier,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_margin_mean_mean,eval_margin_mean_std,objective_score_mean,objective_score_std,train_alpha_adv_mean,train_alpha_adv_std
dqn,no_robust,29,315185.66674813855,23538.781000060844,302576.8036266896,62951.88633145167,136.82560356086017,1.3692652218935986,0.9313739013618878,0.0009314135057224836,302576.8036266896,62951.88633145167,0.45740740740740693,0.2368477698794438
dqn,robust,37,306875.13950902375,27585.74444520695,283724.7169827867,69843.05611741856,136.68837571992978,2.3797541654948753,0.9312171495138941,0.0016512408492580111,283724.7169827867,69843.05611741856,0.5058198198198196,0.28324483129860284
linear,no_robust,47,315501.15296155965,27105.014861872147,298149.1730416604,67664.7308344108,134.36884359609928,0.29743647613433244,0.9303607531364,0.0002152647006739543,298149.1730416604,67664.7308344108,,
linear,robust,31,306269.9232239004,26399.875293394463,279872.824370329,54401.104602086416,134.32737693008372,0.31909212993628877,0.9303375215162144,0.00025000448833182963,279872.824370329,54401.104602086416,,
qtable,no_robust,32,259818.72178238883,67188.58622318009,222088.83510765125,94450.12569617687,116.84641954166946,22.42810298937963,0.9140582213134033,0.02778864370791322,222088.83510765125,94450.12569617687,0.29218749999999993,0.2559326319498438
qtable,robust,40,244470.50673219413,78666.30912808319,216920.53697298188,93983.50987622296,118.94013969887506,23.1428303249914,0.9178608956089163,0.023827311253270544,216920.53697298188,93983.50987622296,0.4396239583333334,0.29521865862482416
static,no_robust,33,85228.452028227,12041.415672002751,64828.579890468536,17681.280330831738,70.58818912317687,4.204964531595236,0.8721419294578765,0.007107262779462876,64828.579890468536,17681.280330831738,,
static,robust,30,84963.18577955024,8926.291379160475,63243.76603076817,14880.924342692271,70.94358095957392,4.363134562111469,0.8730306888410219,0.006660289247744752,63243.76603076817,14880.924342692271,,
surge,no_robust,32,11121.867310184698,809.9895800277001,4260.038064073964,1160.4282377968032,16.416108827015794,0.641203520341943,0.43413855082681374,0.006214799767130059,4260.038064073964,1160.4282377968032,,
surge,robust,29,10994.355365953365,750.5115890942825,4448.160863178768,1000.7519971246122,16.495943148858906,0.9823026347466668,0.4347587896392907,0.009698591291108968,4448.160863178768,1000.7519971246122,,
1 tier mode runs eval_revenue_mean_mean eval_revenue_mean_std eval_reward_mean_mean eval_reward_mean_std eval_coi_level_mean_mean eval_coi_level_mean_std eval_margin_mean_mean eval_margin_mean_std objective_score_mean objective_score_std train_alpha_adv_mean train_alpha_adv_std
2 dqn no_robust 29 315185.66674813855 23538.781000060844 302576.8036266896 62951.88633145167 136.82560356086017 1.3692652218935986 0.9313739013618878 0.0009314135057224836 302576.8036266896 62951.88633145167 0.45740740740740693 0.2368477698794438
3 dqn robust 37 306875.13950902375 27585.74444520695 283724.7169827867 69843.05611741856 136.68837571992978 2.3797541654948753 0.9312171495138941 0.0016512408492580111 283724.7169827867 69843.05611741856 0.5058198198198196 0.28324483129860284
4 linear no_robust 47 315501.15296155965 27105.014861872147 298149.1730416604 67664.7308344108 134.36884359609928 0.29743647613433244 0.9303607531364 0.0002152647006739543 298149.1730416604 67664.7308344108
5 linear robust 31 306269.9232239004 26399.875293394463 279872.824370329 54401.104602086416 134.32737693008372 0.31909212993628877 0.9303375215162144 0.00025000448833182963 279872.824370329 54401.104602086416
6 qtable no_robust 32 259818.72178238883 67188.58622318009 222088.83510765125 94450.12569617687 116.84641954166946 22.42810298937963 0.9140582213134033 0.02778864370791322 222088.83510765125 94450.12569617687 0.29218749999999993 0.2559326319498438
7 qtable robust 40 244470.50673219413 78666.30912808319 216920.53697298188 93983.50987622296 118.94013969887506 23.1428303249914 0.9178608956089163 0.023827311253270544 216920.53697298188 93983.50987622296 0.4396239583333334 0.29521865862482416
8 static no_robust 33 85228.452028227 12041.415672002751 64828.579890468536 17681.280330831738 70.58818912317687 4.204964531595236 0.8721419294578765 0.007107262779462876 64828.579890468536 17681.280330831738
9 static robust 30 84963.18577955024 8926.291379160475 63243.76603076817 14880.924342692271 70.94358095957392 4.363134562111469 0.8730306888410219 0.006660289247744752 63243.76603076817 14880.924342692271
10 surge no_robust 32 11121.867310184698 809.9895800277001 4260.038064073964 1160.4282377968032 16.416108827015794 0.641203520341943 0.43413855082681374 0.006214799767130059 4260.038064073964 1160.4282377968032
11 surge robust 29 10994.355365953365 750.5115890942825 4448.160863178768 1000.7519971246122 16.495943148858906 0.9823026347466668 0.4347587896392907 0.009698591291108968 4448.160863178768 1000.7519971246122

View File

@@ -1,26 +0,0 @@
Name,tier,alpha,mode,objective/score,eval/revenue_mean,eval/reward_mean,eval/coi_level_mean,lambda_coi,robust_radius,learning_rate,batch_size,n_steps,total_timesteps
eager-sweep-244,dqn,0.0,no_robust,413274.4339549909,355872.06196128257,413274.4339549909,136.722140138007,0.2,0.1,0.0003,256,4096,15000
efficient-sweep-319,linear,0.0,no_robust,410094.0151741567,353309.5198146561,410094.0151741567,134.55152038805429,0.4,0.1,0.001,128,4096,15000
swept-sweep-422,linear,0.0,no_robust,403130.32747386186,347611.2815474988,403130.32747386186,133.8559785775022,0.4,0.3,0.0001,512,1024,15000
decent-sweep-478,linear,0.1,no_robust,400452.36418713134,345284.5750647792,400452.36418713134,134.73082941975588,0.1,0.2,0.001,128,1024,50000
eternal-sweep-339,linear,0.1,no_robust,399628.4231731644,344154.38525771734,399628.4231731644,134.89479277649667,0.4,0.1,0.0001,256,1024,50000
ethereal-sweep-21,dqn,0.1,no_robust,398492.807245857,343580.6802427996,398492.807245857,136.67160732585188,0.1,0.2,0.001,512,2048,50000
dark-sweep-418,linear,0.1,no_robust,394615.3720658343,339749.76272695075,394615.3720658343,134.39233246711,0.2,0.1,0.0003,256,1024,50000
wandering-sweep-122,dqn,0.0,robust,394061.3617726404,339512.43434806296,394061.3617726404,137.6864755964331,0.1,0.3,0.0001,256,2048,30000
laced-sweep-132,dqn,0.1,robust,389274.54998495104,335600.5979215904,389274.54998495104,137.36888574027677,0.4,0.2,0.001,256,2048,30000
rich-sweep-53,qtable,0.0,robust,388601.2626147048,335630.6853337664,388601.2626147048,133.4414069888203,0.2,0.1,0.0001,512,1024,50000
faithful-sweep-430,qtable,0.25,no_robust,387035.6970938766,333255.5771210341,387035.6970938766,137.4906091183188,0.1,0.2,0.0003,128,1024,15000
dark-sweep-280,qtable,0.25,no_robust,386318.8845004527,332220.0316564078,386318.8845004527,137.26992450099925,0.4,0.1,0.0001,256,1024,50000
chocolate-sweep-383,linear,0.25,no_robust,383989.49015403807,331071.7003244704,383989.49015403807,134.60590742050857,0.1,0.2,0.001,512,1024,30000
dry-sweep-263,dqn,0.0,robust,383372.6880637367,330436.0312615148,383372.6880637367,137.40558130223476,0.1,0.3,0.001,128,1024,50000
different-sweep-143,qtable,0.0,robust,383278.4198015018,330546.16800945485,383278.4198015018,135.9021538079678,0.1,0.3,0.001,256,2048,30000
woven-sweep-139,dqn,0.25,robust,382788.1296637251,329427.735752473,382788.1296637251,136.8968339394894,0.1,0.1,0.001,512,1024,15000
dark-sweep-215,dqn,0.25,robust,382358.2401374872,329330.0097603144,382358.2401374872,137.64528612332785,0.2,0.1,0.0001,512,4096,30000
charmed-sweep-136,linear,0.25,no_robust,382249.5728044314,329646.2053260979,382249.5728044314,134.46825608007862,0.4,0.1,0.0001,256,2048,15000
light-sweep-308,linear,0.0,robust,381939.1275250679,329628.9436641051,381939.1275250679,133.6209821974879,0.2,0.2,0.001,128,4096,30000
treasured-sweep-325,linear,0.25,robust,381322.0104772589,328353.58675398555,381322.0104772589,134.8950293943581,0.1,0.1,0.0001,512,2048,15000
fine-sweep-202,dqn,0.25,robust,378751.33572275366,326518.9068184018,378751.33572275366,137.2900973301052,0.1,0.2,0.0001,512,2048,30000
treasured-sweep-380,linear,0.25,no_robust,377898.0979419424,325869.1953595453,377898.0979419424,134.54118723889738,0.4,0.3,0.001,128,1024,50000
pretty-sweep-49,qtable,0.25,robust,377318.4766808995,325282.0152823859,377318.4766808995,137.19609012644068,0.4,0.1,0.0001,128,4096,50000
desert-sweep-253,linear,0.25,robust,376808.6335063269,325146.3478714648,376808.6335063269,134.48396340732663,0.2,0.1,0.0003,256,1024,30000
jolly-sweep-133,qtable,0.4,no_robust,376419.57394710975,323709.24588324485,376419.57394710975,137.8349363778071,0.1,0.3,0.0001,128,2048,50000
1 Name tier alpha mode objective/score eval/revenue_mean eval/reward_mean eval/coi_level_mean lambda_coi robust_radius learning_rate batch_size n_steps total_timesteps
2 eager-sweep-244 dqn 0.0 no_robust 413274.4339549909 355872.06196128257 413274.4339549909 136.722140138007 0.2 0.1 0.0003 256 4096 15000
3 efficient-sweep-319 linear 0.0 no_robust 410094.0151741567 353309.5198146561 410094.0151741567 134.55152038805429 0.4 0.1 0.001 128 4096 15000
4 swept-sweep-422 linear 0.0 no_robust 403130.32747386186 347611.2815474988 403130.32747386186 133.8559785775022 0.4 0.3 0.0001 512 1024 15000
5 decent-sweep-478 linear 0.1 no_robust 400452.36418713134 345284.5750647792 400452.36418713134 134.73082941975588 0.1 0.2 0.001 128 1024 50000
6 eternal-sweep-339 linear 0.1 no_robust 399628.4231731644 344154.38525771734 399628.4231731644 134.89479277649667 0.4 0.1 0.0001 256 1024 50000
7 ethereal-sweep-21 dqn 0.1 no_robust 398492.807245857 343580.6802427996 398492.807245857 136.67160732585188 0.1 0.2 0.001 512 2048 50000
8 dark-sweep-418 linear 0.1 no_robust 394615.3720658343 339749.76272695075 394615.3720658343 134.39233246711 0.2 0.1 0.0003 256 1024 50000
9 wandering-sweep-122 dqn 0.0 robust 394061.3617726404 339512.43434806296 394061.3617726404 137.6864755964331 0.1 0.3 0.0001 256 2048 30000
10 laced-sweep-132 dqn 0.1 robust 389274.54998495104 335600.5979215904 389274.54998495104 137.36888574027677 0.4 0.2 0.001 256 2048 30000
11 rich-sweep-53 qtable 0.0 robust 388601.2626147048 335630.6853337664 388601.2626147048 133.4414069888203 0.2 0.1 0.0001 512 1024 50000
12 faithful-sweep-430 qtable 0.25 no_robust 387035.6970938766 333255.5771210341 387035.6970938766 137.4906091183188 0.1 0.2 0.0003 128 1024 15000
13 dark-sweep-280 qtable 0.25 no_robust 386318.8845004527 332220.0316564078 386318.8845004527 137.26992450099925 0.4 0.1 0.0001 256 1024 50000
14 chocolate-sweep-383 linear 0.25 no_robust 383989.49015403807 331071.7003244704 383989.49015403807 134.60590742050857 0.1 0.2 0.001 512 1024 30000
15 dry-sweep-263 dqn 0.0 robust 383372.6880637367 330436.0312615148 383372.6880637367 137.40558130223476 0.1 0.3 0.001 128 1024 50000
16 different-sweep-143 qtable 0.0 robust 383278.4198015018 330546.16800945485 383278.4198015018 135.9021538079678 0.1 0.3 0.001 256 2048 30000
17 woven-sweep-139 dqn 0.25 robust 382788.1296637251 329427.735752473 382788.1296637251 136.8968339394894 0.1 0.1 0.001 512 1024 15000
18 dark-sweep-215 dqn 0.25 robust 382358.2401374872 329330.0097603144 382358.2401374872 137.64528612332785 0.2 0.1 0.0001 512 4096 30000
19 charmed-sweep-136 linear 0.25 no_robust 382249.5728044314 329646.2053260979 382249.5728044314 134.46825608007862 0.4 0.1 0.0001 256 2048 15000
20 light-sweep-308 linear 0.0 robust 381939.1275250679 329628.9436641051 381939.1275250679 133.6209821974879 0.2 0.2 0.001 128 4096 30000
21 treasured-sweep-325 linear 0.25 robust 381322.0104772589 328353.58675398555 381322.0104772589 134.8950293943581 0.1 0.1 0.0001 512 2048 15000
22 fine-sweep-202 dqn 0.25 robust 378751.33572275366 326518.9068184018 378751.33572275366 137.2900973301052 0.1 0.2 0.0001 512 2048 30000
23 treasured-sweep-380 linear 0.25 no_robust 377898.0979419424 325869.1953595453 377898.0979419424 134.54118723889738 0.4 0.3 0.001 128 1024 50000
24 pretty-sweep-49 qtable 0.25 robust 377318.4766808995 325282.0152823859 377318.4766808995 137.19609012644068 0.4 0.1 0.0001 128 4096 50000
25 desert-sweep-253 linear 0.25 robust 376808.6335063269 325146.3478714648 376808.6335063269 134.48396340732663 0.2 0.1 0.0003 256 1024 30000
26 jolly-sweep-133 qtable 0.4 no_robust 376419.57394710975 323709.24588324485 376419.57394710975 137.8349363778071 0.1 0.3 0.0001 128 2048 50000

View File

@@ -1,7 +0,0 @@
alpha,runs_robust,runs_no_robust,eval_revenue_mean_robust,eval_revenue_mean_no_robust,eval_revenue_mean_delta,eval_revenue_mean_delta_pct,eval_reward_mean_robust,eval_reward_mean_no_robust,eval_reward_mean_delta,eval_reward_mean_delta_pct,eval_coi_level_mean_robust,eval_coi_level_mean_no_robust,eval_coi_level_mean_delta,eval_coi_level_mean_delta_pct,eval_coi_leakage_mean_robust,eval_coi_leakage_mean_no_robust,eval_coi_leakage_mean_delta,eval_coi_leakage_mean_delta_pct,eval_volatility_mean_robust,eval_volatility_mean_no_robust,eval_volatility_mean_delta,eval_volatility_mean_delta_pct,eval_margin_mean_robust,eval_margin_mean_no_robust,eval_margin_mean_delta,eval_margin_mean_delta_pct,train_alpha_adv_robust,train_alpha_adv_no_robust,train_alpha_adv_delta,train_alpha_adv_delta_pct,train_coi_penalty_robust,train_coi_penalty_no_robust,train_coi_penalty_delta,train_coi_penalty_delta_pct,train_ux_penalty_robust,train_ux_penalty_no_robust,train_ux_penalty_delta,train_ux_penalty_delta_pct,train_agent_prob_robust,train_agent_prob_no_robust,train_agent_prob_delta,train_agent_prob_delta_pct
0.0,4.0,4.0,3379.9042994670963,3565.2912010160844,-185.38690154898813,-5.199768857482219,313527.4707462,331300.229069,-17772.758322799986,-5.364547550342456,137.08358925982625,137.28764358955686,-0.2040543297306101,-0.14863269875959326,0.1146626165658294,0.11861133504329742,-0.003948718477468013,-3.3291240470622716,0.06687153537785637,0.06445662162531288,0.0024149137525434905,3.746572022625408,0.9315273502623671,0.9317078361627993,-0.00018048590043218127,-0.019371512552207898,0.18958333333333333,,,,5.553200113221484,,,,61.35134238638615,66.58479574844135,-5.233453362055201,-7.859832418540847,0.12778212146468534,0.11615891320235115,0.011623208262334192,10.00629907933654
0.1,4.0,4.0,3307.028238366196,3458.002436284769,-150.97419791857283,-4.365936713473732,306772.49146475,321215.477968,-14442.986503249966,-4.4963544704059375,137.1182041122497,136.82757579763506,0.29062831461465066,0.21240478238427865,0.1128546052304944,0.11704917861668755,-0.004194573386193154,-3.5835991638433753,0.0685405649303561,0.06737596899527175,0.0011645959350843477,1.728503430007924,0.9315331673960889,0.9313276818191593,0.00020548557692967595,0.0220637248243606,0.2818749999999999,0.1,0.18187499999999987,181.87499999999986,5.079528726095333,,,,52.44772950699336,53.288869747139515,-0.841140240146153,-1.578453895039319,0.11644381911386253,0.11765277436070229,-0.0012089552468397546,-1.0275620387270383
0.25,4.0,4.0,3134.3438215278165,3300.5539051855053,-166.21008365768876,-5.035823938416998,290691.4771835,306522.90003785,-15831.422854350007,-5.16484179563586,136.89990884669214,136.71752459667877,0.18238425001337077,0.1334022471160229,0.11113957413522965,0.1139905600539111,-0.0028509859186814507,-2.50107194607439,0.06427159998376095,0.06846858821082077,-0.004196988227059828,-6.12980103246314,0.9314501501825461,0.9313053225630614,0.0001448276194846443,0.015551035302371268,0.44833333333333336,0.25,0.19833333333333336,79.33333333333334,4.7183804755060255,,,,49.04307009982127,55.2030005738411,-6.159930474019831,-11.158687770568074,0.10998505830218755,0.11684259343269415,-0.0068575351305066035,-5.869037077182653
0.4,4.0,4.0,2983.852437569374,3180.7872854626567,-196.9348478932825,-6.191386918369099,276545.26309355,295433.5405797,-18888.277486150037,-6.393409986248494,136.19210761854086,136.5783021470118,-0.38619452847095204,-0.2827641890402586,0.10875560547061063,0.11189234314151972,-0.0031367376709090927,-2.8033532794480807,0.07452230347799255,0.07104688223410768,0.003475421243884863,4.891729425132195,0.9307282962514367,0.9310542820602117,-0.0003259858087749645,-0.03501254599824534,0.5999999999999999,0.4000000000000001,0.1999999999999998,49.999999999999936,4.174996403604185,,,,47.99794119802058,50.794260008988424,-2.796318810967847,-5.505186630286606,0.10222958892923095,0.11161526349272373,-0.009385674563492777,-8.408952565976458
0.6,4.0,4.0,2789.0434220430398,2982.2460998252786,-193.20267778223888,-6.4784283830083,258688.11700405,277051.95613675,-18363.8391327,-6.628301560749781,136.86774320500828,136.81931587629953,0.04842732870875466,0.035395096371142916,0.10501047827147733,0.10802266412956946,-0.0030121858580921257,-2.788475809557069,0.06914180963767007,0.06698591531512615,0.0021558943225439137,3.2184292957732996,0.9314130089130337,0.9313849217310588,2.8087181974889575e-05,0.003015636319588161,0.7733333333333334,0.5999999999999999,0.17333333333333356,28.888888888888935,4.178300996512875,,,,39.928062615509425,47.86860429278531,-7.940541677275881,-16.588203885594947,0.11297979438696983,0.1162670925925253,-0.0032872982055554695,-2.827367686122743
0.8,4.0,4.0,2586.098242115281,2841.1305915063504,-255.03234939106915,-8.97643882169642,239765.24959855,264140.55002745,-24375.300428900024,-9.228155399224729,136.5038826686135,137.28163778418497,-0.7777551155714661,-0.5665397995864124,0.10253056902792507,0.1031498585902154,-0.0006192895622903344,-0.6003784888844036,0.07325665736408164,0.06592454978099352,0.007332107583088124,11.1219683827132,0.9311235469993302,0.9316596013994161,-0.0005360544000858614,-0.05753758124541101,1.0,0.8000000000000002,0.19999999999999984,24.99999999999998,3.5384100686094007,,,,37.14414699970415,37.43809775029793,-0.29395075059377973,-0.7851647606519765,0.09990322635678014,0.10432800196112454,-0.0044247756043444,-4.241215705437541
1 alpha runs_robust runs_no_robust eval_revenue_mean_robust eval_revenue_mean_no_robust eval_revenue_mean_delta eval_revenue_mean_delta_pct eval_reward_mean_robust eval_reward_mean_no_robust eval_reward_mean_delta eval_reward_mean_delta_pct eval_coi_level_mean_robust eval_coi_level_mean_no_robust eval_coi_level_mean_delta eval_coi_level_mean_delta_pct eval_coi_leakage_mean_robust eval_coi_leakage_mean_no_robust eval_coi_leakage_mean_delta eval_coi_leakage_mean_delta_pct eval_volatility_mean_robust eval_volatility_mean_no_robust eval_volatility_mean_delta eval_volatility_mean_delta_pct eval_margin_mean_robust eval_margin_mean_no_robust eval_margin_mean_delta eval_margin_mean_delta_pct train_alpha_adv_robust train_alpha_adv_no_robust train_alpha_adv_delta train_alpha_adv_delta_pct train_coi_penalty_robust train_coi_penalty_no_robust train_coi_penalty_delta train_coi_penalty_delta_pct train_ux_penalty_robust train_ux_penalty_no_robust train_ux_penalty_delta train_ux_penalty_delta_pct train_agent_prob_robust train_agent_prob_no_robust train_agent_prob_delta train_agent_prob_delta_pct
2 0.0 4.0 4.0 3379.9042994670963 3565.2912010160844 -185.38690154898813 -5.199768857482219 313527.4707462 331300.229069 -17772.758322799986 -5.364547550342456 137.08358925982625 137.28764358955686 -0.2040543297306101 -0.14863269875959326 0.1146626165658294 0.11861133504329742 -0.003948718477468013 -3.3291240470622716 0.06687153537785637 0.06445662162531288 0.0024149137525434905 3.746572022625408 0.9315273502623671 0.9317078361627993 -0.00018048590043218127 -0.019371512552207898 0.18958333333333333 5.553200113221484 61.35134238638615 66.58479574844135 -5.233453362055201 -7.859832418540847 0.12778212146468534 0.11615891320235115 0.011623208262334192 10.00629907933654
3 0.1 4.0 4.0 3307.028238366196 3458.002436284769 -150.97419791857283 -4.365936713473732 306772.49146475 321215.477968 -14442.986503249966 -4.4963544704059375 137.1182041122497 136.82757579763506 0.29062831461465066 0.21240478238427865 0.1128546052304944 0.11704917861668755 -0.004194573386193154 -3.5835991638433753 0.0685405649303561 0.06737596899527175 0.0011645959350843477 1.728503430007924 0.9315331673960889 0.9313276818191593 0.00020548557692967595 0.0220637248243606 0.2818749999999999 0.1 0.18187499999999987 181.87499999999986 5.079528726095333 52.44772950699336 53.288869747139515 -0.841140240146153 -1.578453895039319 0.11644381911386253 0.11765277436070229 -0.0012089552468397546 -1.0275620387270383
4 0.25 4.0 4.0 3134.3438215278165 3300.5539051855053 -166.21008365768876 -5.035823938416998 290691.4771835 306522.90003785 -15831.422854350007 -5.16484179563586 136.89990884669214 136.71752459667877 0.18238425001337077 0.1334022471160229 0.11113957413522965 0.1139905600539111 -0.0028509859186814507 -2.50107194607439 0.06427159998376095 0.06846858821082077 -0.004196988227059828 -6.12980103246314 0.9314501501825461 0.9313053225630614 0.0001448276194846443 0.015551035302371268 0.44833333333333336 0.25 0.19833333333333336 79.33333333333334 4.7183804755060255 49.04307009982127 55.2030005738411 -6.159930474019831 -11.158687770568074 0.10998505830218755 0.11684259343269415 -0.0068575351305066035 -5.869037077182653
5 0.4 4.0 4.0 2983.852437569374 3180.7872854626567 -196.9348478932825 -6.191386918369099 276545.26309355 295433.5405797 -18888.277486150037 -6.393409986248494 136.19210761854086 136.5783021470118 -0.38619452847095204 -0.2827641890402586 0.10875560547061063 0.11189234314151972 -0.0031367376709090927 -2.8033532794480807 0.07452230347799255 0.07104688223410768 0.003475421243884863 4.891729425132195 0.9307282962514367 0.9310542820602117 -0.0003259858087749645 -0.03501254599824534 0.5999999999999999 0.4000000000000001 0.1999999999999998 49.999999999999936 4.174996403604185 47.99794119802058 50.794260008988424 -2.796318810967847 -5.505186630286606 0.10222958892923095 0.11161526349272373 -0.009385674563492777 -8.408952565976458
6 0.6 4.0 4.0 2789.0434220430398 2982.2460998252786 -193.20267778223888 -6.4784283830083 258688.11700405 277051.95613675 -18363.8391327 -6.628301560749781 136.86774320500828 136.81931587629953 0.04842732870875466 0.035395096371142916 0.10501047827147733 0.10802266412956946 -0.0030121858580921257 -2.788475809557069 0.06914180963767007 0.06698591531512615 0.0021558943225439137 3.2184292957732996 0.9314130089130337 0.9313849217310588 2.8087181974889575e-05 0.003015636319588161 0.7733333333333334 0.5999999999999999 0.17333333333333356 28.888888888888935 4.178300996512875 39.928062615509425 47.86860429278531 -7.940541677275881 -16.588203885594947 0.11297979438696983 0.1162670925925253 -0.0032872982055554695 -2.827367686122743
7 0.8 4.0 4.0 2586.098242115281 2841.1305915063504 -255.03234939106915 -8.97643882169642 239765.24959855 264140.55002745 -24375.300428900024 -9.228155399224729 136.5038826686135 137.28163778418497 -0.7777551155714661 -0.5665397995864124 0.10253056902792507 0.1031498585902154 -0.0006192895622903344 -0.6003784888844036 0.07325665736408164 0.06592454978099352 0.007332107583088124 11.1219683827132 0.9311235469993302 0.9316596013994161 -0.0005360544000858614 -0.05753758124541101 1.0 0.8000000000000002 0.19999999999999984 24.99999999999998 3.5384100686094007 37.14414699970415 37.43809775029793 -0.29395075059377973 -0.7851647606519765 0.09990322635678014 0.10432800196112454 -0.0044247756043444 -4.241215705437541

View File

@@ -1,13 +0,0 @@
alpha,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_coi_leakage_mean_mean,eval_coi_leakage_mean_std,eval_volatility_mean_mean,eval_volatility_mean_std,eval_margin_mean_mean,eval_margin_mean_std,train_alpha_adv_mean,train_alpha_adv_std,train_coi_penalty_mean,train_coi_penalty_std,train_ux_penalty_mean,train_ux_penalty_std,train_agent_prob_mean,train_agent_prob_std
0.0,no_robust,4,3565.2912010160844,52.219179508209216,331300.229069,5038.96659004527,137.28764358955686,0.6434240315013728,0.11861133504329742,0.004019332768284657,0.06445662162531288,0.004080405219050139,0.9317078361627993,0.00038018051704976865,,,,,66.58479574844135,32.282270089830455,0.11615891320235115,0.016558627227281013
0.0,robust,4,3379.9042994670963,54.727408939657735,313527.4707462,5408.058196552377,137.08358925982625,1.047386315387148,0.1146626165658294,0.0025627354157035497,0.06687153537785637,0.008577061675868377,0.9315273502623671,0.0007274203134899985,0.18958333333333333,0.02083333333333336,5.553200113221484,0.45981481828856186,61.35134238638615,30.27964905193963,0.12778212146468534,0.027929667978205217
0.1,no_robust,4,3458.002436284769,60.75923217871363,321215.477968,6016.373193216596,136.82757579763506,1.1899102161551907,0.11704917861668755,0.0021220259908233973,0.06737596899527175,0.006801136773079149,0.9313276818191593,0.0008352263172197586,0.1,0.0,,,53.288869747139515,18.480340945815023,0.11765277436070229,0.017544197575138736
0.1,robust,4,3307.028238366196,35.58495715224888,306772.49146475,3488.2690530060245,137.1182041122497,0.8582218376452346,0.1128546052304944,0.0005963155492967403,0.0685405649303561,0.0050673362512629015,0.9315331673960889,0.0005217376436765336,0.2818749999999999,0.03624999999999999,5.079528726095333,0.6109585102054891,52.44772950699336,29.0263361696475,0.11644381911386253,0.021152545180088765
0.25,no_robust,4,3300.5539051855053,50.460978662647115,306522.90003785,4860.668937531515,136.71752459667877,0.7410676951244369,0.1139905600539111,0.003319948537321803,0.06846858821082077,0.008614994548315848,0.9313053225630614,0.0004919872662680591,0.25,0.0,,,55.2030005738411,26.88247558235345,0.11684259343269415,0.013462146346772591
0.25,robust,4,3134.3438215278165,64.06834403659167,290691.4771835,6331.196493752059,136.89990884669214,1.3796663751798552,0.11113957413522965,0.0015044942041406348,0.06427159998376095,0.0042331619171274894,0.9314501501825461,0.0008939739741734515,0.44833333333333336,0.0033333333333333518,4.7183804755060255,0.4538389380858333,49.04307009982127,28.20484665432831,0.10998505830218755,0.010731404693185651
0.4,no_robust,4,3180.7872854626567,71.87564776824694,295433.5405797,7035.374110540269,136.5783021470118,1.7095219574599192,0.11189234314151972,0.0013821115134030936,0.07104688223410768,0.005766138692685495,0.9310542820602117,0.0013989725050689828,0.4000000000000001,0.0,,,50.794260008988424,24.836708377642946,0.11161526349272373,0.005787749200301594
0.4,robust,4,2983.852437569374,45.51290575912758,276545.26309355,4555.1725323898245,136.19210761854086,1.5546063667946701,0.10875560547061063,0.001118798290958954,0.07452230347799255,0.0040446395928049874,0.9307282962514367,0.0013558080014763189,0.5999999999999999,0.0,4.174996403604185,0.12189448324552496,47.99794119802058,33.51782503281748,0.10222958892923095,0.0031686467591609474
0.6,no_robust,4,2982.2460998252786,39.93674476199945,277051.95613675,3931.02017169463,136.81931587629953,1.1995405806950865,0.10802266412956946,0.000405835985606262,0.06698591531512615,0.002805894772223563,0.9313849217310588,0.0008100530228792662,0.5999999999999999,0.0,,,47.86860429278531,23.830502772642472,0.1162670925925253,0.028676813474186293
0.6,robust,4,2789.0434220430398,35.297482315631626,258688.11700405,3420.6735023624556,136.86774320500828,0.7097303238857778,0.10501047827147733,0.0008273121554488608,0.06914180963767007,0.009066158371268139,0.9314130089130337,0.0005024421703994162,0.7733333333333334,0.053333333333333385,4.178300996512875,0.5865970573865015,39.928062615509425,30.25078643153115,0.11297979438696983,0.0274101056520461
0.8,no_robust,4,2841.1305915063504,21.84043179776092,264140.55002745,2073.353315114627,137.28163778418497,0.6288968799501957,0.1031498585902154,0.0012877581835795701,0.06592454978099352,0.00340700896766341,0.9316596013994161,0.00038430108058413553,0.8000000000000002,0.0,,,37.43809775029793,32.01740090550489,0.10432800196112454,0.018337841526911584
0.8,robust,4,2586.098242115281,48.05539265296157,239765.24959855,4681.6472175597555,136.5038826686135,1.0611320896043694,0.10253056902792507,0.002587472569909977,0.07325665736408164,0.0015359324114246234,0.9311235469993302,0.0006145440308596868,1.0,0.0,3.5384100686094007,0.391972726035734,37.14414699970415,25.614063825315505,0.09990322635678014,0.010269342031085898
1 alpha mode runs eval_revenue_mean_mean eval_revenue_mean_std eval_reward_mean_mean eval_reward_mean_std eval_coi_level_mean_mean eval_coi_level_mean_std eval_coi_leakage_mean_mean eval_coi_leakage_mean_std eval_volatility_mean_mean eval_volatility_mean_std eval_margin_mean_mean eval_margin_mean_std train_alpha_adv_mean train_alpha_adv_std train_coi_penalty_mean train_coi_penalty_std train_ux_penalty_mean train_ux_penalty_std train_agent_prob_mean train_agent_prob_std
2 0.0 no_robust 4 3565.2912010160844 52.219179508209216 331300.229069 5038.96659004527 137.28764358955686 0.6434240315013728 0.11861133504329742 0.004019332768284657 0.06445662162531288 0.004080405219050139 0.9317078361627993 0.00038018051704976865 66.58479574844135 32.282270089830455 0.11615891320235115 0.016558627227281013
3 0.0 robust 4 3379.9042994670963 54.727408939657735 313527.4707462 5408.058196552377 137.08358925982625 1.047386315387148 0.1146626165658294 0.0025627354157035497 0.06687153537785637 0.008577061675868377 0.9315273502623671 0.0007274203134899985 0.18958333333333333 0.02083333333333336 5.553200113221484 0.45981481828856186 61.35134238638615 30.27964905193963 0.12778212146468534 0.027929667978205217
4 0.1 no_robust 4 3458.002436284769 60.75923217871363 321215.477968 6016.373193216596 136.82757579763506 1.1899102161551907 0.11704917861668755 0.0021220259908233973 0.06737596899527175 0.006801136773079149 0.9313276818191593 0.0008352263172197586 0.1 0.0 53.288869747139515 18.480340945815023 0.11765277436070229 0.017544197575138736
5 0.1 robust 4 3307.028238366196 35.58495715224888 306772.49146475 3488.2690530060245 137.1182041122497 0.8582218376452346 0.1128546052304944 0.0005963155492967403 0.0685405649303561 0.0050673362512629015 0.9315331673960889 0.0005217376436765336 0.2818749999999999 0.03624999999999999 5.079528726095333 0.6109585102054891 52.44772950699336 29.0263361696475 0.11644381911386253 0.021152545180088765
6 0.25 no_robust 4 3300.5539051855053 50.460978662647115 306522.90003785 4860.668937531515 136.71752459667877 0.7410676951244369 0.1139905600539111 0.003319948537321803 0.06846858821082077 0.008614994548315848 0.9313053225630614 0.0004919872662680591 0.25 0.0 55.2030005738411 26.88247558235345 0.11684259343269415 0.013462146346772591
7 0.25 robust 4 3134.3438215278165 64.06834403659167 290691.4771835 6331.196493752059 136.89990884669214 1.3796663751798552 0.11113957413522965 0.0015044942041406348 0.06427159998376095 0.0042331619171274894 0.9314501501825461 0.0008939739741734515 0.44833333333333336 0.0033333333333333518 4.7183804755060255 0.4538389380858333 49.04307009982127 28.20484665432831 0.10998505830218755 0.010731404693185651
8 0.4 no_robust 4 3180.7872854626567 71.87564776824694 295433.5405797 7035.374110540269 136.5783021470118 1.7095219574599192 0.11189234314151972 0.0013821115134030936 0.07104688223410768 0.005766138692685495 0.9310542820602117 0.0013989725050689828 0.4000000000000001 0.0 50.794260008988424 24.836708377642946 0.11161526349272373 0.005787749200301594
9 0.4 robust 4 2983.852437569374 45.51290575912758 276545.26309355 4555.1725323898245 136.19210761854086 1.5546063667946701 0.10875560547061063 0.001118798290958954 0.07452230347799255 0.0040446395928049874 0.9307282962514367 0.0013558080014763189 0.5999999999999999 0.0 4.174996403604185 0.12189448324552496 47.99794119802058 33.51782503281748 0.10222958892923095 0.0031686467591609474
10 0.6 no_robust 4 2982.2460998252786 39.93674476199945 277051.95613675 3931.02017169463 136.81931587629953 1.1995405806950865 0.10802266412956946 0.000405835985606262 0.06698591531512615 0.002805894772223563 0.9313849217310588 0.0008100530228792662 0.5999999999999999 0.0 47.86860429278531 23.830502772642472 0.1162670925925253 0.028676813474186293
11 0.6 robust 4 2789.0434220430398 35.297482315631626 258688.11700405 3420.6735023624556 136.86774320500828 0.7097303238857778 0.10501047827147733 0.0008273121554488608 0.06914180963767007 0.009066158371268139 0.9314130089130337 0.0005024421703994162 0.7733333333333334 0.053333333333333385 4.178300996512875 0.5865970573865015 39.928062615509425 30.25078643153115 0.11297979438696983 0.0274101056520461
12 0.8 no_robust 4 2841.1305915063504 21.84043179776092 264140.55002745 2073.353315114627 137.28163778418497 0.6288968799501957 0.1031498585902154 0.0012877581835795701 0.06592454978099352 0.00340700896766341 0.9316596013994161 0.00038430108058413553 0.8000000000000002 0.0 37.43809775029793 32.01740090550489 0.10432800196112454 0.018337841526911584
13 0.8 robust 4 2586.098242115281 48.05539265296157 239765.24959855 4681.6472175597555 136.5038826686135 1.0611320896043694 0.10253056902792507 0.002587472569909977 0.07325665736408164 0.0015359324114246234 0.9311235469993302 0.0006145440308596868 1.0 0.0 3.5384100686094007 0.391972726035734 37.14414699970415 25.614063825315505 0.09990322635678014 0.010269342031085898

View File

@@ -1,7 +0,0 @@
{
"status": "ok",
"revenue_delta": -191.29017636530716,
"revenue_delta_pct": -5.938226273545598,
"coi_leakage_delta": -0.002960415145605702,
"coi_leakage_delta_pct": -2.6404147469510946
}

View File

@@ -1,3 +0,0 @@
mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_coi_leakage_mean_mean,eval_coi_leakage_mean_std,eval_volatility_mean_mean,eval_volatility_mean_std,eval_margin_mean_mean,eval_margin_mean_std,train_alpha_adv_mean,train_alpha_adv_std,train_coi_penalty_mean,train_coi_penalty_std,train_ux_penalty_mean,train_ux_penalty_std,train_agent_prob_mean,train_agent_prob_std
no_robust,24,3221.335253213441,262.46595166337727,299277.442303125,24382.561944761477,136.9186666318945,1.0038463876967063,0.11211932326253345,0.005805494533542669,0.06737642102693879,0.005402738047823369,0.9314066076226178,0.0007436370959663933,0.43,0.2546411303445653,,,51.86293802024894,25.340287421525442,0.11381077317368686,0.016664235359362907
robust,24,3030.0450768481337,288.262657026656,280998.34484843333,26820.020161880373,136.77757261848845,1.06224696086916,0.10915890811692774,0.004616462637659704,0.06943407846195294,0.006435789449278624,0.9312959200008004,0.0007858424519830652,0.5488541666666666,0.2860373751485706,4.540469463924883,0.7906156355346259,47.985382134405825,27.407657819442747,0.11155393475895271,0.01943348418653492
1 mode runs eval_revenue_mean_mean eval_revenue_mean_std eval_reward_mean_mean eval_reward_mean_std eval_coi_level_mean_mean eval_coi_level_mean_std eval_coi_leakage_mean_mean eval_coi_leakage_mean_std eval_volatility_mean_mean eval_volatility_mean_std eval_margin_mean_mean eval_margin_mean_std train_alpha_adv_mean train_alpha_adv_std train_coi_penalty_mean train_coi_penalty_std train_ux_penalty_mean train_ux_penalty_std train_agent_prob_mean train_agent_prob_std
2 no_robust 24 3221.335253213441 262.46595166337727 299277.442303125 24382.561944761477 136.9186666318945 1.0038463876967063 0.11211932326253345 0.005805494533542669 0.06737642102693879 0.005402738047823369 0.9314066076226178 0.0007436370959663933 0.43 0.2546411303445653 51.86293802024894 25.340287421525442 0.11381077317368686 0.016664235359362907
3 robust 24 3030.0450768481337 288.262657026656 280998.34484843333 26820.020161880373 136.77757261848845 1.06224696086916 0.10915890811692774 0.004616462637659704 0.06943407846195294 0.006435789449278624 0.9312959200008004 0.0007858424519830652 0.5488541666666666 0.2860373751485706 4.540469463924883 0.7906156355346259 47.985382134405825 27.407657819442747 0.11155393475895271 0.01943348418653492

View File

@@ -1,25 +0,0 @@
alpha,metric,direction,wins,ties,total_pairs,win_probability
0.0,eval/revenue_mean,higher,0,0,16,0.0
0.0,eval/reward_mean,higher,0,0,16,0.0
0.0,eval/coi_leakage_mean,lower,14,0,16,0.875
0.0,eval/volatility_mean,lower,8,0,16,0.5
0.1,eval/revenue_mean,higher,0,0,16,0.0
0.1,eval/reward_mean,higher,0,0,16,0.0
0.1,eval/coi_leakage_mean,lower,16,0,16,1.0
0.1,eval/volatility_mean,lower,8,0,16,0.5
0.25,eval/revenue_mean,higher,0,0,16,0.0
0.25,eval/reward_mean,higher,0,0,16,0.0
0.25,eval/coi_leakage_mean,lower,12,0,16,0.75
0.25,eval/volatility_mean,lower,11,0,16,0.6875
0.4,eval/revenue_mean,higher,0,0,16,0.0
0.4,eval/reward_mean,higher,0,0,16,0.0
0.4,eval/coi_leakage_mean,lower,16,0,16,1.0
0.4,eval/volatility_mean,lower,6,0,16,0.375
0.6,eval/revenue_mean,higher,0,0,16,0.0
0.6,eval/reward_mean,higher,0,0,16,0.0
0.6,eval/coi_leakage_mean,lower,16,0,16,1.0
0.6,eval/volatility_mean,lower,7,0,16,0.4375
0.8,eval/revenue_mean,higher,0,0,16,0.0
0.8,eval/reward_mean,higher,0,0,16,0.0
0.8,eval/coi_leakage_mean,lower,11,0,16,0.6875
0.8,eval/volatility_mean,lower,0,0,16,0.0
1 alpha metric direction wins ties total_pairs win_probability
2 0.0 eval/revenue_mean higher 0 0 16 0.0
3 0.0 eval/reward_mean higher 0 0 16 0.0
4 0.0 eval/coi_leakage_mean lower 14 0 16 0.875
5 0.0 eval/volatility_mean lower 8 0 16 0.5
6 0.1 eval/revenue_mean higher 0 0 16 0.0
7 0.1 eval/reward_mean higher 0 0 16 0.0
8 0.1 eval/coi_leakage_mean lower 16 0 16 1.0
9 0.1 eval/volatility_mean lower 8 0 16 0.5
10 0.25 eval/revenue_mean higher 0 0 16 0.0
11 0.25 eval/reward_mean higher 0 0 16 0.0
12 0.25 eval/coi_leakage_mean lower 12 0 16 0.75
13 0.25 eval/volatility_mean lower 11 0 16 0.6875
14 0.4 eval/revenue_mean higher 0 0 16 0.0
15 0.4 eval/reward_mean higher 0 0 16 0.0
16 0.4 eval/coi_leakage_mean lower 16 0 16 1.0
17 0.4 eval/volatility_mean lower 6 0 16 0.375
18 0.6 eval/revenue_mean higher 0 0 16 0.0
19 0.6 eval/reward_mean higher 0 0 16 0.0
20 0.6 eval/coi_leakage_mean lower 16 0 16 1.0
21 0.6 eval/volatility_mean lower 7 0 16 0.4375
22 0.8 eval/revenue_mean higher 0 0 16 0.0
23 0.8 eval/reward_mean higher 0 0 16 0.0
24 0.8 eval/coi_leakage_mean lower 11 0 16 0.6875
25 0.8 eval/volatility_mean lower 0 0 16 0.0

View File

@@ -1 +1 @@
\includegraphics[width=0.98\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_alpha_curves.pdf}
\includegraphics[width=0.98\linewidth]{chapters/figures/results/generated/final/plots/final_focus_coi_by_alpha.pdf}

View File

@@ -1 +1 @@
\includegraphics[width=0.98\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_delta_curves.pdf}
\includegraphics[width=0.98\linewidth]{chapters/figures/results/generated/final/plots/final_focus_coi_preservation_grid.pdf}

View File

@@ -1 +0,0 @@
\includegraphics[width=0.99\linewidth]{chapters/figures/results/generated/legacy/plots/first_sweep_tier_revenue.pdf}

View File

@@ -1 +0,0 @@
\includegraphics[width=0.88\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_tradeoff_scatter.pdf}

View File

@@ -1,313 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import numpy as np
import pandas as pd
from process_first_sweep import run as run_first_sweep
from process_ppo_benchmark import run as run_ppo_benchmark
def _output_dir() -> Path:
return Path(__file__).resolve().parent / "generated" / "legacy"
def _plot_dir() -> Path:
return _output_dir() / "plots"
def _configure_style() -> None:
    """Install the rcParams shared by every figure produced by this script."""
    typography = {
        "font.family": "serif",
        "font.size": 10,
        "axes.titlesize": 10,
        "axes.labelsize": 9,
        "legend.fontsize": 8,
        "xtick.labelsize": 8,
        "ytick.labelsize": 8,
    }
    resolution = {
        "figure.dpi": 220,
        "savefig.dpi": 320,
    }
    # Open-frame axes (no top/right spines) over a faint grid.
    frame_and_grid = {
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.grid": True,
        "grid.alpha": 0.22,
    }
    plt.rcParams.update({**typography, **resolution, **frame_and_grid})
def _fmt_thousands(value: float, _: int) -> str:
return f"{int(value):,}"
def _load_csv(path: Path) -> pd.DataFrame:
if not path.exists():
raise FileNotFoundError(f"Missing required input: {path}")
return pd.read_csv(path)
# Draw a 2x2 grid of metric-vs-alpha curves (one line per mode) and save it as
# "ppo_alpha_curves.pdf" inside out_dir; returns the path of the written PDF.
# NOTE(review): indentation was lost in this view, so the loop/if nesting below
# has to be inferred from context - confirm against the original file.
def _plot_ppo_alpha_curves(alpha_mode: pd.DataFrame, out_dir: Path) -> Path:
fig, axes = plt.subplots(2, 2, figsize=(9.3, 6.4), constrained_layout=True)
robust_color = "#C44E52"
baseline_color = "#4C72B0"
mode_colors = {"robust": robust_color, "no_robust": baseline_color}
mode_labels = {"robust": "Robust", "no_robust": "Non-robust"}
# (column prefix, panel title, y-axis label) for each of the four panels.
panels = [
("eval_revenue_mean", "Mean Episode Revenue", "Revenue"),
("eval_reward_mean", "Mean Episode Reward", "Reward"),
("eval_coi_leakage_mean", "Mean COI Leakage", "COI Leakage"),
("eval_volatility_mean", "Mean Price Volatility", "Volatility"),
]
for ax, (metric_prefix, title, ylabel) in zip(axes.flat, panels):
# Columns read from alpha_mode are named <prefix>_mean and <prefix>_std.
mean_col = f"{metric_prefix}_mean"
std_col = f"{metric_prefix}_std"
for mode in ("no_robust", "robust"):
sub = alpha_mode[alpha_mode["mode"] == mode].sort_values("alpha")
# Skip modes that have no rows.
if sub.empty:
continue
x = sub["alpha"].to_numpy(dtype=float)
y = sub[mean_col].to_numpy(dtype=float)
ax.plot(
x,
y,
marker="o",
linewidth=1.8,
markersize=4,
color=mode_colors[mode],
label=mode_labels[mode],
)
# Shade mean +/- std when a std column exists; missing std values count as 0.
if std_col in sub.columns:
sigma = sub[std_col].fillna(0.0).to_numpy(dtype=float)
ax.fill_between(
x,
y - sigma,
y + sigma,
color=mode_colors[mode],
alpha=0.14,
linewidth=0,
)
ax.set_title(title)
ax.set_xlabel(r"Contamination $\alpha$")
ax.set_ylabel(ylabel)
ax.set_xticks(sorted(alpha_mode["alpha"].unique()))
# Revenue and reward panels get thousands-separated y tick labels.
if metric_prefix in {"eval_revenue_mean", "eval_reward_mean"}:
ax.yaxis.set_major_formatter(FuncFormatter(_fmt_thousands))
# One figure-level legend built from the first panel's handles.
handles, labels = axes.flat[0].get_legend_handles_labels()
fig.legend(handles, labels, ncol=2, loc="upper center", bbox_to_anchor=(0.5, 1.02))
out_path = out_dir / "ppo_alpha_curves.pdf"
fig.savefig(out_path, bbox_inches="tight")
plt.close(fig)
return out_path
def _plot_ppo_delta_curves(deltas: pd.DataFrame, out_dir: Path) -> Path:
    """Plot robust-minus-non-robust percentage deltas against alpha.

    The top panel shows revenue and reward deltas, the bottom panel COI
    leakage and volatility deltas. The figure is saved as
    ``ppo_delta_curves.pdf`` in ``out_dir`` and that path is returned.
    """
    fig, axes = plt.subplots(2, 1, figsize=(8.6, 6.0), constrained_layout=True)
    ordered = deltas.sort_values("alpha")
    alphas = ordered["alpha"].to_numpy(dtype=float)

    # (axis, optional title, ((column, legend label, colour), ...)) per panel.
    panel_specs = (
        (
            axes[0],
            "Robust Minus Non-robust Delta by Contamination",
            (
                ("eval_revenue_mean_delta_pct", "Revenue", "#4C72B0"),
                ("eval_reward_mean_delta_pct", "Reward", "#8172B3"),
            ),
        ),
        (
            axes[1],
            None,
            (
                ("eval_coi_leakage_mean_delta_pct", "COI Leakage", "#55A868"),
                ("eval_volatility_mean_delta_pct", "Volatility", "#DD8452"),
            ),
        ),
    )

    for axis, heading, series in panel_specs:
        for column, legend_label, line_color in series:
            axis.plot(
                alphas,
                ordered[column].to_numpy(dtype=float),
                marker="o",
                linewidth=1.8,
                markersize=4,
                color=line_color,
                label=legend_label,
            )
        # Dashed zero line separates gains from losses.
        axis.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
        if heading is not None:
            axis.set_title(heading)
        axis.set_ylabel("Delta (%)")
        axis.set_xlabel(r"Contamination $\alpha$")
        axis.set_xticks(alphas)
        axis.legend(loc="lower left")

    destination = out_dir / "ppo_delta_curves.pdf"
    fig.savefig(destination, bbox_inches="tight")
    plt.close(fig)
    return destination
def _plot_ppo_tradeoff_scatter(deltas: pd.DataFrame, out_dir: Path) -> Path:
    """Scatter revenue delta (%) against COI-leakage delta (%), coloured by alpha.

    Each point is annotated with its alpha value. The figure is saved as
    ``ppo_tradeoff_scatter.pdf`` in ``out_dir`` and that path is returned.
    """
    fig, ax = plt.subplots(figsize=(6.4, 5.2), constrained_layout=True)
    ordered = deltas.sort_values("alpha")
    leakage_delta = ordered["eval_coi_leakage_mean_delta_pct"].to_numpy(dtype=float)
    revenue_delta = ordered["eval_revenue_mean_delta_pct"].to_numpy(dtype=float)
    alpha_values = ordered["alpha"].to_numpy(dtype=float)

    points = ax.scatter(
        leakage_delta,
        revenue_delta,
        c=alpha_values,
        cmap="viridis",
        s=72,
        edgecolor="#222222",
        linewidth=0.5,
    )
    for idx, level in enumerate(alpha_values):
        ax.annotate(
            rf"$\alpha={level:.2f}$",
            (leakage_delta[idx], revenue_delta[idx]),
            textcoords="offset points",
            xytext=(5, 4),
            fontsize=8,
        )

    # Dashed cross-hairs through the origin.
    for draw_reference in (ax.axhline, ax.axvline):
        draw_reference(0.0, color="#555555", linewidth=1.0, linestyle="--")

    ax.set_xlabel("COI Leakage Delta (%)")
    ax.set_ylabel("Revenue Delta (%)")
    ax.set_title("PPO Robust Tradeoff Frontier")
    colorbar = fig.colorbar(points, ax=ax)
    colorbar.set_label(r"Contamination $\alpha$")

    destination = out_dir / "ppo_tradeoff_scatter.pdf"
    fig.savefig(destination, bbox_inches="tight")
    plt.close(fig)
    return destination
def _plot_first_sweep_tier_revenue(tier_mode: pd.DataFrame, out_dir: Path) -> Path:
    """Plot per-tier mean revenue for both modes plus the relative delta.

    Left panel: grouped bars (log scale) of ``eval_revenue_mean_mean`` for the
    ``no_robust`` and ``robust`` modes. Right panel: robust-minus-non-robust
    revenue delta in percent. Saved as ``first_sweep_tier_revenue.pdf`` in
    ``out_dir``; the written path is returned.

    Raises:
        ValueError: if no tier has a value for both modes (including the case
            where one mode is entirely absent from ``tier_mode``).
    """
    # reindex guarantees both mode columns exist, so a wholly missing mode
    # reaches the ValueError below instead of a KeyError from dropna(subset=...).
    pivot = (
        tier_mode.pivot(index="tier", columns="mode", values="eval_revenue_mean_mean")
        .reindex(columns=["robust", "no_robust"])
        .dropna(subset=["robust", "no_robust"], how="any")
        .copy()
    )
    if pivot.empty:
        raise ValueError("First sweep tier summary missing robust/non-robust pairs")

    order = sorted(pivot.index.tolist())
    pivot = pivot.loc[order]
    delta_pct = 100.0 * (pivot["robust"] - pivot["no_robust"]) / pivot["no_robust"]

    fig, axes = plt.subplots(1, 2, figsize=(10.2, 4.3), constrained_layout=True)
    x = np.arange(len(order))
    width = 0.36

    # Left panel: side-by-side bars centred on each tier position.
    axes[0].bar(
        x - width / 2,
        pivot["no_robust"].to_numpy(dtype=float),
        width=width,
        label="Non-robust",
        color="#4C72B0",
    )
    axes[0].bar(
        x + width / 2,
        pivot["robust"].to_numpy(dtype=float),
        width=width,
        label="Robust",
        color="#C44E52",
    )
    axes[0].set_xticks(x)
    axes[0].set_xticklabels(order, rotation=20)
    axes[0].set_ylabel("Mean Revenue")
    axes[0].set_yscale("log")
    axes[0].yaxis.set_major_formatter(FuncFormatter(_fmt_thousands))
    axes[0].set_title("First Sweep Tier Revenue (log scale)")
    axes[0].legend()

    # Right panel: relative revenue change per tier.
    axes[1].bar(x, delta_pct.to_numpy(dtype=float), color="#55A868", width=0.55)
    axes[1].axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
    axes[1].set_xticks(x)
    axes[1].set_xticklabels(order, rotation=20)
    axes[1].set_ylabel("Revenue Delta (%)")
    axes[1].set_title("Robust Minus Non-robust by Tier")

    out_path = out_dir / "first_sweep_tier_revenue.pdf"
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)
    return out_path
def build_plots(data_dir: Path, out_dir: Path) -> list[Path]:
    """Render every legacy figure from the processed CSVs in ``data_dir``.

    All three input CSVs are loaded before ``out_dir`` is created, so a missing
    input fails before anything is written. Returns the written PDF paths in
    plotting order.
    """
    alpha_mode, deltas, tier_mode = (
        _load_csv(data_dir / filename)
        for filename in (
            "ppo_alpha_mode_summary.csv",
            "ppo_alpha_deltas.csv",
            "first_sweep_tier_mode_summary.csv",
        )
    )
    out_dir.mkdir(parents=True, exist_ok=True)

    written: list[Path] = []
    written.append(_plot_ppo_alpha_curves(alpha_mode, out_dir))
    written.append(_plot_ppo_delta_curves(deltas, out_dir))
    written.append(_plot_ppo_tradeoff_scatter(deltas, out_dir))
    written.append(_plot_first_sweep_tier_revenue(tier_mode, out_dir))
    return written
def main() -> None:
    """CLI entry point: optionally refresh the processed CSVs, then render all plots.

    Prints the path of every generated figure, one per line.
    """
    cli = argparse.ArgumentParser(
        description="Create paper-ready plots from result CSVs"
    )
    cli.add_argument("--data-dir", type=Path, default=_output_dir())
    cli.add_argument("--plot-dir", type=Path, default=_plot_dir())
    cli.add_argument(
        "--refresh-data",
        action="store_true",
        help="Regenerate processed CSVs before plotting",
    )
    options = cli.parse_args()

    _configure_style()

    if options.refresh_data:
        # Raw exports are read from <parents[5]>/tpu_orchestration/results.
        results_dir = (
            Path(__file__).resolve().parents[5] / "tpu_orchestration" / "results"
        )
        run_ppo_benchmark(
            input_path=results_dir / "ppo_benchmark.csv",
            output_dir=options.data_dir,
            include_non_finished=False,
        )
        run_first_sweep(
            input_path=results_dir / "first_sweep.csv",
            output_dir=options.data_dir,
            include_non_finished=False,
            top_n=25,
        )

    for figure_path in build_plots(
        data_dir=options.data_dir, out_dir=options.plot_dir
    ):
        print(figure_path)


if __name__ == "__main__":
    main()

View File

@@ -1,51 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from process_first_sweep import run as run_first_sweep
from process_ppo_benchmark import run as run_ppo_benchmark
def _default_output_dir() -> Path:
return Path(__file__).resolve().parent / "generated" / "legacy"
def main() -> None:
    """CLI entry point: process both raw result CSVs and print every written path.

    The PPO benchmark export is processed first, then the first-sweep export.
    """
    cli = argparse.ArgumentParser(
        description="Process all result CSV exports for paper figures"
    )
    cli.add_argument("--output-dir", type=Path, default=_default_output_dir())
    cli.add_argument("--include-non-finished", action="store_true")
    cli.add_argument("--top-n", type=int, default=25)
    options = cli.parse_args()

    # Raw exports are read from <parents[5]>/tpu_orchestration/results.
    results_dir = Path(__file__).resolve().parents[5] / "tpu_orchestration" / "results"
    keep_unfinished = bool(options.include_non_finished)

    written: list[Path] = [
        *run_ppo_benchmark(
            input_path=results_dir / "ppo_benchmark.csv",
            output_dir=options.output_dir,
            include_non_finished=keep_unfinished,
        ),
        *run_first_sweep(
            input_path=results_dir / "first_sweep.csv",
            output_dir=options.output_dir,
            include_non_finished=keep_unfinished,
            top_n=int(options.top_n),
        ),
    ]
    for written_path in written:
        print(written_path)


if __name__ == "__main__":
    main()

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import argparse
import json
from pathlib import Path
import subprocess
from typing import Any
import matplotlib
@@ -37,6 +38,20 @@ def _default_plot_dir(output_dir: Path) -> Path:
return output_dir / "plots"
def _git_commit() -> str:
try:
result = subprocess.run(
["git", "rev-parse", "HEAD"],
check=True,
text=True,
capture_output=True,
cwd=_project_root(),
)
except Exception:
return "unknown"
return result.stdout.strip()
def _truthy(value: Any) -> bool:
if isinstance(value, bool):
return value
@@ -195,6 +210,48 @@ def _zone_summary(alpha_deltas: pd.DataFrame) -> pd.DataFrame:
)
def _alpha_product_coi_preservation(runs: pd.DataFrame) -> pd.DataFrame:
grouped = (
runs.groupby(["alpha", "n_products", "mode"], as_index=False)
.agg(
runs=("run_id", "size"),
coi_level_mean=("eval_coi_level_mean", "mean"),
)
.sort_values(["alpha", "n_products", "mode"])
.reset_index(drop=True)
)
rows: list[dict[str, float | int]] = []
for (alpha, n_products), group in grouped.groupby(
["alpha", "n_products"], sort=True
):
defended = group[group["mode"] == "defended"]
baseline = group[group["mode"] == "baseline"]
if defended.empty or baseline.empty:
continue
d_coi = float(defended["coi_level_mean"].iloc[0])
b_coi = float(baseline["coi_level_mean"].iloc[0])
rows.append(
{
"alpha": float(alpha),
"n_products": float(n_products),
"baseline_runs": int(baseline["runs"].iloc[0]),
"defended_runs": int(defended["runs"].iloc[0]),
"baseline_coi_level_mean": b_coi,
"defended_coi_level_mean": d_coi,
"coi_preserved": d_coi - b_coi,
"coi_preserved_pct": 0.0
if b_coi == 0.0
else 100.0 * (d_coi - b_coi) / b_coi,
}
)
return (
pd.DataFrame(rows).sort_values(["alpha", "n_products"]).reset_index(drop=True)
)
def _save_plot(fig: plt.Figure, path: Path) -> Path:
path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(path, bbox_inches="tight")
@@ -202,6 +259,61 @@ def _save_plot(fig: plt.Figure, path: Path) -> Path:
return path
def _smoothed_curve(
x: np.ndarray,
y: np.ndarray,
*,
window: int = 5,
points: int = 320,
) -> tuple[np.ndarray, np.ndarray]:
x_values = np.asarray(x, dtype=float)
y_values = np.asarray(y, dtype=float)
mask = np.isfinite(x_values) & np.isfinite(y_values)
x_values = x_values[mask]
y_values = y_values[mask]
if x_values.size == 0:
return x_values, y_values
order = np.argsort(x_values)
x_values = x_values[order]
y_values = y_values[order]
unique_x = np.unique(x_values)
if unique_x.size != x_values.size:
dedup = (
pd.DataFrame({"x": x_values, "y": y_values})
.groupby("x", as_index=False)
.agg(y=("y", "mean"))
.sort_values("x")
)
x_values = dedup["x"].to_numpy(dtype=float)
y_values = dedup["y"].to_numpy(dtype=float)
if x_values.size < 3:
return x_values, y_values
win = int(max(3, window))
if win % 2 == 0:
win += 1
if win > x_values.size:
win = x_values.size if x_values.size % 2 == 1 else x_values.size - 1
if win < 3:
return x_values, y_values
half = win // 2
offsets = np.arange(-half, half + 1, dtype=float)
sigma = max(win / 3.0, 1.0)
kernel = np.exp(-0.5 * (offsets / sigma) ** 2)
kernel = kernel / np.sum(kernel)
y_padded = np.pad(y_values, (half, half), mode="edge")
y_smooth = np.convolve(y_padded, kernel, mode="valid")
n_points = max(int(points), x_values.size)
x_dense = np.linspace(float(np.min(x_values)), float(np.max(x_values)), n_points)
y_dense = np.interp(x_dense, x_values, y_smooth)
return x_dense, y_dense
def _plot_focus_revenue_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Path:
fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
for mode, color, label in (
@@ -220,7 +332,6 @@ def _plot_focus_revenue_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Pa
color=color,
label=label,
)
ax.axvline(0.7, color="#666666", linewidth=1.0, linestyle="--")
ax.set_xlabel(r"Contamination $\alpha$")
ax.set_ylabel("Mean episode revenue")
ax.set_title("Final Cohort Revenue Curves")
@@ -228,6 +339,147 @@ def _plot_focus_revenue_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Pa
return _save_plot(fig, out_path)
def _plot_focus_coi_by_alpha(alpha_mode: pd.DataFrame, out_path: Path) -> Path:
    """Plot smoothed mean COI level against alpha for both modes.

    Each mode gets a smoothed line plus its raw points. Where both modes have
    a value for the same alpha, the area between the two smoothed curves is
    shaded and labelled "Gap". The figure is written through ``_save_plot``
    and its return value is passed back.
    """
    fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)

    series_styles = (
        ("baseline", "#4C72B0", "Baseline"),
        ("defended", "#C44E52", "Defended"),
    )
    for mode_name, tint, legend_text in series_styles:
        rows = alpha_mode[alpha_mode["mode"] == mode_name].sort_values("alpha")
        if rows.empty:
            continue
        alpha_raw = rows["alpha"].to_numpy(dtype=float)
        coi_raw = rows["coi_level_mean"].to_numpy(dtype=float)
        alpha_dense, coi_dense = _smoothed_curve(alpha_raw, coi_raw)
        ax.plot(alpha_dense, coi_dense, linewidth=1.9, color=tint, label=legend_text)
        # Raw observations drawn on top of the smoothed line.
        ax.scatter(
            alpha_raw,
            coi_raw,
            s=18,
            color=tint,
            edgecolor="#FFFFFF",
            linewidth=0.45,
            zorder=3,
        )

    by_alpha = alpha_mode.pivot_table(
        index="alpha",
        columns="mode",
        values="coi_level_mean",
        aggfunc="mean",
    ).sort_index()
    if {"baseline", "defended"}.issubset(set(by_alpha.columns)):
        by_alpha = by_alpha.dropna(subset=["baseline", "defended"], how="any")
        if not by_alpha.empty:
            shared_alpha = by_alpha.index.to_numpy(dtype=float)
            baseline_level = by_alpha["baseline"].to_numpy(dtype=float)
            defended_level = by_alpha["defended"].to_numpy(dtype=float)
            # Both curves are smoothed over the same alphas, so one grid serves both.
            fill_grid, baseline_dense = _smoothed_curve(shared_alpha, baseline_level)
            _, defended_dense = _smoothed_curve(shared_alpha, defended_level)
            ax.fill_between(
                fill_grid,
                baseline_dense,
                defended_dense,
                color="#55A868",
                alpha=0.12,
                label="Gap",
            )

    ax.set_xlabel(r"Contamination $\alpha$")
    ax.set_ylabel("Mean COI level")
    ax.set_title("Final Cohort COI Curves")
    ax.legend(loc="lower left")
    return _save_plot(fig, out_path)
def _plot_focus_coi_preservation_grid(
    coi_preservation: pd.DataFrame, out_path: Path
) -> Path:
    """Bar chart of ``coi_preserved`` per product count at the two endpoint alphas.

    The endpoints are the alpha levels matching 0.0 and 1.0; when either is
    absent, the smallest and largest available alpha are used instead. The
    title and legend name the alpha levels actually plotted, so the fallback
    case is not mislabelled as 0.0 vs 1.0.

    Args:
        coi_preservation: frame with ``alpha``, ``n_products`` and
            ``coi_preserved`` columns.
        out_path: destination handed to ``_save_plot``.

    Raises:
        ValueError: if ``coi_preservation`` is empty or has no rows at the
            selected endpoints.
    """
    if coi_preservation.empty:
        raise ValueError("COI preservation grid requires at least one paired cell")

    def _alpha_text(level: float) -> str:
        # One decimal when that is exact (0.0, 1.0, 0.3), otherwise the
        # shortest faithful form so e.g. 0.25 is not shown as 0.2.
        one_decimal = f"{level:.1f}"
        if np.isclose(level, float(one_decimal), atol=1e-9):
            return one_decimal
        return f"{level:g}"

    alpha_levels = sorted(coi_preservation["alpha"].dropna().unique().tolist())
    endpoint_targets = (0.0, 1.0)
    endpoint_levels = [
        alpha
        for target in endpoint_targets
        for alpha in alpha_levels
        if np.isclose(alpha, target, atol=1e-9)
    ]
    # Fall back to the extreme alphas when 0.0 and 1.0 are not both present.
    if len(endpoint_levels) < 2 and alpha_levels:
        endpoint_levels = [alpha_levels[0], alpha_levels[-1]]
    endpoint_levels = sorted(set(endpoint_levels))

    data = coi_preservation[coi_preservation["alpha"].isin(endpoint_levels)].copy()
    if data.empty:
        raise ValueError(
            "COI preservation grid has no rows for selected alpha endpoints"
        )

    alpha_levels = sorted(data["alpha"].dropna().unique().tolist())
    product_levels = sorted(data["n_products"].dropna().unique().tolist())
    bars = data.pivot_table(
        index="n_products",
        columns="alpha",
        values="coi_preserved",
        aggfunc="mean",
    ).reindex(index=product_levels, columns=alpha_levels)

    x = np.arange(len(product_levels), dtype=float)
    n_alpha = max(len(alpha_levels), 1)
    bar_width = min(0.78 / n_alpha, 0.35)
    # Centre the group of bars on each product-count tick.
    offsets = (np.arange(n_alpha, dtype=float) - (n_alpha - 1) / 2.0) * bar_width
    palette = ["#4C72B0", "#C44E52", "#55A868", "#8172B3"]

    fig, ax = plt.subplots(figsize=(7.8, 5.0), constrained_layout=True)
    for idx, alpha in enumerate(alpha_levels):
        values = bars[alpha].to_numpy(dtype=float)
        mask = np.isfinite(values)
        if not np.any(mask):
            continue
        xpos = x[mask] + offsets[idx]
        v = values[mask]
        ax.bar(
            xpos,
            v,
            width=bar_width * 0.96,
            color=palette[idx % len(palette)],
            label=rf"$\alpha={_alpha_text(alpha)}$",
        )
        # Signed value label just beyond the end of each bar.
        for x_i, y_i in zip(xpos, v):
            ax.text(
                float(x_i),
                float(y_i) + (0.035 if y_i >= 0 else -0.035),
                f"{y_i:+.2f}",
                ha="center",
                va="bottom" if y_i >= 0 else "top",
                fontsize=7,
            )

    # Symmetric y-limits with headroom for the value labels.
    valid = bars.to_numpy(dtype=float)
    valid = valid[np.isfinite(valid)]
    max_abs = float(np.max(np.abs(valid))) if valid.size else 1.0
    max_abs = max(max_abs * 1.22, 0.4)
    ax.set_ylim(-max_abs, max_abs)
    ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
    ax.set_xticks(x)
    ax.set_xticklabels([f"{int(v)}" for v in product_levels])
    ax.set_xlabel("Product count")
    ax.set_ylabel("COI preserved (defended minus baseline)")
    # Name the plotted levels; identical to the old fixed title for 0.0 / 1.0.
    level_text = " vs ".join(rf"$\alpha={_alpha_text(a)}$" for a in alpha_levels)
    ax.set_title(f"COI Preservation by Product Count at {level_text}")
    ax.legend(loc="upper right")
    ax.grid(axis="y", alpha=0.22)
    return _save_plot(fig, out_path)
def _plot_focus_revenue_delta(alpha_deltas: pd.DataFrame, out_path: Path) -> Path:
fig, ax = plt.subplots(figsize=(7.8, 4.8), constrained_layout=True)
x = alpha_deltas["alpha"].to_numpy(dtype=float)
@@ -235,7 +487,6 @@ def _plot_focus_revenue_delta(alpha_deltas: pd.DataFrame, out_path: Path) -> Pat
ax.plot(x, y, marker="o", linewidth=2.0, markersize=4, color="#C44E52")
ax.fill_between(x, y, 0.0, color="#C44E52", alpha=0.12)
ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
ax.axvline(0.7, color="#666666", linewidth=1.0, linestyle="--")
high = alpha_deltas[alpha_deltas["alpha"] >= 0.7]
if not high.empty:
best = high.reindex(
@@ -283,7 +534,6 @@ def _plot_focus_risk_deltas(alpha_deltas: pd.DataFrame, out_path: Path) -> Path:
label="Volatility delta",
)
ax.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
ax.axvline(0.7, color="#666666", linewidth=1.0, linestyle="--")
ax.set_xlabel(r"Contamination $\alpha$")
ax.set_ylabel("Defended minus baseline")
ax.set_title("Leakage and Stability Deltas (Final Cohort)")
@@ -297,13 +547,21 @@ def _write_include(path: Path, figure_rel_path: str, width: str) -> Path:
return path
def run(bundle_dir: Path, output_dir: Path, plot_dir: Path) -> list[Path]:
def run(
bundle_dir: Path,
output_dir: Path,
plot_dir: Path,
focus_sweep_id: str | None = None,
) -> list[Path]:
all_runs = _load_runs(bundle_dir)
focus_id = _focus_sweep(all_runs)
focus_id = str(focus_sweep_id) if focus_sweep_id else _focus_sweep(all_runs)
if focus_id not in set(all_runs["sweep_id"].astype(str).unique()):
raise ValueError(f"Requested focus sweep_id not found: {focus_id}")
focus_runs = all_runs[all_runs["sweep_id"] == focus_id].copy()
alpha_mode = _alpha_mode_summary(focus_runs)
deltas = _alpha_deltas(alpha_mode)
zones = _zone_summary(deltas)
coi_preservation = _alpha_product_coi_preservation(focus_runs)
output_dir.mkdir(parents=True, exist_ok=True)
plot_dir.mkdir(parents=True, exist_ok=True)
@@ -321,9 +579,16 @@ def run(bundle_dir: Path, output_dir: Path, plot_dir: Path) -> list[Path]:
zones.to_csv(zone_path, index=False)
written.append(zone_path)
coi_grid_path = output_dir / "final_focus_coi_preservation_grid.csv"
coi_preservation.to_csv(coi_grid_path, index=False)
written.append(coi_grid_path)
headline = {
"bundle": str(bundle_dir),
"focus_cohort": "max_alpha_coverage",
"focus_sweep_id": focus_id,
"focus_run_count": int(len(focus_runs)),
"git_commit": _git_commit(),
"alpha_cells": int(deltas["alpha"].nunique()) if not deltas.empty else 0,
"alpha_min": float(deltas["alpha"].min()) if not deltas.empty else None,
"alpha_max": float(deltas["alpha"].max()) if not deltas.empty else None,
@@ -345,6 +610,18 @@ def run(bundle_dir: Path, output_dir: Path, plot_dir: Path) -> list[Path]:
plot_dir / "final_focus_revenue_by_alpha.pdf",
)
)
written.append(
_plot_focus_coi_by_alpha(
alpha_mode,
plot_dir / "final_focus_coi_by_alpha.pdf",
)
)
written.append(
_plot_focus_coi_preservation_grid(
coi_preservation,
plot_dir / "final_focus_coi_preservation_grid.pdf",
)
)
written.append(
_plot_focus_revenue_delta(
deltas,
@@ -358,7 +635,7 @@ def run(bundle_dir: Path, output_dir: Path, plot_dir: Path) -> list[Path]:
)
)
include_dir = Path(__file__).resolve().parent / "includes" / "final"
include_dir = Path(__file__).resolve().parent / "includes"
written.append(
_write_include(
include_dir / "final_focus_revenue_by_alpha.tex",
@@ -366,6 +643,20 @@ def run(bundle_dir: Path, output_dir: Path, plot_dir: Path) -> list[Path]:
"0.98\\linewidth",
)
)
written.append(
_write_include(
include_dir / "final_focus_coi_by_alpha.tex",
"chapters/figures/results/generated/final/plots/final_focus_coi_by_alpha.pdf",
"0.98\\linewidth",
)
)
written.append(
_write_include(
include_dir / "final_focus_coi_preservation_grid.tex",
"chapters/figures/results/generated/final/plots/final_focus_coi_preservation_grid.pdf",
"0.98\\linewidth",
)
)
written.append(
_write_include(
include_dir / "final_focus_revenue_delta.tex",
@@ -390,6 +681,7 @@ def main() -> None:
parser.add_argument("--bundle-dir", type=Path, default=_default_bundle_dir())
parser.add_argument("--output-dir", type=Path, default=_default_output_dir())
parser.add_argument("--plot-dir", type=Path, default=None)
parser.add_argument("--focus-sweep-id", type=str, default=None)
args = parser.parse_args()
_configure_style()
@@ -399,7 +691,10 @@ def main() -> None:
else _default_plot_dir(args.output_dir)
)
outputs = run(
bundle_dir=args.bundle_dir, output_dir=args.output_dir, plot_dir=plot_dir
bundle_dir=args.bundle_dir,
output_dir=args.output_dir,
plot_dir=plot_dir,
focus_sweep_id=args.focus_sweep_id,
)
for path in outputs:
print(path)

View File

@@ -1,272 +0,0 @@
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Iterable
import numpy as np
import pandas as pd
def _project_root() -> Path:
return Path(__file__).resolve().parents[5]
def _default_input() -> Path:
return _project_root() / "tpu_orchestration" / "results" / "first_sweep.csv"
def _default_output_dir() -> Path:
return Path(__file__).resolve().parent / "generated" / "legacy"
def _sanitize(key: str) -> str:
return key.replace("/", "_").replace("-", "_")
def _coerce_numeric(frame: pd.DataFrame, columns: Iterable[str]) -> None:
for column in columns:
if column in frame.columns:
frame[column] = pd.to_numeric(frame[column], errors="coerce")
def _extract_alpha(frame: pd.DataFrame) -> pd.Series:
if "study/alpha" in frame.columns:
return pd.to_numeric(frame["study/alpha"], errors="coerce")
if "alpha" in frame.columns:
return pd.to_numeric(frame["alpha"], errors="coerce")
return pd.Series(np.nan, index=frame.index, dtype=float)
def _extract_mode(frame: pd.DataFrame) -> pd.Series:
if "study/mode" in frame.columns:
return frame["study/mode"].astype(str).str.strip().str.lower()
if "study/no_robust" in frame.columns:
no_robust = pd.to_numeric(frame["study/no_robust"], errors="coerce").fillna(0.0)
return pd.Series(
np.where(no_robust > 0.5, "no_robust", "robust"),
index=frame.index,
dtype="object",
)
if "no_robust" in frame.columns:
no_robust = (
frame["no_robust"].astype(str).str.lower().isin({"1", "true", "yes"})
)
return pd.Series(
np.where(no_robust, "no_robust", "robust"),
index=frame.index,
dtype="object",
)
return pd.Series("", index=frame.index, dtype="object")
def _extract_tier(frame: pd.DataFrame) -> pd.Series:
for column in ("tiers", "runtime/backend", "algo", "run.backend", "run.algo"):
if column in frame.columns:
tier = frame[column].astype(str).str.strip().str.lower()
if tier.notna().any():
return tier
return pd.Series("unknown", index=frame.index, dtype="object")
def _prepare_frame(frame: pd.DataFrame, include_non_finished: bool) -> pd.DataFrame:
    """Filter and normalise the raw first-sweep export.

    Steps:
      1. Unless ``include_non_finished`` is set, keep only rows whose ``State``
         (case-insensitive) is ``finished``, when that column exists.
      2. Add derived ``alpha``, ``mode`` and ``tier`` columns.
      3. Keep rows with mode ``robust``/``no_robust`` and a non-missing alpha.
      4. Coerce the known metric and hyper-parameter columns to numeric.

    Returns:
        A new frame sorted by ``tier``, ``alpha``, ``mode`` with a fresh index;
        the input frame is not modified.
    """
    data = frame.copy()
    if not include_non_finished and "State" in data.columns:
        data = data[data["State"].astype(str).str.lower() == "finished"].copy()

    data["alpha"] = _extract_alpha(data)
    data["mode"] = _extract_mode(data)
    data["tier"] = _extract_tier(data)

    # Take an explicit copy after filtering: _coerce_numeric assigns columns
    # in place, and doing that on a boolean-indexed slice triggers pandas'
    # SettingWithCopyWarning.
    keep = data["mode"].isin({"robust", "no_robust"}) & data["alpha"].notna()
    data = data[keep].copy()

    _coerce_numeric(
        data,
        [
            "eval/revenue_mean",
            "eval/reward_mean",
            "eval/coi_level_mean",
            "eval/coi_leakage_mean",
            "eval/margin_mean",
            "eval/volatility_mean",
            "objective/score",
            "train/alpha_adv",
            "lambda_coi",
            "robust_radius",
            "learning_rate",
            "batch_size",
            "n_steps",
            "total_timesteps",
        ],
    )
    return data.sort_values(["tier", "alpha", "mode"]).reset_index(drop=True)
def _group_summary(
    frame: pd.DataFrame, by: list[str], metrics: list[str]
) -> pd.DataFrame:
    """Aggregate ``frame`` per ``by`` group.

    The result has a ``runs`` count plus ``<metric>_mean`` and ``<metric>_std``
    columns for every metric (names passed through ``_sanitize``), sorted by
    the grouping keys.
    """
    spec: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        stem = _sanitize(metric)
        spec.update(
            {
                f"{stem}_mean": (metric, "mean"),
                f"{stem}_std": (metric, "std"),
            }
        )
    grouped = frame.groupby(by, as_index=False)
    return grouped.agg(**spec).sort_values(by)
def _tier_alpha_deltas(summary: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Compute robust-minus-non-robust deltas for each (tier, alpha) cell.

    Cells lacking either mode are skipped. For every metric the absolute delta
    and the delta relative to the non-robust value (percent, NaN when that
    value is 0) are reported.
    """
    records: list[dict[str, float | str]] = []
    for (tier, alpha), cell in summary.groupby(["tier", "alpha"], sort=True):
        robust_rows = cell.loc[cell["mode"] == "robust"]
        plain_rows = cell.loc[cell["mode"] == "no_robust"]
        if robust_rows.empty or plain_rows.empty:
            continue

        record: dict[str, float | str] = {
            "tier": str(tier),
            "alpha": float(alpha),
            "runs_robust": float(robust_rows["runs"].iloc[0]),
            "runs_no_robust": float(plain_rows["runs"].iloc[0]),
        }
        for metric in metrics:
            stem = _sanitize(metric)
            mean_column = f"{stem}_mean"
            robust_value = float(robust_rows[mean_column].iloc[0])
            plain_value = float(plain_rows[mean_column].iloc[0])
            gap = robust_value - plain_value
            record[f"{stem}_delta"] = gap
            if plain_value == 0:
                record[f"{stem}_delta_pct"] = np.nan
            else:
                record[f"{stem}_delta_pct"] = 100.0 * gap / plain_value
        records.append(record)
    return pd.DataFrame(records)
def _top_runs(frame: pd.DataFrame, n: int) -> pd.DataFrame:
rank_metric = "objective/score"
if rank_metric not in frame.columns or frame[rank_metric].notna().sum() == 0:
rank_metric = "eval/reward_mean"
keep = [
"Name",
"tier",
"alpha",
"mode",
rank_metric,
"eval/revenue_mean",
"eval/reward_mean",
"eval/coi_level_mean",
"eval/coi_leakage_mean",
"lambda_coi",
"robust_radius",
"learning_rate",
"batch_size",
"n_steps",
"total_timesteps",
]
present = [column for column in keep if column in frame.columns]
ranked = frame[present].copy().sort_values(rank_metric, ascending=False)
return ranked.head(max(1, int(n))).reset_index(drop=True)
def _headline_json(
frame: pd.DataFrame, tier_mode: pd.DataFrame
) -> dict[str, float | str]:
out: dict[str, float | str] = {
"runs": int(len(frame)),
"tiers": int(frame["tier"].nunique()),
"alphas": int(frame["alpha"].nunique()),
}
robust_rows = tier_mode[tier_mode["mode"] == "robust"]
no_robust_rows = tier_mode[tier_mode["mode"] == "no_robust"]
if robust_rows.empty or no_robust_rows.empty:
out["status"] = "incomplete_modes"
return out
robust_mean = robust_rows["eval_revenue_mean_mean"].mean()
no_robust_mean = no_robust_rows["eval_revenue_mean_mean"].mean()
out.update(
{
"status": "ok",
"mean_tier_revenue_robust": float(robust_mean),
"mean_tier_revenue_no_robust": float(no_robust_mean),
"mean_tier_revenue_delta": float(robust_mean - no_robust_mean),
"mean_tier_revenue_delta_pct": float(
100.0 * (robust_mean - no_robust_mean) / no_robust_mean
)
if no_robust_mean
else np.nan,
}
)
return out
def run(
    input_path: Path, output_dir: Path, include_non_finished: bool, top_n: int
) -> list[Path]:
    """Process the first-sweep CSV and write the summary tables.

    Writes four CSV tables and ``first_sweep_headline_summary.json`` into
    ``output_dir`` (created if needed). Every table and the headline are
    computed before the first file is written.

    Returns:
        The written paths: the four CSVs in table order, then the JSON file.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    frame = _prepare_frame(
        pd.read_csv(input_path), include_non_finished=include_non_finished
    )

    # Only summarise metrics that the export actually contains.
    candidate_metrics = (
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/margin_mean",
        "eval/volatility_mean",
        "objective/score",
        "train/alpha_adv",
    )
    metrics: list[str] = []
    for candidate in candidate_metrics:
        if candidate in frame.columns:
            metrics.append(candidate)

    tier_mode = _group_summary(frame, ["tier", "mode"], metrics)
    tier_alpha_mode = _group_summary(frame, ["tier", "alpha", "mode"], metrics)
    deltas = _tier_alpha_deltas(tier_alpha_mode, metrics)
    top_configs = _top_runs(frame, n=top_n)
    headline = _headline_json(frame, tier_mode)

    tables = (
        ("first_sweep_tier_mode_summary.csv", tier_mode),
        ("first_sweep_tier_alpha_mode_summary.csv", tier_alpha_mode),
        ("first_sweep_tier_alpha_deltas.csv", deltas),
        ("first_sweep_top_configs.csv", top_configs),
    )
    written: list[Path] = []
    for filename, table in tables:
        destination = output_dir / filename
        table.to_csv(destination, index=False)
        written.append(destination)

    headline_path = output_dir / "first_sweep_headline_summary.json"
    headline_path.write_text(json.dumps(headline, indent=2))
    written.append(headline_path)
    return written
def main() -> None:
    """CLI entry point: process the first-sweep export and print written paths."""
    cli = argparse.ArgumentParser(
        description="Process first sweep CSV for paper tables"
    )
    cli.add_argument("--input", type=Path, default=_default_input())
    cli.add_argument("--output-dir", type=Path, default=_default_output_dir())
    cli.add_argument("--include-non-finished", action="store_true")
    cli.add_argument("--top-n", type=int, default=25)
    options = cli.parse_args()

    written_paths = run(
        input_path=options.input,
        output_dir=options.output_dir,
        include_non_finished=bool(options.include_non_finished),
        top_n=int(options.top_n),
    )
    for written_path in written_paths:
        print(written_path)


if __name__ == "__main__":
    main()

View File

@@ -1,277 +0,0 @@
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Iterable
import numpy as np
import pandas as pd
def _project_root() -> Path:
return Path(__file__).resolve().parents[5]
def _default_input() -> Path:
return _project_root() / "tpu_orchestration" / "results" / "ppo_benchmark.csv"
def _default_output_dir() -> Path:
return Path(__file__).resolve().parent / "generated" / "legacy"
def _sanitize(key: str) -> str:
return key.replace("/", "_").replace("-", "_")
def _coerce_numeric(frame: pd.DataFrame, columns: Iterable[str]) -> None:
for column in columns:
if column in frame.columns:
frame[column] = pd.to_numeric(frame[column], errors="coerce")
def _extract_alpha(frame: pd.DataFrame) -> pd.Series:
if "study/alpha" in frame.columns:
return pd.to_numeric(frame["study/alpha"], errors="coerce")
if "alpha" in frame.columns:
return pd.to_numeric(frame["alpha"], errors="coerce")
return pd.Series(np.nan, index=frame.index, dtype=float)
def _extract_mode(frame: pd.DataFrame) -> pd.Series:
if "study/mode" in frame.columns:
return frame["study/mode"].astype(str).str.strip().str.lower()
if "study/no_robust" in frame.columns:
no_robust = pd.to_numeric(frame["study/no_robust"], errors="coerce").fillna(0.0)
return pd.Series(
np.where(no_robust > 0.5, "no_robust", "robust"),
index=frame.index,
dtype="object",
)
if "no_robust" in frame.columns:
no_robust = (
frame["no_robust"].astype(str).str.lower().isin({"1", "true", "yes"})
)
return pd.Series(
np.where(no_robust, "no_robust", "robust"),
index=frame.index,
dtype="object",
)
return pd.Series("", index=frame.index, dtype="object")
def _prepare_frame(frame: pd.DataFrame, include_non_finished: bool) -> pd.DataFrame:
    """Filter and normalise the raw benchmark table.

    Steps, in order:
      * optionally keep only rows whose ``State`` is ``finished``
        (case-insensitive), when that column exists;
      * add ``alpha`` and ``mode`` columns via the extraction helpers;
      * keep rows whose mode is ``robust``/``no_robust`` and whose alpha is set;
      * coerce the known metric columns to numeric;
      * sort by ``(alpha, mode)`` and reset the index.

    The input frame is copied first and is not modified.
    """
    data = frame.copy()

    drop_unfinished = not include_non_finished
    if drop_unfinished and "State" in data.columns:
        is_finished = data["State"].astype(str).str.lower() == "finished"
        data = data[is_finished].copy()

    data["alpha"] = _extract_alpha(data)
    data["mode"] = _extract_mode(data)

    known_mode = data["mode"].isin({"robust", "no_robust"})
    data = data[known_mode]
    has_alpha = data["alpha"].notna()
    data = data[has_alpha]

    metric_columns = [
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/volatility_mean",
        "eval/margin_mean",
        "train/alpha_adv",
        "train/coi_penalty",
        "train/ux_penalty",
        "train/agent_prob",
    ]
    _coerce_numeric(data, metric_columns)

    ordered = data.sort_values(["alpha", "mode"])
    return ordered.reset_index(drop=True)
def _summary_by_alpha_mode(frame: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Aggregate run count plus mean/std of each metric per ``(alpha, mode)`` cell.

    Output columns are ``alpha``, ``mode``, ``runs`` and, for every metric,
    ``<sanitized>_mean`` and ``<sanitized>_std``. Rows are sorted by
    ``(alpha, mode)``.
    """
    agg_spec: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        prefix = _sanitize(metric)
        agg_spec.update(
            {
                f"{prefix}_mean": (metric, "mean"),
                f"{prefix}_std": (metric, "std"),
            }
        )

    grouped = frame.groupby(["alpha", "mode"], as_index=False).agg(**agg_spec)
    ordered = grouped.sort_values(["alpha", "mode"])
    return ordered.reset_index(drop=True)
def _delta_by_alpha(summary: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
rows: list[dict[str, float]] = []
for alpha, alpha_group in summary.groupby("alpha", sort=True):
robust = alpha_group[alpha_group["mode"] == "robust"]
no_robust = alpha_group[alpha_group["mode"] == "no_robust"]
if robust.empty or no_robust.empty:
continue
row: dict[str, float] = {
"alpha": float(alpha),
"runs_robust": float(robust["runs"].iloc[0]),
"runs_no_robust": float(no_robust["runs"].iloc[0]),
}
for metric in metrics:
safe = _sanitize(metric)
robust_value = float(robust[f"{safe}_mean"].iloc[0])
no_robust_value = float(no_robust[f"{safe}_mean"].iloc[0])
delta = robust_value - no_robust_value
row[f"{safe}_robust"] = robust_value
row[f"{safe}_no_robust"] = no_robust_value
row[f"{safe}_delta"] = delta
row[f"{safe}_delta_pct"] = (
np.nan if no_robust_value == 0 else 100.0 * delta / no_robust_value
)
rows.append(row)
return pd.DataFrame(rows)
def _pairwise_win_rates(frame: pd.DataFrame) -> pd.DataFrame:
rules = {
"eval/revenue_mean": "higher",
"eval/reward_mean": "higher",
"eval/coi_leakage_mean": "lower",
"eval/volatility_mean": "lower",
}
rows: list[dict[str, float]] = []
for alpha, alpha_group in frame.groupby("alpha", sort=True):
robust = alpha_group[alpha_group["mode"] == "robust"]
no_robust = alpha_group[alpha_group["mode"] == "no_robust"]
if robust.empty or no_robust.empty:
continue
for metric, direction in rules.items():
if metric not in frame.columns:
continue
robust_values = robust[metric].dropna().to_numpy(dtype=float)
no_robust_values = no_robust[metric].dropna().to_numpy(dtype=float)
if robust_values.size == 0 or no_robust_values.size == 0:
continue
if direction == "higher":
wins = (robust_values[:, None] > no_robust_values[None, :]).sum()
else:
wins = (robust_values[:, None] < no_robust_values[None, :]).sum()
ties = (robust_values[:, None] == no_robust_values[None, :]).sum()
total = robust_values.size * no_robust_values.size
win_prob = (wins + 0.5 * ties) / total
rows.append(
{
"alpha": float(alpha),
"metric": metric,
"direction": direction,
"wins": int(wins),
"ties": int(ties),
"total_pairs": int(total),
"win_probability": float(win_prob),
}
)
return pd.DataFrame(rows)
def _overall_mode_summary(frame: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Aggregate run count plus mean/std of each metric per mode, pooled over alpha.

    Output columns are ``mode``, ``runs`` and, for every metric,
    ``<sanitized>_mean`` and ``<sanitized>_std``. Rows are sorted by mode; the
    index is not reset.
    """
    agg_spec: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        prefix = _sanitize(metric)
        for statistic in ("mean", "std"):
            agg_spec[f"{prefix}_{statistic}"] = (metric, statistic)

    per_mode = frame.groupby("mode", as_index=False).agg(**agg_spec)
    return per_mode.sort_values("mode")
def _headline_json(overall: pd.DataFrame) -> dict[str, float | str]:
if {"robust", "no_robust"} - set(overall["mode"].tolist()):
return {"status": "incomplete_modes"}
robust = overall[overall["mode"] == "robust"].iloc[0]
no_robust = overall[overall["mode"] == "no_robust"].iloc[0]
revenue_delta = float(
robust["eval_revenue_mean_mean"] - no_robust["eval_revenue_mean_mean"]
)
leakage_delta = float(
robust["eval_coi_leakage_mean_mean"] - no_robust["eval_coi_leakage_mean_mean"]
)
return {
"status": "ok",
"revenue_delta": revenue_delta,
"revenue_delta_pct": float(
100.0 * revenue_delta / no_robust["eval_revenue_mean_mean"]
),
"coi_leakage_delta": leakage_delta,
"coi_leakage_delta_pct": float(
100.0 * leakage_delta / no_robust["eval_coi_leakage_mean_mean"]
),
}
def run(input_path: Path, output_dir: Path, include_non_finished: bool) -> list[Path]:
    """Read the benchmark CSV, build all summary tables and write them out.

    Args:
        input_path: CSV file with one row per run.
        output_dir: Destination directory; created if it does not exist.
        include_non_finished: Keep rows whose ``State`` is not ``finished``.

    Returns:
        Paths of the four CSV tables followed by the headline JSON, in the
        order they were written.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    frame = _prepare_frame(
        pd.read_csv(input_path), include_non_finished=include_non_finished
    )

    candidate_metrics = (
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/volatility_mean",
        "eval/margin_mean",
        "train/alpha_adv",
        "train/coi_penalty",
        "train/ux_penalty",
        "train/agent_prob",
    )
    # Only aggregate metrics that the input actually provides.
    metrics = [name for name in candidate_metrics if name in frame.columns]

    alpha_mode = _summary_by_alpha_mode(frame, metrics)
    deltas = _delta_by_alpha(alpha_mode, metrics)
    win_rates = _pairwise_win_rates(frame)
    overall = _overall_mode_summary(frame, metrics)
    headline = _headline_json(overall)

    tables = [
        ("ppo_alpha_mode_summary.csv", alpha_mode),
        ("ppo_alpha_deltas.csv", deltas),
        ("ppo_pairwise_win_rates.csv", win_rates),
        ("ppo_overall_mode_summary.csv", overall),
    ]
    written_paths: list[Path] = []
    for filename, table in tables:
        target = output_dir / filename
        table.to_csv(target, index=False)
        written_paths.append(target)

    headline_path = output_dir / "ppo_headline_summary.json"
    headline_path.write_text(json.dumps(headline, indent=2))
    written_paths.append(headline_path)
    return written_paths
def main() -> None:
    """Command-line entry point: parse options, run the pipeline, print each written path."""
    parser = argparse.ArgumentParser(
        description="Process PPO benchmark CSV for paper tables"
    )
    parser.add_argument("--input", type=Path, default=_default_input())
    parser.add_argument("--output-dir", type=Path, default=_default_output_dir())
    parser.add_argument("--include-non-finished", action="store_true")
    options = parser.parse_args()

    written = run(
        input_path=options.input,
        output_dir=options.output_dir,
        include_non_finished=bool(options.include_non_finished),
    )
    for written_path in written:
        print(written_path)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,454 @@
from __future__ import annotations
import argparse
import json
import subprocess
from pathlib import Path
from typing import Iterable
import numpy as np
import pandas as pd
from scipy import stats
def _project_root() -> Path:
return Path(__file__).resolve().parents[5]
def _default_bundle_dir() -> Path:
    """Return the most recently modified ``bundle_*`` directory.

    Bundles are looked up under ``engine/studies/results/wandb_sweep_bundles``
    relative to the project root.

    Raises:
        FileNotFoundError: if no ``bundle_*`` directory exists there.
    """
    base = _project_root() / "engine" / "studies" / "results" / "wandb_sweep_bundles"
    candidates = [entry for entry in base.glob("bundle_*") if entry.is_dir()]
    if not candidates:
        raise FileNotFoundError(f"No sweep bundle directories found in {base}")
    # Newest by modification time; on ties the first one found wins.
    return max(candidates, key=lambda entry: entry.stat().st_mtime)
def _bundle_dir_from_id(bundle_id: str) -> Path:
    """Resolve a bundle identifier to its directory under the sweep-bundle root.

    The identifier is stripped of surrounding whitespace and given the
    ``bundle_`` prefix unless it already has one.

    Raises:
        FileNotFoundError: if the resolved path does not exist.
    """
    token = str(bundle_id).strip()
    if token.startswith("bundle_"):
        name = token
    else:
        name = f"bundle_{token}"

    bundles_root = _project_root().joinpath(
        "engine", "studies", "results", "wandb_sweep_bundles"
    )
    candidate = bundles_root / name
    if candidate.exists():
        return candidate
    raise FileNotFoundError(f"Bundle not found: {candidate}")
def _default_output_dir() -> Path:
return Path(__file__).resolve().parent / "generated" / "final"
def _truthy(value: object) -> bool:
if isinstance(value, bool):
return value
if value is None:
return False
return str(value).strip().lower() in {"1", "true", "yes", "on"}
def _mode_of(row: pd.Series) -> str:
mode_hint = str(row.get("study_mode", "")).strip().lower()
if mode_hint in {"baseline", "no_robust"}:
return "baseline"
if mode_hint in {"defended", "robust"}:
return "defended"
if _truthy(row.get("baseline_mode")) or _truthy(row.get("no_robust")):
return "baseline"
return "defended"
def _coerce_numeric(frame: pd.DataFrame, columns: Iterable[str]) -> None:
for column in columns:
if column in frame.columns:
frame[column] = pd.to_numeric(frame[column], errors="coerce")
def _load_runs(bundle_dir: Path) -> pd.DataFrame:
    """Load ``runs_finished.csv`` from a bundle and attach a ``mode`` column.

    The hyper-parameter columns and ``eval_revenue_mean`` are coerced to
    numeric, and only rows labelled ``baseline`` or ``defended`` are returned
    (as a copy).

    Raises:
        FileNotFoundError: if the bundle has no ``runs_finished.csv``.
    """
    source = bundle_dir / "runs_finished.csv"
    if not source.exists():
        raise FileNotFoundError(f"Missing required file: {source}")

    runs = pd.read_csv(source)
    runs["mode"] = runs.apply(_mode_of, axis=1)

    numeric_columns = [
        "alpha",
        "n_products",
        "eta_ux",
        "lambda_coi",
        "eval_revenue_mean",
    ]
    _coerce_numeric(runs, numeric_columns)

    recognised = runs["mode"].isin({"baseline", "defended"})
    return runs[recognised].copy()
def _get_git_commit() -> str:
try:
result = subprocess.run(
["git", "rev-parse", "HEAD"],
check=True,
text=True,
capture_output=True,
cwd=_project_root(),
)
except Exception:
return "unknown"
return result.stdout.strip()
def _apply_filters(frame: pd.DataFrame, args: argparse.Namespace) -> pd.DataFrame:
data = frame.copy()
if args.sweep_id:
allowed = {str(value) for value in args.sweep_id}
data = data[data["sweep_id"].astype(str).isin(allowed)]
if args.mode != "all":
data = data[data["mode"] == args.mode]
if args.n_products is not None:
data = data[data["n_products"] == float(args.n_products)]
if args.eta_ux is not None:
data = data[data["eta_ux"] == float(args.eta_ux)]
if args.lambda_coi is not None:
data = data[data["lambda_coi"] == float(args.lambda_coi)]
data = data[data["alpha"].notna() & data["eval_revenue_mean"].notna()]
data = data[data["alpha"] >= float(args.alpha_min)]
data = data[data["alpha"] <= float(args.alpha_max)]
return data.reset_index(drop=True)
def _design_matrix(
frame: pd.DataFrame,
*,
include_sweep_fixed_effects: bool,
) -> tuple[np.ndarray, np.ndarray, list[str]]:
y = frame["eval_revenue_mean"].to_numpy(dtype=float)
x_alpha = frame["alpha"].to_numpy(dtype=float)
columns = ["intercept", "alpha"]
blocks = [np.ones_like(x_alpha), x_alpha]
if include_sweep_fixed_effects:
dummies = pd.get_dummies(
frame["sweep_id"].astype(str), prefix="sweep", drop_first=True
)
if not dummies.empty:
blocks.append(dummies.to_numpy(dtype=float).T)
columns.extend(dummies.columns.tolist())
X = np.vstack(blocks).T
return X, y, columns
def _covariance_hc1(X: np.ndarray, residuals: np.ndarray) -> np.ndarray:
n, k = X.shape
xtx_inv = np.linalg.pinv(X.T @ X)
xr = X * residuals[:, None]
meat = xr.T @ xr
scale = float(n) / max(n - k, 1)
return scale * (xtx_inv @ meat @ xtx_inv)
def _covariance_cluster(
X: np.ndarray, residuals: np.ndarray, groups: pd.Series
) -> tuple[np.ndarray, int]:
xtx_inv = np.linalg.pinv(X.T @ X)
unique = pd.Series(groups).astype(str).dropna().unique().tolist()
g = len(unique)
n, k = X.shape
if g <= 1:
return _covariance_hc1(X, residuals), g
meat = np.zeros((k, k), dtype=float)
for value in unique:
mask = pd.Series(groups).astype(str).to_numpy() == value
Xg = X[mask]
ug = residuals[mask]
xu = Xg.T @ ug
meat += np.outer(xu, xu)
c = (g / (g - 1.0)) * ((n - 1.0) / max(n - k, 1.0))
return c * (xtx_inv @ meat @ xtx_inv), g
def _fit_ols(
X: np.ndarray,
y: np.ndarray,
columns: list[str],
*,
cov_type: str,
groups: pd.Series | None = None,
) -> dict[str, object]:
n, k = X.shape
beta, _, _, _ = np.linalg.lstsq(X, y, rcond=None)
fitted = X @ beta
residuals = y - fitted
dof = max(n - k, 1)
sse = float(np.sum(residuals**2))
y_centered = y - float(np.mean(y))
sst = float(np.sum(y_centered**2))
r2 = float(1.0 - sse / sst) if sst > 0 else 0.0
adj_r2 = float(1.0 - (1.0 - r2) * ((n - 1.0) / max(n - k, 1.0)))
if cov_type == "iid":
sigma2 = sse / dof
cov = sigma2 * np.linalg.pinv(X.T @ X)
df_t = dof
clusters = None
elif cov_type == "hc1":
cov = _covariance_hc1(X, residuals)
df_t = dof
clusters = None
elif cov_type == "cluster":
if groups is None:
raise ValueError("groups are required when cov_type='cluster'")
cov, clusters = _covariance_cluster(X, residuals, groups)
df_t = max(clusters - 1, 1)
else:
raise ValueError(f"Unsupported cov_type: {cov_type}")
se = np.sqrt(np.clip(np.diag(cov), 0.0, np.inf))
t_stats = np.divide(beta, se, out=np.zeros_like(beta), where=se > 0)
p_values = 2.0 * (1.0 - stats.t.cdf(np.abs(t_stats), df=df_t))
t_crit = float(stats.t.ppf(0.975, df=df_t))
ci_low = beta - t_crit * se
ci_high = beta + t_crit * se
coef_rows = []
for idx, name in enumerate(columns):
coef_rows.append(
{
"name": name,
"coef": float(beta[idx]),
"std_error": float(se[idx]),
"t_stat": float(t_stats[idx]),
"p_value": float(p_values[idx]),
"ci95_low": float(ci_low[idx]),
"ci95_high": float(ci_high[idx]),
}
)
return {
"n": int(n),
"k": int(k),
"dof": int(dof),
"df_t": int(df_t),
"cov_type": cov_type,
"clusters": int(clusters) if clusters is not None else None,
"r2": r2,
"adj_r2": adj_r2,
"sse": sse,
"coefficients": coef_rows,
"residuals": residuals,
"fitted": fitted,
"beta": beta,
}
def _diagnostics(
X: np.ndarray, y: np.ndarray, fit: dict[str, object]
) -> dict[str, object]:
residuals = np.asarray(fit["residuals"], dtype=float)
n, k = X.shape
if residuals.size < 8:
normality = {"test": "jarque_bera", "available": False}
else:
jb_stat, jb_p = stats.jarque_bera(residuals)
normality = {
"test": "jarque_bera",
"available": True,
"statistic": float(jb_stat),
"p_value": float(jb_p),
}
if k <= 1:
hetero = {"test": "breusch_pagan", "available": False}
else:
u2 = residuals**2
aux = _fit_ols(X, u2, [f"x{i}" for i in range(k)], cov_type="iid")
lm = float(len(u2) * float(aux["r2"]))
df_bp = k - 1
p_bp = float(1.0 - stats.chi2.cdf(lm, df_bp))
hetero = {
"test": "breusch_pagan",
"available": True,
"lm_stat": lm,
"df": int(df_bp),
"p_value": p_bp,
}
xtx_inv = np.linalg.pinv(X.T @ X)
leverages = np.sum((X @ xtx_inv) * X, axis=1)
mse = float(np.sum(residuals**2) / max(n - k, 1))
if mse <= 0:
cooks = np.zeros(n, dtype=float)
else:
denom = np.clip((1.0 - leverages) ** 2, 1e-10, np.inf)
cooks = ((residuals**2) / (k * mse)) * (leverages / denom)
return {
"normality": normality,
"heteroskedasticity": hetero,
"influence": {
"max_leverage": float(np.max(leverages)) if leverages.size else 0.0,
"mean_leverage": float(np.mean(leverages)) if leverages.size else 0.0,
"high_leverage_threshold": float(2.0 * k / max(n, 1)),
"high_leverage_count": int(np.sum(leverages > (2.0 * k / max(n, 1)))),
"max_cooks_distance": float(np.max(cooks)) if cooks.size else 0.0,
"high_cooks_threshold": float(4.0 / max(n, 1)),
"high_cooks_count": int(np.sum(cooks > (4.0 / max(n, 1)))),
},
}
def run(args: argparse.Namespace) -> list[Path]:
    """Run the full revenue-vs-alpha analysis for one cohort and write all artefacts.

    Outputs, written to ``args.output_dir`` in this order:
      1. ``revenue_alpha_filtered.csv``: the filtered cohort;
      2. ``revenue_alpha_sample_accounting.json``: filters, counts, git commit;
      3. ``revenue_alpha_simple_ols.json``: pooled OLS with classical errors;
      4. ``revenue_alpha_fixed_effects.json``: OLS with sweep dummies, using
         cluster-robust errors with more than one sweep and HC1 otherwise;
      5. ``revenue_alpha_per_sweep.csv``: one HC1 fit per sweep that has at
         least 3 rows and 2 distinct alphas;
      6. ``revenue_alpha_diagnostics.json``: diagnostics of the fixed-effects
         fit when clustered, else of the simple fit.

    Returns:
        The six output paths in the order above.

    Raises:
        ValueError: if the cohort has fewer than 3 rows or fewer than 2
            distinct alpha values.
    """

    def without_arrays(fit: dict[str, object]) -> dict[str, object]:
        # The raw numpy arrays are not JSON-serialisable and are left out of reports.
        return {
            key: value
            for key, value in fit.items()
            if key not in {"residuals", "fitted", "beta"}
        }

    def write_json(target: Path, payload: object) -> None:
        target.write_text(json.dumps(payload, indent=2) + "\n")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    runs = _load_runs(Path(args.bundle_dir))
    filtered = _apply_filters(runs, args)
    if len(filtered) < 3:
        raise ValueError("Filtered cohort must contain at least 3 rows")
    if filtered["alpha"].nunique() < 2:
        raise ValueError("Filtered cohort must contain at least 2 unique alpha values")

    filtered_csv = output_dir / "revenue_alpha_filtered.csv"
    filtered.to_csv(filtered_csv, index=False)

    active_filters = {
        "sweep_id": args.sweep_id,
        "mode": args.mode,
        "n_products": args.n_products,
        "eta_ux": args.eta_ux,
        "lambda_coi": args.lambda_coi,
        "alpha_min": args.alpha_min,
        "alpha_max": args.alpha_max,
    }
    sample_accounting = {
        "bundle_dir": str(Path(args.bundle_dir)),
        "git_commit": _get_git_commit(),
        "cohort_name": str(args.cohort_name),
        "filters": active_filters,
        "n_rows": int(len(filtered)),
        "n_sweeps": int(filtered["sweep_id"].nunique()),
        "alpha_unique": sorted(
            float(v) for v in filtered["alpha"].dropna().unique().tolist()
        ),
        "rows_by_sweep": filtered.groupby("sweep_id").size().astype(int).to_dict(),
        "rows_by_mode": filtered.groupby("mode").size().astype(int).to_dict(),
    }
    sample_path = output_dir / "revenue_alpha_sample_accounting.json"
    write_json(sample_path, sample_accounting)

    # Pooled regression: revenue on intercept + alpha, classical standard errors.
    X_simple, y, cols_simple = _design_matrix(
        filtered, include_sweep_fixed_effects=False
    )
    fit_simple = _fit_ols(X_simple, y, cols_simple, cov_type="iid")
    simple_path = output_dir / "revenue_alpha_simple_ols.json"
    write_json(simple_path, without_arrays(fit_simple))

    # Fixed-effects regression; clustering needs at least two sweeps.
    X_fe, y_fe, cols_fe = _design_matrix(filtered, include_sweep_fixed_effects=True)
    clustered = filtered["sweep_id"].nunique() > 1
    cov_type = "cluster" if clustered else "hc1"
    fit_fe = _fit_ols(
        X_fe, y_fe, cols_fe, cov_type=cov_type, groups=filtered["sweep_id"]
    )
    fe_path = output_dir / "revenue_alpha_fixed_effects.json"
    write_json(fe_path, without_arrays(fit_fe))

    per_sweep_rows: list[dict[str, float | str | int]] = []
    for sweep_id, group in filtered.groupby("sweep_id"):
        too_small = len(group) < 3 or group["alpha"].nunique() < 2
        if too_small:
            continue
        X_sw, y_sw, cols_sw = _design_matrix(group, include_sweep_fixed_effects=False)
        fit_sw = _fit_ols(X_sw, y_sw, cols_sw, cov_type="hc1")
        alpha_row = next(
            row for row in fit_sw["coefficients"] if row["name"] == "alpha"
        )
        summary_row: dict[str, float | str | int] = {
            "sweep_id": str(sweep_id),
            "n": int(fit_sw["n"]),
        }
        for field in ("coef", "std_error", "t_stat", "p_value", "ci95_low", "ci95_high"):
            summary_row[f"alpha_{field}"] = float(alpha_row[field])
        summary_row["r2"] = float(fit_sw["r2"])
        per_sweep_rows.append(summary_row)

    per_sweep_frame = pd.DataFrame(per_sweep_rows)
    if not per_sweep_frame.empty:
        per_sweep_frame = per_sweep_frame.sort_values("sweep_id").reset_index(drop=True)
    per_sweep_path = output_dir / "revenue_alpha_per_sweep.csv"
    per_sweep_frame.to_csv(per_sweep_path, index=False)

    if cov_type == "cluster":
        X_for_diagnostics, fit_for_diagnostics = X_fe, fit_fe
    else:
        X_for_diagnostics, fit_for_diagnostics = X_simple, fit_simple
    diagnostics = _diagnostics(X_for_diagnostics, y, fit_for_diagnostics)
    diagnostics_path = output_dir / "revenue_alpha_diagnostics.json"
    write_json(diagnostics_path, diagnostics)

    return [
        filtered_csv,
        sample_path,
        simple_path,
        fe_path,
        per_sweep_path,
        diagnostics_path,
    ]
def main() -> None:
    """Command-line entry point: parse options, pick the bundle, run, print output paths.

    Bundle selection: ``--bundle-id`` takes precedence over ``--bundle-dir``;
    when neither is given, the most recently modified bundle is used.
    """
    parser = argparse.ArgumentParser(
        description="Reproducible contamination-vs-revenue analysis from a sweep bundle"
    )
    parser.add_argument("--bundle-dir", type=Path, default=None)
    parser.add_argument("--bundle-id", type=str, default=None)
    parser.add_argument("--output-dir", type=Path, default=_default_output_dir())
    parser.add_argument("--cohort-name", type=str, default="custom")
    parser.add_argument("--sweep-id", action="append", default=[])
    parser.add_argument(
        "--mode", choices=["all", "baseline", "defended"], default="all"
    )
    # Optional exact-match filters on the sweep hyper-parameters.
    for flag in ("--n-products", "--eta-ux", "--lambda-coi"):
        parser.add_argument(flag, type=float, default=None)
    # Inclusive alpha range.
    for flag, default in (("--alpha-min", 0.0), ("--alpha-max", 1.0)):
        parser.add_argument(flag, type=float, default=default)
    args = parser.parse_args()

    if args.bundle_id:
        args.bundle_dir = _bundle_dir_from_id(args.bundle_id)
    elif args.bundle_dir is None:
        args.bundle_dir = _default_bundle_dir()

    for written_path in run(args):
        print(written_path)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,38 @@
% pgfplots figure: the logistic curve 1/(1+exp(-x)) plotted against
% \Delta_H - \Delta_A, with one marked point on each side of zero (x = -2.15 and x = 2.15).
\begin{tikzpicture}
\begin{axis}[
% plot size
width=8.8cm,
height=5.2cm,
% visible range; the y-range extends slightly beyond [0,1]
xmin=-4.6,
xmax=4.6,
ymin=-0.02,
ymax=1.06,
axis lines=left,
xlabel={$\Delta_H - \Delta_A$},
xlabel style={yshift=-1.5pt},
ylabel={$f(\tau')$},
xtick={-4,-2,0,2,4},
ytick={0,0.5,1},
tick label style={font=\small},
label style={font=\small},
line width=0.6pt,
% clipping is disabled for everything drawn inside this axis
clip=false,
enlarge x limits=false,
]
% the logistic curve itself, sampled over the full x-range
\addplot[
thick,
domain=-4.6:4.6,
samples=201,
smooth,
] {1/(1+exp(-x))};
% dashed vertical guides from the x-axis up to the curve at x = -2.15 and x = 2.15
\draw[dashed, line width=0.45pt, black!38]
(axis cs:-2.15,0) -- (axis cs:-2.15,{1/(1+exp(2.15))});
\draw[dashed, line width=0.45pt, black!38]
(axis cs:2.15,0) -- (axis cs:2.15,{1/(1+exp(-2.15))});
% markers on the curve: lighter dot on the left, solid black dot on the right
\addplot[only marks, mark=*, mark size=2.2pt, forget plot, draw=black!55, fill=black!55]
coordinates {(-2.15, {1/(1+exp(2.15))})};
\addplot[only marks, mark=*, mark size=2.2pt, forget plot, draw=black, fill=black]
coordinates {(2.15, {1/(1+exp(-2.15))})};
% labels above each marker stating the sign of \Delta_H - \Delta_A at that point
\node[font=\footnotesize, anchor=south, inner sep=11pt] at (axis cs:-2.15,{1/(1+exp(2.15))}) {$\Delta_H<\Delta_A$};
\node[font=\footnotesize, anchor=south, inner sep=6pt] at (axis cs:2.15,{1/(1+exp(-2.15))}) {$\Delta_H>\Delta_A$};
\end{axis}
\end{tikzpicture}

View File

@@ -20,11 +20,10 @@
bA/.style={rectangle, rounded corners=3pt, draw=heroAmberBorder, fill=heroAmber,
line width=0.9pt, align=center, minimum height=0.85cm},
bY/.style={rectangle, rounded corners=3pt, draw=heroGrayBorder, fill=heroGray,
line width=0.9pt, align=center, minimum height=0.82cm},
line width=0.9pt, align=center, minimum height=0.85cm},
pill/.style={ellipse, draw=black!50, fill=black!4, line width=0.75pt,
align=center, minimum width=1.6cm, minimum height=0.68cm},
arr/.style={->, draw=black!80, line width=0.88pt},
bidir/.style={<->, draw=black!80, line width=0.88pt},
darr/.style={->, draw=black!60, line width=0.80pt, densely dashed},
crossA/.style={->, draw=heroAmberBorder!90!black, line width=1.15pt, dash pattern=on 3.5pt off 2pt},
crossG/.style={->, draw=heroGreenBorder!90!black, line width=1.15pt, dash pattern=on 3.5pt off 2pt},
@@ -55,7 +54,8 @@
\draw[arr] (human.east) -- (web.west);
\draw[arr] (agent.east) -- (web.west);
\draw[arr] (web.east) -- (provider.west);
\draw[bidir] (provider.east) -- (redis.west);
% single arrow: bidir on a short edge stacks two tips and reads as a messy cross
\draw[arr] (provider.east) -- (redis.west);
% web/provider -> kafka
\draw[arr] (web.south) -- (kBehav.north)
@@ -63,9 +63,9 @@
\draw[arr] (provider.south) -- (kQuotes.north)
node[midway, right, lbl] {$(i,p,\mathrm{sid},\phi,t)$};
% kafka -> worker (straight south)
\draw[arr] (kBehav.south) -- (worker.north);
\draw[arr] (kQuotes.south) -- (worker.north);
% kafka -> worker: behavior stream vertical; price quotes L-shaped so both meet worker without a diagonal across the panel
\draw[arr] (kBehav.south) -- (worker.north);
\draw[arr, rounded corners=3pt] (kQuotes.south) -- (7.5, 5.35) -| (worker.north);
% worker -> registry
\draw[arr] (worker.east) -- (registry.west);
@@ -79,36 +79,37 @@
-- (provider.north);
%% ============================================================
%% Panel B   x: 11.6--20.4   y: 2.2--10.0
%% Panel B   x: 11.6--20.0   y: 2.2--10.0
%% ============================================================
\draw[panel] (11.6,2.2) rectangle (19.8,10.0);
\draw[panel] (11.6,2.2) rectangle (20.0,10.0);
\node[anchor=west, font=\small\bfseries] at (11.85,9.72) {(b) Distinguishability layer};
\node[bG, minimum width=2.4cm] (session) at (14.0, 8.9) {Session prefix\\$\tau'$};
\node[bB, minimum width=2.4cm] (empKern) at (13.65,7.45) {Empirical kernel\\$\hat T'$};
\node[bY, minimum width=2.4cm] (weakLab) at (17.55,8.9) {Weak labels\\$\mathcal{D}_H,\mathcal{D}_A$};
\node[bY, minimum width=2.2cm] (protoH) at (12.8, 5.9) {Prototype\\$\bar T_H$};
\node[bA, minimum width=2.4cm] (kldist) at (15.55,5.9) {KL distances\\$\Delta_H,\Delta_A$};
\node[bY, minimum width=2.2cm] (protoA) at (18.3, 5.9) {Prototype\\$\bar T_A$};
\node[bB, minimum width=2.9cm] (calHead) at (13.55,4.25) {Contrastive\\calibration head};
\node[bG, minimum width=2.55cm] (score) at (17.75,4.25) {Session score\\$f(\tau'),\hat\alpha(\tau')$};
% x positions shifted +0.3 from the original layout (between left-heavy and +0.55 which hugged the right edge)
\node[bG, minimum width=2.4cm] (session) at (14.3, 8.9) {Session prefix\\$\tau'$};
\node[bB, minimum width=2.4cm] (empKern) at (13.95,7.45) {Empirical kernel\\$\hat T'$};
\node[bY, minimum width=2.4cm] (weakLab) at (17.85,8.9) {Weak labels\\$\mathcal{D}_H,\mathcal{D}_A$};
\node[bY, minimum width=2.2cm] (protoH) at (13.1, 5.9) {Prototype\\$\bar T_H$};
\node[bA, minimum width=2.4cm] (kldist) at (15.85,5.9) {KL distances\\$\Delta_H,\Delta_A$};
\node[bY, minimum width=2.2cm] (protoA) at (18.6, 5.9) {Prototype\\$\bar T_A$};
\node[bB, minimum width=2.9cm] (calHead) at (13.85,4.25) {Contrastive\\calibration head};
\node[bG, minimum width=2.55cm] (score) at (18.05,4.25) {Session score\\$f(\tau'),\hat\alpha(\tau')$};
\node[lbl] at (15.55, 3.15) {$\hat\alpha(\tau')=\sigma\!\left(\beta(\Delta_H-\Delta_A)\right)$};
\node[lbl] at (15.85, 3.15) {$\hat\alpha(\tau')=\sigma\!\left(\beta(\Delta_H-\Delta_A)\right)$};
\draw[arr, rounded corners=4pt] (session.south) -- (empKern.north);
\draw[arr, rounded corners=4pt] (empKern.south) -- (13.65, 6.8) -| (protoH.north);
\draw[arr, rounded corners=4pt] (weakLab.south) -- (17.55, 6.8) -| (protoA.north);
\draw[arr, rounded corners=4pt] (empKern.south) -- (13.95, 6.8) -| (protoH.north);
\draw[arr, rounded corners=4pt] (weakLab.south) -- (17.85, 6.8) -| (protoA.north);
% weak labels -> protoH: go south then hard-left below weakLab
\draw[arr, rounded corners=4pt] (weakLab.south) -- (17.55,6.8) -| (protoH.north east);
\draw[arr, rounded corners=4pt] (weakLab.south) -- (17.85,6.8) -| (protoH.north);
\draw[arr] (protoH.east) -- (kldist.west);
\draw[arr] (protoA.west) -- (kldist.east);
\draw[arr] (kldist.south) -- (calHead.north east);
\draw[arr, rounded corners=4pt] (kldist.south) -- (calHead.north);
\draw[arr] (calHead.east) -- (score.west);
%% ============================================================
%% Panel C   x: 20.8--31.0   y: 2.2--10.0
%% Panel C   x: 20.4--31.0   y: 2.2--10.0
%% ============================================================
\draw[panel] (20.8,2.2) rectangle (31.0,10.0);
\draw[panel] (20.4,2.2) rectangle (31.0,10.0);
\node[anchor=west, font=\small\bfseries] at (21.05,9.72) {(c) Distributionally robust control};
\node[bB, minimum width=3.1cm] (state) at (23.15, 8.9)
@@ -129,13 +130,13 @@
\draw[arr, rounded corners=4pt] (ambSet.south) -- (23.15, 6.6) -| ([xshift=-2cm]contScen.north);
\draw[arr, rounded corners=4pt] (innerMin.south) -- (28.55, 6.6) -| ([xshift=2cm]contScen.north);
\draw[arr] (contScen.south) -- (reward.north);
\draw[arr, rounded corners=6pt] (reward.south) -- (25.9, 3.7) -| (policy.north);
% join reward to policy along policy.north y so the last segment never approaches north from below (avoids upward arrowhead on top edge)
\draw[arr, rounded corners=4pt] (reward.south) -- (reward.south |- policy.north) -- (policy.north);
\draw[arr] (policy.east) -- (publish.west);
% market response: up the right edge of panel C, entirely inside, rounded
% market response: up the right edge, then left into state summary from the east
\draw[arrG, rounded corners=6pt] (publish.east) -- (30.6, 3.05)
-- (30.6, 9.8)
-- node[midway, lbl] {market response} (state.north |- 0, 9.8)
-- (state.north);
-- (30.6, 8.9)
-- node[midway, above, lbl] {market response} (state.east);
%% ============================================================
%% Cross-panel connectors gutter at y = 1.0..2.2
@@ -152,8 +153,8 @@
% 2. Score -> State (depth y=1.45)
\draw[crossG, rounded corners=6pt]
(score.south) -- (score.south |- 0, 1.45)
-- node[pos=0.5, lbl] {contamination signal} (20.6, 1.45)
-- (20.6, 8.9)
-- node[pos=0.5, lbl] {contamination signal} (20.2, 1.45)
-- (20.2, 8.9)
-- (state.west);
% 3. Publish -> Provider (depth y=1.05, deepest)

Binary file not shown.

Binary file not shown.

View File

@@ -84,4 +84,18 @@ v4 & 64 & 275 & $64 \times 275 = 17{,}600$ \\
Converting to petaFLOPS: 160,320 TFLOPS equals approximately 160 PFLOPS. This is the theoretical peak under sustained arithmetic operations; realized throughput depends on memory bandwidth utilization and inter-chip communication overhead, but the figure serves as a useful upper bound for provisioning decisions.
\section{KL divergence when the reference has zeros}
\label{app:kl_zeros}
The textbook definition $D_{\mathrm{KL}}(P\parallel Q)=\sum_k P(k)\log(P(k)/Q(k))$ cannot be used as-is when the empirical reference assigns $Q(k)=0$ to a transition that the session distribution still visits: if $P(k)>0$ and $Q(k)=0$, the corresponding term diverges to $+\infty$. With only 29 sessions, the estimated transition rows are very sparse, so transitions that were ``never seen in the prototype'' occur frequently.
In the implementation we apply a standard smoothing fix: a small floor $\varepsilon$ is added to both the numerator and the denominator inside the logarithm so that neither is exactly zero, which turns the sum into a finite, smoothed surrogate rather than a literal KL divergence on raw counts. We also skip source states that are entirely absent from the reference kernel, because there is no reference row to compare against. This keeps the pipeline running and the divergence scores on a comparable scale, at the cost that the resulting number is a regularized approximation of the KL divergence rather than an exact information-theoretic quantity. This trade-off is acceptable here because we use only the gap between the human-anchored and agent-anchored scores as a weak separability signal, not as a calibrated physical constant.
\section{Why the logarithm appears in the revelation surrogate}
\label{app:revelation_log}
$\text{COI}_{\text{leak}} = f(\tau')\cdot\text{InfoValue}$. Either $\text{InfoValue}=c>0$ (query-tax) or $\text{InfoValue}=-\log\pi(p\mid\tau')$ (revelation), with $\pi(\cdot\mid\tau')$ the policy over quoted prices in context $\tau'$.
For probability $q$, $-\log q$ is surprisal; for independent events, $-\log\prod_i q_i=\sum_i(-\log q_i)$. The revelation surrogate is that surprisal under $\pi(\cdot\mid\tau')$, scaled by $f(\tau')$. Use $\max\{\pi,\varepsilon\}$ so the term stays finite (cf.\ Appendix~\ref{app:kl_zeros}).
\end{document}

View File

@@ -18,20 +18,23 @@
\end{titlepage}
\begin{abstract}
With accelerated growth of Lager Language Model agents in e-commerce a novel adversarial dynamic to digital markets emerges. This paper address the vulnerability of dynamic pricing systems to AI intermediaries that decouple the information gather stages from the transaction execution. By conducing reconnaissance isolates sessions, agents circumvent the ``Cost of Information'' (COI) defined as the accumulated price premium typically thought demand expression estimators.
We formally define this phenomenon and derive the Cost of Information Theorem, proving that as the saturation of independent, utility-maximizing agents increases, the platforms ability to sustain a COI converges to zero, rendering standard dynamic pricing mechanisms incentive-incompatible.
To respond to this threat we propose a defensive framework which integrates behavioral economics with Adversarially Distributionally Robust Optimization (DRO). We introduce a custom e-commerce research platform built on hybrid Kappa-Lambda architecture, designed to capture and simulate high-fidelity controlled interaction trajectories. We further demonstrate through modeling that human and agent behaviors exhibit distinct transition probability kernels, enabling the construction of discriminative models based on Kullback-Leibler divergence.
These behavioral signals serve as inputs for a Distributionally Robust Reinforcement Learning (DR-RL) agent. We formulate the pricing problem as a Stackelberg game where the learner optimizes against an ambiguity set of demand distributions defined by the Wasserstein distance. This approach allows the pricing policy to remain robust against non-stationary contamination without overfitting to deterministic demand curves. The research validates a mechanism for preserving margin integrity and market equilibrium in an agent-mediated economy, while minimizing degradation to the legitimate human user experience (UX).
\noindent
Large language model (LLM) agents are spreading in e-commerce; one consequence is intermediaries that can separate information gathering from transaction execution. This thesis studies dynamic pricing when agents survey in isolated sessions and thereby weaken the \emph{Cost of Information} (COI), the premium platforms typically extract once demand signals are expressed.
We formalize the phenomenon and prove a Cost of Information theorem: as independent, utility-maximizing agents saturate price queries, the platform's sustainable margin goes to zero, so ordinary dynamic pricing is incentive-incompatible in the limit.
The defensive design combines behavioral signals with distributionally robust optimization (DRO). We implement a controlled storefront on a hybrid batch-streaming architecture and show that human and agent sessions induce different transition kernels. Kullback--Leibler divergence to class prototypes yields session scores that feed a distributionally robust reinforcement learning (DR-RL) policy, posed as a Stackelberg game with a Wasserstein ambiguity set over demand so the learner does not collapse to a single empirical demand curve under shifting contamination.
Factorial training on TPUs shows the expected short-run revenue hit from contamination and that the robust objective recovers COI and equilibrium structure in harder regimes (higher contamination, larger catalogs), accounting for UX to prevent supra-competitive pricing. Code and an interaction dataset are released for work on agent-mediated traffic.
\end{abstract}
\noindent\textbf{Keywords:} Dynamic Pricing, LLM Agents, Adversarial Machine Learning, E-commerce, Behavioral Detection, Reinforcement Learning
\vspace{1em}
\noindent\textbf{Acknowledgments:} This research was supported by the TPU Research Cloud program, which provided access to Google Cloud TPU accelerators (including TPU v4, v5e, and v6e).
\vspace{0.5em}
\noindent\textbf{Project page:} \url{https://velocitatem.github.io/PHANTOM/}
\clearpage
\tableofcontents
\clearpage
\input{chapters/01-intro}
\input{chapters/02-literature-review}
@@ -40,6 +43,8 @@ These behavioral signals serve as inputs for a Distributionally Robust Reinforce
\input{chapters/05-discussion}
\input{chapters/06-conclusion}
\input{chapters/acknowledgements}
\printbibliography
\clearpage
@@ -48,14 +53,14 @@ These behavioral signals serve as inputs for a Distributionally Robust Reinforce
\begin{description}
\item[Agent $A$] A non-human actor, typically an LLM-driven system that executes web actions toward a goal.
\item[Human $H$] A human participant interacting with the platform to complete a task.
\item[Actor Type $\theta$] A latent class parameter describing whether a session is generated by a human or an agent profile.
\item[Actor Class $Y$] A latent class parameter describing whether a session is generated by a human or an agent profile.
\item[Platform] A web interface exposing purchasable items and their offered prices.
\item[Session $s$] A bounded interaction record tied to one actor and one session identifier.
\item[Event $e_{s,k}$] A single interaction tuple in a session, including action, item target, and timestamp.
\item[Trajectory $\tau_s$] The ordered sequence of events generated within a session.
\item[Demand Proxy $\hat{q}_{t,i}$] A weighted aggregate of observed actions used as an operational substitute for latent demand.
\item[Action Weight Function $\omega(a)$] A mapping from action type to signal strength in the demand proxy.
\item[True Demand $d(p;\theta)$] The latent purchase response as a function of price and actor type.
\item[True Demand $d(p\mid Y,\theta)$] The latent purchase response as a function of price, actor class, and latent type.
\item[Contamination $\alpha$] The proportion of agent-generated traffic in the session mixture.
\item[Non-stationary Noise $\epsilon_t$] Time-varying residual variation not explained by the actor mixture.
\item[Pricing Policy $\pi(\tau)$] A function mapping observed interaction history to an offered price.
@@ -110,29 +115,23 @@ v4 & 64 & 275 & $64 \times 275 = 17{,}600$ \\
Converting to petaFLOPS: $160{,}320\;\text{TFLOPS} = 160.32\;\text{PFLOPS} \approx 160\;\text{PFLOPS}$. This is the theoretical peak under sustained BF16 arithmetic; realized throughput depends on memory bandwidth utilization and inter-chip communication overhead, but the figure serves as a useful upper bound for provisioning decisions.
\section{Slope-Test Verification: Revenue vs. Contamination}
\label{app:alpha_revenue_slope}
This appendix provides a compact verification of the slope result reported in the main results section. Using the same run-level pairs $x_i=\texttt{study/alpha}_i$ and $y_i=\texttt{eval/revenue\_mean}_i$ ($n=95$), we re-checked the ordinary least squares slope test in Python with standard test routines (SciPy two-sided $t$ test for the slope).
\[
\widehat{y}=326{,}878.57-60{,}631.95\,x,
\]
\[
t(93)=-8.2148,\qquad p=1.2038\times 10^{-12},\qquad R^2=0.4205,\qquad 95\%\,\text{CI}_{\beta_1}=[-75{,}288.76,\,-45{,}975.13].
\]
\section{KL divergence when the reference has zeros}
\label{app:kl_zeros}
The Python verification reproduces the reported coefficients and inference values, confirming that the slope-test results are correct under standard methods.
The textbook definition $D_{\mathrm{KL}}(P\parallel Q)=\sum_k P(k)\log(P(k)/Q(k))$ is not usable as-is when our empirical reference puts $Q(k)=0$ somewhere the session distribution still visits: if $P(k)>0$ and $Q(k)=0$, that term wants to blow up to infinity. With only 29 sessions the estimated transition rows are incredibly sparse.
\section{whoclickedit Dataset Card}
\label{app:whoclicked_card}
In code we do the basic fix: add a tiny floor $\varepsilon$ to both the numerator and denominator inside the log so nothing is exactly zero, which turns the sum into a finite, smoothed surrogate rather than a literal KL to raw counts. We also skip source states that do not exist at all in the reference kernel, because there is nowhere honest to compare against. This keeps the pipeline running and the divergence scores on a comparable scale, at the cost that the number is regularized KL behavior, not a purist information-theoretic quantity, which is acceptable here because we only use the gap between human-anchored and agent-anchored scores as a weak separability signal.
For transparency and reproducibility, this appendix includes the full dataset card used for the public release of the \texttt{whoclickedit} dataset.
\lstinputlisting[
caption={whoclickedit dataset card (README snapshot)},
label={lst:whoclicked_dataset_card}
]{chapters/auto/whoclicked_dataset_card.md}
\section{Expanding the Intuition of Information Value in the Reward}
\label{app:revelation_log}
Leakage is $\text{COI}_{\text{leak}} = f(\tau')\cdot\text{InfoValue}$. The query-tax form fixes $\text{InfoValue}=c>0$. The revelation form sets $\text{InfoValue}(p,\tau')=-\log\pi(p\mid\tau')$, with $\pi(\cdot\mid\tau')$ the policy distribution over quoted prices in context $\tau'$ (discretized as in the engine).
For an outcome with probability $q$, the quantity $-\log q$ is \emph{surprisal}. For independent events, $-\log\prod_i q_i=\sum_i(-\log q_i)$. The revelation term is surprisal under $X\sim\pi(\cdot\mid\tau')$, multiplied by $f(\tau')$. In practice we use $\max\{\pi,\varepsilon\}$ in place of $\pi$ so the log stays finite (same spirit as Appendix~\ref{app:kl_zeros}).
% \input{../build/concatenated_code}

View File

@@ -300,9 +300,9 @@ where $W_p$ is the $p$-Wasserstein distance and $\epsilon > 0$ is the ambiguity
The platform seeks a policy $\pi^*$ that maximizes worst-case revenue over the ambiguity set while penalizing information leakage to suspected agents:
\begin{equation}
\label{eq:robust_policy}
\pi^* = \arg \max_{\pi} \min_{Q \in \mathcal{U}_\epsilon} \; \mathbb{E}_{d \sim Q} \left[ R(p, d) - \lambda \cdot \text{COI}_{\text{leak}}(p, \tau') - \eta \cdot \text{UX}(\tau', p) \right]
\pi^* = \arg \max_{\pi} \min_{Q \in \mathcal{U}_\epsilon} \; \mathbb{E}_{d \sim Q} \left[ R(p, d) - \lambda \cdot \text{COI}_{\text{leak}}(p, \tau') - \eta_{\text{ux}} \cdot \text{UX}(\tau', p) \right]
\end{equation}
where $R(p, d) = p \cdot d$ is the revenue function.
where $R(p, d) = p \cdot d$ is the revenue function, $\lambda$ scales COI leakage, and $\eta_{\text{ux}}$ scales the UX penalty with $\text{UX}(\tau', p)\in[0,1]$.
\begin{definition}[COI Leakage]
The per-query information leakage cost is:

View File

@@ -83,7 +83,7 @@ In order for our research to have grounding in interactions we built a robust e-
The architecture of this platform begins with the deployed web-apps posting interaction data to our backend which processes them and stores each ingested interaction into a Kafka cluster. This serves as our data reservoir tracking and associating each interaction with its session and importantly with which experiment it belongs to. Not only do we track the behavioral interactions, but our pricing provider micro-service, once called by the frontend, reports the observed/queried price-product into Kafka. This Kafka cluster is subscribed to by our pipeline which is configured on a schedule in Airflow, with the possibility of manual trigger. The final stage of the pricing pipeline submits computed dynamic pricing results into a Redis database for quick updates which is then read by the pricing provider and displayed on the webapp. This is a very generic end-to-end mechanism which is applicable to a variety of different e-commerce tasks. We intentionally put emphasis on the development of this infrastructure to establish a reproducible framework for interaction and to minimize any noise.
\paragraph{Public Web Artifact} We transition the Kappa like architecture of the data collection to a Lambda architecture for actual learning in a surrogate environment. This allows us to move faster on data which is provided and helps us create a feedback loop for production deployment. To support further research in this intersection of fields we release P4P \footnote{\url{https://github.com/velocitatem/p4p}} as a public repository providing the interaction layer of the PHANTOM framework. This provides a configurable storefront which can be tailored to any commercial setting with a standardized session-level event tracking. We document the API adapters or what the framework expects in terms of schemas for pricing providers and log ingestion servicse. The repository is intended for controlled experimentation and method replication rather than production commerce deployment.
\paragraph{Public Web Artifact} We transition the Kappa-like architecture of the data collection to a Lambda architecture for actual learning in a surrogate environment. This allows us to move faster on data which is provided and helps us create a feedback loop for production deployment. To support further research in this intersection of fields we release P4P \footnote{\url{https://github.com/velocitatem/p4p}} as a public repository providing the interaction layer of the PHANTOM framework. This provides a configurable storefront which can be tailored to any commercial setting with standardized session-level event tracking. We document the API adapters or what the framework expects in terms of schemas for pricing providers and log ingestion services. The repository is intended for controlled experimentation and method replication rather than production commerce deployment.
\subsubsection{DevOps Principles}
@@ -130,9 +130,9 @@ To speak to realism, user interviews reported that the platform architecture mir
The dynamic pricing mechanism elicited immediate behavioral adjustments. Participants were sensitive to price volatility: sudden boosts triggered urgency and faster booking attempts, while large listing-to-final discrepancies triggered deeper comparison behavior. This is comforting because the controlled setup still produces commercially relevant interaction data.
\subsubsection{Design of Training Factorial Study}
\subsubsection{Design of Training Sweeps}
The simulator has multiple configurable factors. We design a multi-factor study across five axes derived from the sweep configurations: (1) RL algorithm (PPO, A2C, DQN, Q-table; 4 levels), (2) contamination ratio sampled at four representative levels between 0.1 and 0.6, (3) robustness radius (3 levels), (4) COI penalty weight at two reference levels, and (5) pricing action granularity (two discretization settings for action levels); giving a grid of 192 configurations. Statistical power for the behavioral comparisons is determined by a two-sample test over per-session divergence scores.
The simulator has multiple configurable factors. Training runs are driven by Weights \& Biases sweep definitions versioned with the codebase, mixing random and grid schedules rather than a single full factorial. For the contamination ratio $\alpha$, exploratory sweeps draw $\alpha$ uniformly on $[0.1,0.6]$; some sweeps use the narrower interval $[0.1,0.5]$. Grid sweeps fix explicit level sets, for example $\alpha\in\{0.1,0.2,0.3,0.4,0.6,0.8\}$ (six levels, including $0.8$ beyond the typical exploratory upper endpoint) or five levels $\{0.1,0.2,0.3,0.4,0.6\}$. Auxiliary schedules also include $\alpha=0$ alongside positive values. Robustness radius $\epsilon_\alpha$, COI penalty $\lambda_\text{coi}$, RL algorithm (\texttt{ppo}, \texttt{a2c}, \texttt{dqn}, \texttt{qtable}), and the discretization of the price action grid vary by sweep. Broad random search may use uniform $\epsilon_\alpha\in[0,0.3]$ and $\lambda_\text{coi}\in[0.05,0.6]$; tighter grids may fix $\epsilon_\alpha=0.2$ and restrict $\lambda_\text{coi}$ to $\{0.15,0.30\}$. Behavioral distinguishability is assessed with a two-sample Mann--Whitney test on per-session divergence gap scores at cohort sizes $n_H=13$ and $n_A=16$.
While this scale is generally expensive for reinforcement learning, we execute it on a large TPU cluster to make the sweep tractable.

View File

@@ -2,9 +2,11 @@
\subsection{Transition to Agentic Market Microstructure}
Our analysis of the interaction dynamics between the platform and non-human actors suggests that the current static pricing models are insufficient for an agent-mediated economy. If we assume a transition toward a direct revelation mechanism, where actors must reveal their true valuation of a good through bidding dynamics, we inevitably introduce significant stochasticity into the pricing system. Unlike traditional e-commerce where prices are relatively sticky, such a mechanism implies a high volatility characteristic of financial equity markets (without the fungability however).
Our analysis of the interaction dynamics between the platform and non-human actors suggests that the current static pricing models are insufficient for an agent-mediated economy. If we assume a transition toward a direct revelation mechanism, where actors must reveal their true valuation of a good through bidding dynamics, we inevitably introduce significant stochasticity into the pricing system. Unlike traditional e-commerce where prices are relatively sticky, such a mechanism implies a high volatility characteristic of financial equity markets (without the fungibility however).
However, ecommerce commodities differ fundamentally from financial securities: they possess a hard floor defined by unit economics and reservation prices. The market might react enthusiastically to an iPhone priced at \$1, such a transaction is not permissible. The platform must establish an initial valuation anchor defined by the marginal cost plus a target margin, around which the market price is permitted to fluctuate. We float the introduction of GenAI Agents as Institutional Market Makers. As the arms race for greater autonomy of agnetic systems grows, the commercial viability of AI agents has the potential to disseminate into every-day users directly interacting with them rather than e-commerce platforms. This is also under the assumption of expected transactional capabilities being given to AI Agents.
However, e-commerce commodities differ fundamentally from financial securities: they possess a hard floor defined by unit economics and reservation prices. The market might react enthusiastically to an iPhone priced at \$1. Such a transaction is not permissible. The platform must establish an initial valuation anchor ($P_0$) defined by the marginal cost plus a target margin, around which the market price is permitted to fluctuate.
We float the introduction of GenAI Agents as Institutional Market Makers. As the arms race for greater autonomy of agentic systems grows, the commercial viability of AI agents has the potential to disseminate into everyday users directly interacting with them rather than e-commerce platforms. This is also under the assumption of expected transactional capabilities being given to AI Agents.
\subsection{Risk Assessment and Limitations}

View File

@@ -25,6 +25,7 @@
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{booktabs}
\usepackage[american]{babel}
\usepackage{csquotes}
\usepackage{subcaption}
\usepackage{siunitx}
@@ -57,8 +58,8 @@
literate={·}{{\textperiodcentered}}1 {−}{{\textminus}}1 {—}{{---}}1 {–}{{--}}1
}
% Use biblatex with authoryear style for in-text citations like (Author, Year)
\usepackage[backend=bibtex,style=authoryear,natbib=true,maxcitenames=2]{biblatex}
% APA 7-style references and citations (requires biber)
\usepackage[style=apa,backend=biber]{biblatex}
\addbibresource{bib/references.bib}
% Page headers (SciTech format)

110
paper/src/summary.tex Normal file
View File

@@ -0,0 +1,110 @@
% -*- TeX-master: t -*-
% Two-page summary: one self-contained source file (no \input chapters).
\documentclass[10pt,letterpaper]{article}
\input{preamble}
\begin{document}
\singlespacing
\setlength{\parskip}{0.35em}
\setlength{\parindent}{0pt}
\small
\fancyhead[L]{}
\begin{center}
{\small\url{https://velocitatem.github.io/PHANTOM/}}\\[0.65em]
{\large\bfseries PHANTOM: Pricing Heuristics Against Non-human\\[0.15em] Transaction Orchestration Mechanisms}\\[0.55em]
{\normalsize Daniel Rösel\footnote{Bachelor of Computer Science \& Artificial Intelligence @ IE University, Madrid}}\\[0.55em]
{\small Supervised by Alberto Martín Izquierdo}\\[0.35em]
{\small \today}
\end{center}
\vspace{0.75em}
Large language model (LLM) agents are spreading in e-commerce; one consequence is intermediaries that can separate information gathering from transaction execution.
This thesis studies dynamic pricing when agents reconnoitre in isolated sessions and thereby weaken the \emph{Cost of Information} (COI), the premium platforms typically extract once demand signals are expressed.
The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on \parencite{xia_evaluation-driven_2025}.
Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control.
The missing bridge is a principled framework for distinguishing non-human reconnaissance from genuine human demand expression and integrating that distinguishability into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index).
This economic framing also helps separate two related but distinct phenomena: agents as buyers (changing market demand composition) and agents as information gatherers (changing the observed interactions used by pricing/recommendation systems).
The thesis focuses on the second, where information acquisition strategically precedes purchase execution.
Our effort to combat contamination stems from research by \textcite{hardt_strategic_2015} on strategic classification, in conjunction with \textcite{liu_contextual_2024} who demonstrate a linear regret if contamination is ignored.
To bridge the gap between detection and robust pricing, we look at work in Distributionally Robust Optimization (DRO): by optimizing for the worst-case distribution within an ambiguity set around the observed data, pricing mechanisms can become resilient to distributional shifts such as those caused by non-human actors \parencite{kuhn_wasserstein_2024}.
In order to create an environment in which prices can be tested against a demand estimate generated by some behavioral model, we take inspiration from the architecture proposed by \textcite{ie_recsim_2019} in the RecSim platform built for recommendation systems.
The key component of this mediation between agents and commercial platforms lies in the transaction costs related to information gathering and negotiation.
As proposed by \textcite{shahidi_coasean_2025} these costs are bound to collapse towards zero (which we demonstrate mathematically), calling for a re-evaluation of the boundaries between firms and markets.
\vspace{0.5em}
In this paper we present an exploration and defense against the presence of new commercial entities in digitally powered platforms, preserving market equilibrium in the age of AI.
We formally define interaction data as coming from some actor which can either be an agent ($A$) or human ($H$).
Dynamic pricing algorithms rely on directly translating demand features $q$ to new price assignments $\hat{p}$ across a catalogue of products of size $N$.
This opens opportunities to design a \textit{tabula rasa} of digital market mechanisms that will shape the future of commerce in the age of artificial intelligence.
We propose a robust optimization objective defined in our methodology, transforming the pricing problem into a form of Distributionally Robust Optimization \parencite{kuhn_distributionally_2025} where the learner must guard against adversarial contamination in observed demand distributions.
For purposes of this research, an agent is an algorithmic loop with the ability to access a web platform and perform actions such as clicks, scrolls, and input field fills.
\vspace{0.5em}
The platform does not directly observe the true underlying demand function $d(p)$ where $d \in \mathbb{R}^{+}$ and our proxy $\hat{q} \in \mathbb{R}^{+}$.
Instead, it observes a behavioral proxy $\hat{q}_t$, which is a composite signal derived from the mixture of actor types.
The total observed demand is a stochastic process governed by the naively defined mixture $Q(p) = (1-\alpha) \cdot \mathbb{E}_{\theta \sim \mathcal{D}_H}[d(p\mid Y=H,\theta)] + \alpha \cdot \mathbb{E}_{\theta \sim \mathcal{D}_A}[d(p\mid Y=A,\theta)] + \epsilon_t$ where $\alpha \in [0, 1]$ represents the contamination parameter (proportion of agents) and $\epsilon_t$ is non-stationary market noise.
The platform's pricing power comes from information asymmetry: users who express strong interest signals pay more than the base price.
We quantify this markup as the \textit{Cost of Information} (COI), which represents the average premium extracted above marginal cost.
We formally demonstrate that standard dynamic pricing mechanisms are not incentive-compatible with high-frequency agentic traffic.
As the number of independent competitive agents $N$ querying the system grows, the platform's ability to sustain a COI vanishes.
\vspace{0.5em}
In order for our research to have grounding in interactions we built a robust e-commerce web-platform.
The architecture of this platform begins with the deployed web-apps posting interaction data to our backend which processes them and stores each ingested interaction into a Kafka cluster.
This serves as our data reservoir tracking and associating each interaction with its session and importantly with which experiment it belongs to.
Not only do we track the behavioral interactions, but our pricing provider micro-service, once called by the frontend, reports the observed/queried price-product into Kafka.
This Kafka cluster is subscribed to by our pipeline which is configured on a schedule in Airflow, with the possibility of manual trigger.
The final stage of the pricing pipeline submits computed dynamic pricing results into a Redis database for quick updates which is then read by the pricing provider and displayed on the webapp.
This is a very generic end-to-end mechanism which is applicable to a variety of different e-commerce tasks.
We intentionally put emphasis on the development of this infrastructure to establish a reproducible framework for interaction and to minimize any noise.
In addition to behavioral events, the platform logs price observations to a separate Kafka topic.
Each price query generates a record $(i, p, \text{sid}, \phi, t)$ associating the product, displayed price, requesting session, platform mode, and timestamp.
This dual-stream architecture enables joint analysis of price exposure and behavioral response.
We transition the Kappa-like architecture of the data collection to a Lambda architecture for actual learning in a surrogate environment.
This allows us to move faster on data which is provided and helps us create a feedback loop for production deployment.
Operationally, goals and experiment runs are tracked in PostgreSQL (goal table, run table, and assignment mapping).
This data-acquisition phase is the first half of the methodology and is intentionally a disconnected component that feeds the later contributions.
The second half uses collected behavioral traces to distinguish classes $Y \in \{A,H\}$ with session-conditioned probability estimates, then injects those estimates into the pricing learner.
Our process follows three stages: (1) observe and \textit{vectorize} behavioral interactions, (2) learn distinguishability to characterize human versus agent patterns, and (3) use the learned signal to train a defensive policy in a controlled dynamic-pricing simulator.
Our web platform (developed in similar spirit to RecSim \parencite{ie_recsim_2019}) gives us a controlled environment where tasks are assigned to human and agentic actors and then executed.
\vspace{0.5em}
Because sessions are collected under controlled experimental conditions where each actor is assigned a known type at the start of the trial, labels $Y_s \in \{H, A\}$ are available as ground truth rather than as the output of a heuristic classifier.
We therefore estimate separate transition kernels directly from each labeled partition $\mathcal{D}_H$ and $\mathcal{D}_A$, treating the resulting $\hat{\mathcal{T}}_H$ and $\hat{\mathcal{T}}_A$ as the ground-truth behavioral profiles for each class.
This allows us to construct a \textit{Contamination Generator} $\mathcal{G}(\alpha)$.
We formulate pricing as a Stackelberg game: the platform (leader) sets prices $p_t$, and the population (follower) responds through trajectories and demand.
Because contamination level $\alpha$ and demand shift are non-stationary online, a simple error term is not enough.
We therefore use a Distributionally Robust Optimization objective.
We define an ambiguity set $\mathcal{U}_\epsilon(\hat{P}_N)$ centered around our empirical reference distribution $\hat{P}_N$ (derived from the generator $\mathcal{G}$).
We utilize the Wasserstein distance metric to define the set of plausible demand distributions the agent might face.
The robust policy $\pi^*$ is obtained by solving the maximin problem $\pi^* = \arg \max_{\pi} \min_{Q \in \mathcal{U}_\epsilon} \mathbb{E}_{d \sim Q} \left[ R(p, d) - \lambda \cdot \text{COI}_{\text{leak}}(p,\tau') - \eta_{\text{ux}} \cdot \text{UX}(\tau', p) \right]$ where $R(p, d)$ is the revenue function, $\lambda$ weighs the information-leakage penalty, and $\eta_{\text{ux}}$ weighs the UX term.
In practice, we parameterize this with a session-level leakage term $\text{COI}_{\text{leak}}(p,\tau') = f(\tau')\cdot \text{InfoValue}(p,\tau')$ where $f(\tau')$ is the weak agent probability.
As part of reward engineering, we keep a UX factor ($\text{UX}\in[0,1]$) as an auxiliary evaluation axis.
Our training budget is provisioned through TPU Research Cloud and spans 320 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve.
At peak BF16 throughput this corresponds to approximately $160$\,PFLOPS of aggregate compute.
\vspace{0.5em}
The sign structure is consistent with the theoretical expectation: human sessions produce negative gap scores (closer to the human centroid, far from the agent centroid) while agent sessions produce positive gap scores (closer to the agent centroid).
The two-sided test result ($p<0.001$) at $n_H=13$, $n_A=16$ indicates strong rank distinction between groups, providing evidence that the transition kernels are distinguishable enough to justify their use as a control signal in downstream pricing.
Interpreted on the contamination grid, a $+0.1$ increase in $\alpha$ corresponds to an average revenue decrease of about $9{,}014$ units, and the robust check preserves both direction and significance.
The ability to extract COI is greater in the presence of robustness within the training loop; empirical evidence shows that agent contamination reduces revenue and that robustness is condition-dependent, requiring explicit calibration rather than a one-size-fits-all penalty.
\vspace{0.5em}
Our analysis of the interaction dynamics between the platform and non-human actors suggests that the current static pricing models are insufficient for an agent-mediated economy.
This technology does not come without a more bitter side: ethical concerns arise from the idea of deploying black-box-like solutions to set prices based on behavioral attributes.
\vspace{0.5em}
Contributions include formalization of non-human transaction orchestration in e-commerce as a distinct source of contamination, definition of COI together with a theorem showing its erosion under increasing agent saturation, a controlled e-commerce research platform built on a hybrid Kappa-Lambda architecture, empirical validation of behavioral distinguishability, translation of distinguishability into a distributionally robust reinforcement learning formulation, and release of a reusable public experimental artifact.
\vspace{0.65em}
\noindent\textbf{Acknowledgments.}\quad
This research was supported by the TPU Research Cloud program, which provided access to Google Cloud TPU accelerators (including TPU v4, v5e, and v6e).
Eugene Bykovets, PhD---ETH.
\renewcommand*{\bibfont}{\footnotesize}
\printbibliography[title={References}]
\end{document}

View File

@@ -4,16 +4,58 @@ set -euo pipefail
cmd="${1:-}"
sync_mdp_figures() {
  # Regenerate the human/agent MDP diagrams and copy them into the paper's
  # chapter directory. Paths are derived from this script's own location, so
  # the function does not depend on the caller's working directory.
  local here repo_root behavior_dir figures_dir

  here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  repo_root="$(cd "$here/.." && pwd)"
  behavior_dir="$repo_root/sim/rl/behavior_loader"
  figures_dir="$repo_root/paper/src/chapters"

  printf '%s\n' 'Refreshing MDP figures for paper...'

  # Run models.py from inside its own directory; the subshell keeps the
  # directory change from leaking into the rest of the script.
  (cd "$behavior_dir" && python models.py)

  # Copy the two PDFs from the behavior_loader directory under the names used
  # in the chapters directory.
  cp "$behavior_dir/human_mdp_viz.pdf" "$figures_dir/mdp_human.pdf"
  cp "$behavior_dir/agent_mdp_viz.pdf" "$figures_dir/mdp_agent.pdf"
}
# Biber runs with cwd paper/build; \addbibresource{bib/references.bib} must resolve there.
# Symlink makes biber log 'bib/references.bib' (not ../src/...) so latexmk's post-check passes.
link_build_bib() {
  # (Re)create the build-dir symlink to the bibliography sources.
  # Both paths are relative, so this must be called with cwd = paper/src.
  # -f replaces an existing link; -n avoids descending into an existing
  # symlinked directory instead of replacing it.
  local bib_target=../src/bib
  local bib_link=../build/bib
  ln -sfn "$bib_target" "$bib_link"
}
# Biblatex uses biber; a stale latexmk fdb can still record ["bibtex <job>"], so latexmk skips
# biber, main.bbl is missing or wrong, and every citation stays undefined. Drop only that case.
drop_stale_latexmk_bibtex_fdb() {
  # For each known job, delete its latexmk state file only when that file
  # still contains a literal ["bibtex <job>"] entry. State files without the
  # entry (or that do not exist) are left untouched.
  # Paths are relative: call with cwd = paper/src.
  local jobname state_file marker
  for jobname in main main-genpop summary; do
    state_file="../build/${jobname}.fdb_latexmk"
    [[ -f "$state_file" ]] || continue
    marker="[\"bibtex ${jobname}\"]"
    # -F: match the marker as a fixed string (it contains '[' and '"').
    if grep -Fq "$marker" "$state_file"; then
      rm -f "$state_file"
    fi
  done
}
case "$cmd" in
build)
mkdir -p paper/build
sync_mdp_figures
bash paper/concat_code.sh
cd paper/src
link_build_bib
drop_stale_latexmk_bibtex_fdb
latexmk -pdf -jobname=main -f -interaction=nonstopmode -file-line-error -r ../.latexmkrc -outdir=../build main.tex
;;
watch)
mkdir -p paper/build
sync_mdp_figures
cd paper/src
link_build_bib
drop_stale_latexmk_bibtex_fdb
latexmk -pvc -pdf -jobname=main -f -interaction=nonstopmode -file-line-error -r ../.latexmkrc -outdir=../build main.tex
;;
clean)
@@ -33,12 +75,18 @@ case "$cmd" in
;;
build-genpop)
mkdir -p paper/build
sync_mdp_figures
cd paper/src
link_build_bib
drop_stale_latexmk_bibtex_fdb
latexmk -pdf -jobname=main-genpop -f -interaction=nonstopmode -file-line-error -r ../.latexmkrc -outdir=../build main-genpop.tex
;;
watch-genpop)
mkdir -p paper/build
sync_mdp_figures
cd paper/src
link_build_bib
drop_stale_latexmk_bibtex_fdb
latexmk -pvc -pdf -jobname=main-genpop -f -interaction=nonstopmode -file-line-error -r ../.latexmkrc -outdir=../build main-genpop.tex
;;
build-arxiv)
@@ -50,6 +98,20 @@ case "$cmd" in
pdflatex -interaction=nonstopmode -file-line-error main.tex
cp main.pdf ../../../build/main-arxiv.pdf
;;
build-summary)
mkdir -p paper/build
cd paper/src
link_build_bib
drop_stale_latexmk_bibtex_fdb
latexmk -pdf -jobname=summary -f -interaction=nonstopmode -file-line-error -r ../.latexmkrc -outdir=../build summary.tex
;;
watch-summary)
mkdir -p paper/build
cd paper/src
link_build_bib
drop_stale_latexmk_bibtex_fdb
latexmk -pvc -pdf -jobname=summary -f -interaction=nonstopmode -file-line-error -r ../.latexmkrc -outdir=../build summary.tex
;;
*)
printf '%s\n' "Unknown paper command: $cmd" >&2
exit 1

View File

@@ -3,10 +3,13 @@
Computes divergence signals delta_H, delta_A from session trajectories using
transition kernel estimation and KL divergence to prototype behavioral profiles.
"""
from __future__ import annotations
from typing import Dict, List, Tuple, TYPE_CHECKING
import numpy as np
from lib.agent_probability import DEFAULT_AGENT_PRIOR, estimate_agent_probability
if TYPE_CHECKING:
from .simplified import Event, Session
@@ -32,7 +35,10 @@ TRANS_A = {
def kl_div(p: Dict[str, float], q: Dict[str, float], eps: float = 1e-10) -> float:
"""KL divergence D_KL(p || q) for discrete distributions."""
keys = set(p.keys()) | set(q.keys())
return sum(p.get(k, eps) * np.log((p.get(k, eps) + eps) / (q.get(k, eps) + eps)) for k in keys)
return sum(
p.get(k, eps) * np.log((p.get(k, eps) + eps) / (q.get(k, eps) + eps))
for k in keys
)
def build_kernel(events: List["Event"]) -> Dict[str, Dict[str, float]]:
@@ -44,7 +50,11 @@ def build_kernel(events: List["Event"]) -> Dict[str, Dict[str, float]]:
trans.setdefault(prev, {})
trans[prev][curr] = trans[prev].get(curr, 0) + 1
prev = curr
return {s: {d: c / sum(dsts.values()) for d, c in dsts.items()} for s, dsts in trans.items() if sum(dsts.values()) > 0}
return {
s: {d: c / sum(dsts.values()) for d, c in dsts.items()}
for s, dsts in trans.items()
if sum(dsts.values()) > 0
}
def compute_divergence(session: "Session") -> Tuple[float, float]:
@@ -55,18 +65,35 @@ def compute_divergence(session: "Session") -> Tuple[float, float]:
"""
kernel = build_kernel(session.events)
if not kernel:
return 0.5, 0.5
delta_h = sum(kl_div(kernel.get(s, {}), TRANS_H.get(s, {})) for s in kernel) / len(kernel)
delta_a = sum(kl_div(kernel.get(s, {}), TRANS_A.get(s, {})) for s in kernel) / len(kernel)
return 0.0, 0.0
delta_h = sum(kl_div(kernel.get(s, {}), TRANS_H.get(s, {})) for s in kernel) / len(
kernel
)
delta_a = sum(kl_div(kernel.get(s, {}), TRANS_A.get(s, {})) for s in kernel) / len(
kernel
)
return delta_h, delta_a
def estimate_alpha(session: "Session", beta: float = 2.0) -> float:
"""Per-session contamination estimate alpha_hat = sigma(beta*(delta_H - delta_A)).
def estimate_alpha(
session: "Session",
beta: float = 2.0,
prior_agent: float = DEFAULT_AGENT_PRIOR,
) -> float:
"""Per-session contamination estimate alpha_hat = sigma((delta_H - delta_A) / T).
Returns probability session is agent-generated based on behavioral divergence.
"""
dh, da = compute_divergence(session)
if (dh + da) <= 0:
return 0.5
return 1.0 / (1.0 + np.exp(-beta * (dh - da)))
return float(prior_agent)
if beta <= 0:
return estimate_agent_probability(
dh, da, temperature=1.0, prior_agent=prior_agent
)
return estimate_agent_probability(
delta_h=dh,
delta_a=da,
temperature=1.0 / beta,
prior_agent=prior_agent,
)

View File

@@ -3,7 +3,7 @@ try:
except ImportError:
from sim.rl.behavior_loader.loader import Loader, AgentLoader, JointLoader
from collections import defaultdict
from typing import Dict, List, Tuple, Set
from typing import Dict, List, Optional, Set, Tuple
import numpy as np
import graphviz
import sys
@@ -195,6 +195,110 @@ def aggregate_event_transitions(mdp: Dict) -> Dict[str, Dict[str, float]]:
return dict(evt_trans)
def _resolve_event_order(
evt_trans: Dict[str, Dict[str, float]],
event_order: Optional[List[str]] = None,
) -> List[str]:
observed = set(evt_trans.keys()) | {
dst for transitions in evt_trans.values() for dst in transitions
}
if event_order:
ordered = list(dict.fromkeys(event_order))
missing = sorted(observed - set(ordered))
return ordered + missing
return sorted(observed)
def _compass_from_angle(angle_rad: float) -> str:
ports = ("e", "ne", "n", "nw", "w", "sw", "s", "se")
normalized = (angle_rad + (2 * np.pi)) % (2 * np.pi)
step = np.pi / 4
idx = int(np.round(normalized / step)) % len(ports)
return ports[idx]
def _edge_ports(
    src: str,
    dst: str,
    positions: Dict[str, Tuple[float, float]],
    has_reverse: bool,
) -> Tuple[str, str]:
    """Choose tail/head compass ports for the edge ``src -> dst``.

    Args:
        src: Source node name; must be a key of ``positions``.
        dst: Destination node name; must be a key of ``positions``.
        positions: Node name -> ``(x, y)`` coordinates.
        has_reverse: True when the opposite edge ``dst -> src`` is also drawn.

    Returns:
        ``(tail_port, head_port)``: the port facing the destination and the
        port on the opposite side (heading + pi).
    """
    (x0, y0), (x1, y1) = positions[src], positions[dst]
    heading = float(np.arctan2(y1 - y0, x1 - x0))

    if has_reverse:
        # Tilt the heading by pi/10; the sign depends on the lexical order of
        # the two names, so the two opposite edges get different adjustments.
        offset = np.pi / 10
        heading = heading + offset if src < dst else heading - offset

    return _compass_from_angle(heading), _compass_from_angle(heading + np.pi)
def _edge_style(prob: float) -> Dict[str, str]:
if prob >= 0.75:
edge_color = "#111827"
elif prob >= 0.50:
edge_color = "#374151"
elif prob >= 0.25:
edge_color = "#6b7280"
else:
edge_color = "#9ca3af"
return {
"color": edge_color,
"fontcolor": "#111827",
"fontsize": "10",
"penwidth": f"{0.9 + 3.6 * prob:.2f}",
"arrowsize": f"{0.55 + 0.55 * prob:.2f}",
}
def _format_node_label(evt: str) -> str:
max_line_len = 16
tokens = evt.split("_")
if len(tokens) == 1:
return evt
lines: List[str] = []
curr = ""
for token in tokens:
piece = token if not curr else f"_{token}"
if curr and len(curr) + len(piece) > max_line_len:
lines.append(curr)
curr = token
else:
curr = f"{curr}{piece}" if curr else token
if curr:
lines.append(curr)
return "\n".join(lines)
def _compute_flow_positions(
events: List[str],
layout_radius: float,
) -> Dict[str, Tuple[float, float]]:
"""Balanced grid layout for paper-friendly diagrams."""
if not events:
return {}
num_events = len(events)
cols = int(np.ceil(np.sqrt(num_events)))
rows = int(np.ceil(num_events / cols))
x_step = max(layout_radius * 1.10, 3.6)
y_step = max(layout_radius * 0.95, 3.2)
positions: Dict[str, Tuple[float, float]] = {}
for idx, evt in enumerate(events):
row = idx // cols
col = idx % cols
x = (col - (cols - 1) / 2.0) * x_step
y = ((rows - 1) / 2.0 - row) * y_step
positions[evt] = (float(x), float(y))
return positions
def visualize_mdp(
model: BehaviorModel,
threshold: float = 0.05,
@@ -202,25 +306,91 @@ def visualize_mdp(
fmt: str = "svg",
view: bool = False,
export_dot: bool = False,
event_order: Optional[List[str]] = None,
layout_radius: float = 10.0,
node_diameter: float = 1.8,
label_threshold: float = 0.08,
drop_isolated_nodes: bool = False,
):
if not model.mdp:
raise ValueError("build MDP first")
evt_trans = aggregate_event_transitions(model.mdp)
g = graphviz.Digraph(format=fmt)
g.attr(rankdir="LR", size="30")
g.attr("node", shape="circle", width="1", height="1")
ordered_events = _resolve_event_order(evt_trans, event_order=event_order)
events = set(evt_trans.keys()) | {
e for trans in evt_trans.values() for e in trans.keys()
edges = [
(src, dst, prob)
for src, dsts in evt_trans.items()
for dst, prob in dsts.items()
if prob > threshold
]
if drop_isolated_nodes:
connected = {src for src, _, _ in edges} | {dst for _, dst, _ in edges}
ordered_events = [evt for evt in ordered_events if evt in connected]
positions = _compute_flow_positions(ordered_events, layout_radius=layout_radius)
g = graphviz.Digraph(format=fmt, engine="neato")
g.attr(
overlap="false",
splines="true",
outputorder="edgesfirst",
pad="0.5",
sep="+9",
esep="+4",
bgcolor="white",
dpi="180",
)
g.attr(
"node",
shape="circle",
fixedsize="true",
width=f"{node_diameter:.2f}",
height=f"{node_diameter:.2f}",
fontsize="11",
fontname="Helvetica",
style="filled",
fillcolor="white",
color="#374151",
fontcolor="#111827",
penwidth="1.8",
peripheries="1",
)
g.attr(
"edge",
fontname="Helvetica",
)
for evt in ordered_events:
x, y = positions[evt]
g.node(evt, label=_format_node_label(evt), pos=f"{x:.2f},{y:.2f}!", pin="true")
edge_set = {
(src, dst) for src, dst, _ in edges if src in positions and dst in positions
}
for evt in events:
g.node(evt)
for src, dsts in evt_trans.items():
for dst, prob in dsts.items():
if prob > threshold:
g.edge(src, dst, label=f"{prob:.2f}")
for src, dst, prob in sorted(edges, key=lambda row: row[2]):
if src not in positions or dst not in positions:
continue
edge_attrs: Dict[str, str] = _edge_style(prob)
if src == dst:
# pick a loop port away from the main flow
sx, sy = positions[src]
loop_port = "n" if sy <= 0 else "s"
edge_attrs.update({"tailport": loop_port, "headport": loop_port})
else:
has_reverse = (dst, src) in edge_set
tail_port, head_port = _edge_ports(src, dst, positions, has_reverse)
edge_attrs.update({"tailport": tail_port, "headport": head_port})
if has_reverse:
edge_attrs["constraint"] = "false"
if prob >= label_threshold or src == dst:
edge_attrs["label"] = f" {prob:.2f} "
g.edge(src, dst, **edge_attrs)
g.render(output, view=view, cleanup=True)
print(f"Saved MDP graph to {output}.{fmt}")
@@ -342,11 +512,6 @@ if __name__ == "__main__":
f"Built MDP: {human_mdp['num_states']} states, "
f"{sum(len(t) for t in human_mdp['transitions'].values())} transitions"
)
if not human_mdp["states"]:
exit("No states found")
visualize_mdp(
human_model, threshold=0.05, output="human_mdp_viz", fmt="pdf", export_dot=True
)
agent_model = AgentBehaviorModel(agent_dir)
agent_mdp = agent_model.build_MDP()
@@ -355,14 +520,36 @@ if __name__ == "__main__":
f"AGENT... Built MDP: {agent_mdp['num_states']} states, "
f"{sum(len(t) for t in agent_mdp['transitions'].values())} transitions"
)
if not agent_mdp["states"]:
exit("No states found")
visualize_mdp(
agent_model, threshold=0.05, output="agent_mdp_viz", fmt="pdf", export_dot=True
)
human_evt = aggregate_event_transitions(human_mdp)
agent_evt = aggregate_event_transitions(agent_mdp)
canonical_events = sorted(
(set(human_evt.keys()) | {e for tr in human_evt.values() for e in tr.keys()})
| (set(agent_evt.keys()) | {e for tr in agent_evt.values() for e in tr.keys()})
)
if not human_mdp["states"]:
exit("No states found")
visualize_mdp(
human_model,
threshold=0.05,
output="human_mdp_viz",
fmt="pdf",
export_dot=True,
event_order=canonical_events,
)
if not agent_mdp["states"]:
exit("No states found")
visualize_mdp(
agent_model,
threshold=0.05,
output="agent_mdp_viz",
fmt="pdf",
export_dot=True,
event_order=canonical_events,
drop_isolated_nodes=True,
)
common = set(human_evt.keys()) & set(agent_evt.keys())
@@ -394,6 +581,7 @@ if __name__ == "__main__":
output="joint_mdp_viz",
fmt="pdf",
export_dot=True,
event_order=canonical_events,
)
inter_class_avg = float(np.mean([kl for _, kl in kl_divs]))

View File

@@ -1,14 +1,24 @@
"""Vectorized KL divergence for separability scoring."""
import numpy as np
from typing import Tuple
from lib.agent_probability import (
DEFAULT_AGENT_PRIOR,
estimate_agent_probability_batch,
)
try:
import jax.numpy as jnp
from jax import jit
JAX_AVAILABLE = True
except ImportError:
jnp, JAX_AVAILABLE = np, False
def jit(f): return f
def jit(f):
return f
@jit
def batch_kl(P, Q_human, Q_agent, eps=1e-10):
@@ -20,10 +30,15 @@ def batch_kl(P, Q_human, Q_agent, eps=1e-10):
delta_a = jnp.sum(p * jnp.log(p / qa), axis=(1, 2))
return delta_h, delta_a
def compute_divergences(session_trans: np.ndarray, ref_human: np.ndarray, ref_agent: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
def compute_divergences(
session_trans: np.ndarray, ref_human: np.ndarray, ref_agent: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
"""Compute KL divergence of each session from human/agent prototypes."""
if JAX_AVAILABLE:
dh, da = batch_kl(jnp.array(session_trans), jnp.array(ref_human), jnp.array(ref_agent))
dh, da = batch_kl(
jnp.array(session_trans), jnp.array(ref_human), jnp.array(ref_agent)
)
return np.asarray(dh), np.asarray(da)
# numpy fallback
eps = 1e-10
@@ -34,10 +49,19 @@ def compute_divergences(session_trans: np.ndarray, ref_human: np.ndarray, ref_ag
delta_a = np.sum(p * np.log(p / qa), axis=(1, 2))
return delta_h, delta_a
def estimate_alpha_batch(prob_agent: np.ndarray, delta_h: np.ndarray, delta_a: np.ndarray, temp: float = 1.0) -> np.ndarray:
"""Vectorized alpha estimation from classifier probs and divergences."""
mass = delta_h + delta_a
ratio = np.where(mass > 1e-8, delta_a / mass, 0.5)
blended = 0.5 * prob_agent + 0.5 * ratio
if temp <= 0: return np.clip(blended, 0.0, 1.0)
return np.clip(1.0 / (1.0 + np.exp(-temp * (blended - 0.5))), 0.0, 1.0)
def estimate_alpha_batch(
prob_agent: np.ndarray,
delta_h: np.ndarray,
delta_a: np.ndarray,
temp: float = 1.0,
prior_agent: float = DEFAULT_AGENT_PRIOR,
) -> np.ndarray:
"""Vectorized alpha estimation using divergence gap mapping."""
_ = prob_agent
return estimate_agent_probability_batch(
delta_h=np.asarray(delta_h, dtype=float),
delta_a=np.asarray(delta_a, dtype=float),
temperature=temp,
prior_agent=prior_agent,
)