From 392f9b15495aaa7a3b41652c1fdffbf035f76787 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 8 Apr 2026 19:21:49 +0200
Subject: [PATCH] adding docs

---
 .gitignore                       |   3 +
 Makefile                         |  15 +-
 README.md                        |  10 +-
 SETUP.md                         | 298 +++++++++++++++++++++++++++++++
 docs/index.html                  |   8 +
 docs/mkdocs.yml                  |  53 ++++++
 docs/requirements.txt            |   1 +
 docs/src/architecture.md         |  30 ++++
 docs/src/business.md             |  21 +++
 docs/src/configuration.md        |  63 +++++++
 docs/src/glossary.md             |  17 ++
 docs/src/index.md                |  21 +++
 docs/src/platform-setup.md       |   5 +
 docs/src/roadmap.md              |  26 +++
 paper/src/chapters/mdp_agent.pdf | Bin 10932 -> 10931 bytes
 paper/src/chapters/mdp_human.pdf | Bin 11953 -> 11953 bytes
 paper/src/main.tex               |   2 +-
 17 files changed, 570 insertions(+), 3 deletions(-)
 create mode 100644 SETUP.md
 create mode 100644 docs/mkdocs.yml
 create mode 100644 docs/requirements.txt
 create mode 100644 docs/src/architecture.md
 create mode 100644 docs/src/business.md
 create mode 100644 docs/src/configuration.md
 create mode 100644 docs/src/glossary.md
 create mode 100644 docs/src/index.md
 create mode 100644 docs/src/platform-setup.md
 create mode 100644 docs/src/roadmap.md

diff --git a/.gitignore b/.gitignore
index 11ff6b1..1107134 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,9 @@ dist/
 **/*.parquet
 **/_build/
 
+# mkdocs output (run make docs.platform locally or rely on CI)
+docs/documentation/
+
 # paper build artifacts
 paper/src/bib/auto
 paper/src/auto/*
diff --git a/Makefile b/Makefile
index 754751c..c10eb10 100644
--- a/Makefile
+++ b/Makefile
@@ -44,7 +44,7 @@ SWEEP_ENV_LOAD = set -a; [ -f "$(SWEEP_ENV_FILE)" ] && . "$(SWEEP_ENV_FILE)" ||
 
 .PHONY: help
 help:
-	@echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | manim.defense manim.defense.hq manim.render manim.render.full manim.render.poster manim.render.appendix manim.render.all"
+	@echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | docs.platform | manim.defense manim.defense.hq manim.render manim.render.full manim.render.poster manim.render.appendix manim.render.all"
 	@echo "backend.server backend.provider backend.worker | platform.up platform.down platform.logs | docker.train.publish"
 	@echo "data.pull data.push data.whoclicked.publish | study.margin-erosion study.margin-erosion.quick study.margin-erosion.plot"
 	@echo "tpu.ray.bootstrap tpu.ray.deps tpu.ray.verify tpu.ray.teardown"
@@ -186,6 +186,19 @@ study.margin-erosion:
 study.margin-erosion.quick:
 	python -m engine.studies.margin_erosion_alpha --quick
 
+DOCS_VENV ?= docs/.venv
+DOCS_MKDOCS := $(DOCS_VENV)/bin/mkdocs
+DOCS_PIP := $(DOCS_VENV)/bin/pip
+
+.PHONY: docs.platform
+docs.platform: $(DOCS_MKDOCS)  # build the MkDocs site described by docs/mkdocs.yml
+	$(DOCS_MKDOCS) build -f docs/mkdocs.yml
+
+# Keyed on the mkdocs binary (not the venv dir) so a failed or outdated install is retried.
+$(DOCS_MKDOCS): docs/requirements.txt
+	python3 -m venv $(DOCS_VENV)
+	$(DOCS_PIP) install --upgrade pip && $(DOCS_PIP) install -r docs/requirements.txt && touch $@
+
 .PHONY: wordcount
 wordcount:
 	@$(NX) run paper:wordcount
diff --git a/README.md b/README.md
index a21d899..cc4a8aa 100644
--- a/README.md
+++ b/README.md
@@ -142,7 +142,10 @@ flowchart LR
 | `experiments/` | Data processing, ETL ideas, and analysis assets |
 | `docker/` | Dockerfiles for platform services |
 | `tests/e2e/` | Playwright end-to-end tests |
-| `docs/` | Academic project page source |
+| `docs/` | Academic project page (GitHub Pages root) + MkDocs config |
+| `docs/src/` | Markdown sources for the operator documentation site |
+| `docs/documentation/` | MkDocs build output (gitignored; run `make docs.platform`; served at `/documentation/` on Pages) |
+| `SETUP.md` | Unified operator guide: stack, kernels, RL training, thesis refs by chapter |
 
 ## Operational notes
 
@@ -151,6 +154,11 @@ flowchart LR
 - Research commands (`make train`, `make benchmark*`, `make train.agent`) auto-load `.env.sweep`.
 - Paper builds call `paper/concat_code.sh` before compilation to flatten code into the appendix.
 
+## Operator documentation
+
+- Full setup guide (platform + research): [`SETUP.md`](SETUP.md)
+- Hosted operator docs (after `make docs.platform`): […/PHANTOM/documentation/](https://velocitatem.github.io/PHANTOM/documentation/) on GitHub Pages
+
 ## Research artifacts
 
 - Thesis PDF: `thesis-latest.pdf` or [hosted PDF](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf)
diff --git a/SETUP.md b/SETUP.md
new file mode 100644
index 0000000..d2c37e0
--- /dev/null
+++ b/SETUP.md
@@ -0,0 +1,298 @@
+# PHANTOM: setup for operators and partners
+
+This guide walks a team from **business context** (what you sell, how you price, what traffic you worry about) through a **running PHANTOM stack**, **behavioral kernels and contamination**, and **RL training / benchmarking**. The math lives in the thesis PDF; here we tie operations to that math without re-deriving it. References to the thesis use **chapter numbers** only (build the PDF locally if you need line-level citations).
+
+**Thesis (PDF):** [thesis-latest.pdf](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf)
+
+---
+
+## 1. Who this is for / prerequisites
+
+**Audience:** Engineers and researchers who run Docker, a Next.js app, and Python tooling; product or risk stakeholders who define experiment goals and acceptable UX tradeoffs.
+
+**Skills:** Docker Compose, Node/npm, Python 3.8+, basic Kafka/Redis mental model.
+
+**Decide up front:**
+
+- **Vertical vs demo:** The repo ships `hotel` and `airline` storefront modes (`STORE_MODE`). Anything beyond that is custom integration work.
+- **Data residency:** Event streams and training artifacts default to paths under the repo (overridable via `PHANTOM_*` env vars in `lib/config.py`). Decide where logs and models may live before you point production-like traffic at the stack.
+- **Experiment governance:** Who may run human vs agent sessions, how sessions are labeled or weak-labeled for research, and retention policy for interaction logs.
+
+### Theoretical implications
+
+The formal model assumes each session is generated by a latent **actor class** $Y \in \{H, A\}$ (human vs agent). Your deployment choices implicitly assert **which sessions are valid for estimating human vs agent behavior** and whether experimental conditions are stable. If you mix exploratory QA traffic with labeled experiments without recording that fact, you blur the empirical partitions $D_H$ and $D_A$ that the methodology needs for transition kernels and contamination studies. See the **Introduction** (research questions) and **Methodology**, Problem Formalization, in the thesis PDF.
+
+---
+
+## 2. Business fit framing
+
+**What PHANTOM is for:** Studying how **automated browsing and transaction orchestration** interact with **session-based pricing**: behavior generates a demand proxy $\hat{q}$; pricing policies map interaction history to prices; **Cost of Information (COI)** is the premium the platform can sustain above a floor when information is scarce. Agent-mediated **reconnaissance in one session** and **purchase in another** undermines that asymmetry; the thesis proves a **COI erosion** mechanism under many independent price queries.
+
+**What you must supply:**
+
+- A **product catalog** path: defaults assume Supabase-backed product data (`NEXT_PUBLIC_SUPABASE_URL`, `NEXT_PUBLIC_SUPABASE_ANON_KEY`).
+- A plan for **interaction and price events** reaching the ingestion path (backend → Kafka) or an adapter you maintain.
+- Clear **experiment goals:** e.g. compare human vs agent KPIs under the same task, measure margin under varying contamination $\alpha$.
+
+### Theoretical implications
+
+Aggregate demand in the thesis is a **mixture** over human and agent types with contamination $\alpha$ plus noise $\epsilon_t$; see the mixture demand discussion in **Chapter 3 (Methodology)**. COI is defined as $\mathbb{E}[P]-\underline{p}$; the **COI framework** and theorem in the same chapter explain why saturated agent querying collapses extractable premium. Your business scenario determines which **actions** enter $\hat{q}$ and how interpretable $\alpha$ is for your traffic.
+
+---
+
+## 3. Environment and secrets
+
+**Bootstrap files (from repo root):**
+
+```bash
+npm install
+cp .env.example .env
+cp .env.sweep.example .env.sweep
+```
+
+**Core `.env` (platform + web + docker):** See `[.env.example](.env.example)`. You must also set the variables called out in `[README.md](README.md)` for a full stack: `NEXT_PUBLIC_SUPABASE_URL`, `NEXT_PUBLIC_SUPABASE_ANON_KEY`, `AIRFLOW_FERNET_KEY`, `AIRFLOW_SECRET_KEY` (and provider ports per your compose file).
+
+**Training / sweeps (`.env.sweep`):** Used by `make train`, `make benchmark`, sweep agents. Typically `WANDB_API_KEY`, optional `WANDB_ENTITY` / `WANDB_PROJECT`, `GITHUB_TOKEN` for bootstrap flows, `SWEEP_ID` for W&B sweep workers. See `[.env.sweep.example](.env.sweep.example)`.
+
+**Security:** Never commit real `.env` or `.env.sweep` files. Rotate keys if they leak.
+
+### Theoretical implications
+
+Splitting **online platform credentials** (ingestion, catalog, Kafka) from **offline training credentials** (W&B, cloud TPUs, GitHub tokens for workers) mirrors the **hybrid Kappa–Lambda** data loop in the thesis: streaming observation vs batch / long-running training jobs. That split is named in the **Terminology** appendix of the thesis PDF.
+
+---
+
+## 4. Bring-up (commands)
+
+Aligned with `[README.md](README.md)`:
+
+```bash
+npm install
+cp .env.example .env
+cp .env.sweep.example .env.sweep
+# edit .env: Supabase, Airflow keys, etc.
+
+make platform.up
+make web.dev
+```
+
+**Sanity checks:**
+
+
+| Endpoint                                                      | Role                              |
+| ------------------------------------------------------------- | --------------------------------- |
+| `http://localhost:3000`                                       | Next.js storefront                |
+| `http://localhost:5000/health`                                | Backend ingest API                |
+| `http://localhost:5001/health`                                | Pricing provider                  |
+| `http://localhost:8085`                                       | Airflow UI (default compose port) |
+| `http://localhost:8084` or configured `REDPANDA_CONSOLE_PORT` | Kafka console (see your `.env`)   |
+
+
+**Optional tests:** `make test.backend` (with venv/tooling as in Makefile); `make test.e2e` requires backend, web, and Airflow up per README.
+
+### Theoretical implications
+
+A correctly wired stack logs **trajectories** $\tau_s$ (sequences of events) and **price exposure** together. **Chapter 3** defines events $e_{s,k}=(a,i,t)$ and proxies $\hat{q}$ from weighted actions—without joint logging of behavior and quotes, you cannot recover the objects the theory reasons about (Problem Formalization).
+
+---
+
+## 5. Service map
+
+```mermaid
+flowchart LR
+  U[Human / Agent Browser] --> W[Next.js Web App]
+  W -->|Price requests| P[Pricing Provider]
+  W -->|Interaction events| B[Backend Ingest API]
+  B --> K[Kafka]
+  K --> A[Airflow + Worker Jobs]
+  A --> R[Redis Model Registry]
+  P -->|Session/global prices| W
+  E[Research Engine + Experiments] --> A
+  E --> R
+```
+
+
+
+**Ports (typical; confirm in `docker-compose` and `.env`):** `BACKEND_PORT` (5000), `PROVIDER_PORT` (5001), `KAFKA_PORT`, `REDIS_PORT`, Airflow `AIRFLOW_WEBSERVER_PORT` (8085 default), Redpanda console.
+
+### Theoretical implications
+
+The platform **observes** behavioral proxies and quoted prices, not the latent demand curve $d(p\mid\theta)$. The distinction between $\hat{q}$ and true demand is explicit in **Chapter 3**. Misattributing proxy noise to “true” elasticity breaks both estimation and any causal story about COI.
+
+---
+
+## 6. Tailoring to your business
+
+**Storefront mode:** `STORE_MODE=hotel` or `airline` (see `[web/src/lib/config.ts](web/src/lib/config.ts)` and env). This switches catalog and UI, not the core ingestion pattern.
+
+**API base / environment:** `NEXT_PUBLIC_API_BASE`, `NEXT_PUBLIC_APP_ENV` (validated in `config.ts`).
+
+**Paths for data and runs:** Override with `PHANTOM_DATA_DIR`, `PHANTOM_SIM_RUNS_DIR`, `PHANTOM_MODEL_REGISTRY_DIR`, `PHANTOM_COLLECTED_DATA_DIR`, etc. (`[lib/config.py](lib/config.py)`).
+
+**Honest scope:** A new vertical (custom product ontology, checkout rules, pricing rules) means **new UI, events, and possibly new reward features** in the engine. Budget engineering time; the repo is a research platform, not a turnkey SaaS skin for arbitrary catalogs without code changes.
+
+### Theoretical implications
+
+Transition kernels $\hat{\mathcal{T}}_H,\hat{\mathcal{T}}_A$ are estimated on a **finite action / state space** derived from your instrumentation. Changing catalog depth or event taxonomy changes the MDP state space; old kernel estimates are not portable. See the transition kernel discussion in **Chapter 3**.
+
+---
+
+## 7. Data collection and experiments
+
+**Flow:** Browser → backend → **Kafka** → downstream consumers (Airflow DAGs, notebooks, ETL under `experiments/`). Ensure **session identity**, **item identifiers**, and **action types** are consistent enough to build trajectories.
+
+**Weak labels:** The thesis discusses partitioning data into human vs agent subsets for MLE transition counts. In production you may only have heuristic labels—document bias explicitly.
+
+### Theoretical implications
+
+Distinguishability (sub-question SQ1 in the **Introduction**) asks whether $H$ vs $A$ is identifiable from behavior alone. Your labeling and experimental design determine whether $\Delta_H,\Delta_A$ and $f(\tau)$ are meaningful or dominated by noise. Symbols appear in the **Terminology** appendix ($\Delta_H,\Delta_A$, $f(\tau)$, contamination generator $\mathcal{G}(\alpha)$).
+
+---
+
+## 8. Transition kernels and agent scoring (theory → practice)
+
+**Theory:** Sessions yield trajectories $\tau_s$. For each actor class $y \in \{H, A\}$, the thesis estimates a **Markov transition kernel** by counting transitions and normalizing (MLE):
+
+$$
+\hat{P}(s' \mid s) = \frac{N(s,s')}{\sum_k N(s,k)}
+$$
+
+Human and agent prototypes $\hat{\mathcal{T}}_H,\hat{\mathcal{T}}_A$ support comparing an empirical kernel from a partial trajectory to prototypes (e.g. KL-style divergences $\Delta_H,\Delta_A$) and mapping to a **weak agent probability** $f(\tau)$. See **Chapter 3** and the **Terminology** appendix.
+
+**Code:** `[engine/lib/coi.py](engine/lib/coi.py)` (`compute_agent_probability`: empirical transition counts vs human/agent reference dicts, KL-style terms, mapped via `[lib/agent_probability.py](lib/agent_probability.py)`).
+
+**Optional narrative:** `[blog/02-behavioral-fingerprinting.md](blog/02-behavioral-fingerprinting.md)` walks a concrete study design (not required for operators).
+
+### Theoretical implications
+
+If reference kernels are fit on **stale** or **mislabeled** partitions, $\Delta_H-\Delta_A$ is not interpretable as distinguishability. Ground claims in SQ1 (**Introduction**) and the kernel subsection of **Chapter 3**.
+
+---
+
+## 9. Contamination generator $\mathcal{G}(\alpha)$
+
+**Theory:** Given clean trajectories, $\mathcal{G}(\alpha)$ injects synthetic agent trajectories until the effective mixture reaches contamination $\alpha\in[0,1]$, defining training scenarios for robust policies (**Chapter 3**). Catalog-scale block expansion of kernels is discussed there with validation caveats—treat large product spaces as **research-grade** until your team signs off.
+
+**Code:** `[engine/engine.py](engine/engine.py)` — `MarketEngine` mixes human/agent demand, uses `get_adjusted_transitions` / `sample_behavior_from_transitions`, and `alpha` when combining actor types and building demand proxies (`estimate_demand`). This is the **simulator** path, not a drop-in replacement for your production database.
+
+### Theoretical implications
+
+$\alpha$ in mixture $Q(p)$ is **agentic demand contribution** in the formal model, not necessarily “bot share of page views” unless your instrumentation equates them. Mismeasuring $\alpha$ biases robust objectives tied to a fixed contamination level.
+
+---
+
+## 10. Training and evaluation — local workflow
+
+**Environment:** Python venv via Nx (`make install` / `nx run research:install`). Training commands load `.env.sweep`.
+
+```bash
+make train LOCAL_TRAIN_ARGS='--algo ppo --total-timesteps 50000'
+make benchmark LOCAL_BENCHMARK_ARGS='--tiers static,surge,linear,qtable,ppo --alpha-values 0.0,0.3 --episodes 3 --no-wandb'
+make benchmark.simple
+```
+
+Entrypoints: `[engine/train.py](engine/train.py)`, `[engine/benchmark.py](engine/benchmark.py)`, `[engine/spec.py](engine/spec.py)` (Nx wraps these—see `project.json` / research targets).
+
+**Artifacts:** `[lib/config.py](lib/config.py)` — `PHANTOM_SIM_RUNS_DIR` (default `sim/rl/runs`), `PHANTOM_MODEL_REGISTRY_DIR`, etc.
+
+**TensorBoard (optional):** `[docker-compose.yml](docker-compose.yml)` includes `tensorboard-rl` on host port **6007** (`./sim/rl/runs`) and `tensorboard-ml` on **6006** (`./experiments/ml/runs`).
+
+### Theoretical implications
+
+Local runs instantiate the **offline defense gym**: policies trained on simulator-induced distributions approximate the DR-RL narrative in **Chapter 3**, but hyperparameters ($\lambda$ on COI leakage, $\eta$ on UX, robust radius) change the effective ambiguity set. Cross-check `engine/` against the thesis before claiming figure-for-figure replication.
+
+---
+
+## 11. Training and evaluation — remote / scaled deployment
+
+For **research at scale** (cloud quota and secrets required):
+
+
+| Mechanism                                   | Role                                                                                                                      |
+| ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- |
+| `[submit_ray_job.sh](submit_ray_job.sh)`    | Ray jobs with `.env` injected; `RAY_MODE=single|distributed|benchmark|sweep`. Set the script’s `ROOT` to your clone path. |
+| `make tpu.ray.bootstrap` / `tpu.ray.*`      | TPU Ray bootstrap (`TPU_CONF`, e.g. `tpu_orchestration/configs/v4_spot_us.conf`).                                         |
+| `make train.agent` / `make benchmark.agent` | W&B sweeps: `SWEEP_ID` in `.env.sweep`.                                                                                   |
+| `make train.bootstrap`                      | Worker bootstrap: `REPO_URL`, `SWEEP_ID`, `GITHUB_TOKEN`.                                                                 |
+| `make docker.train.publish`                 | Trainer image (`TRAIN_IMAGE_REF` in Makefile).                                                                            |
+
+
+See `submit_ray_job.sh` for env vars (`WANDB_*`, `PHANTOM_*` TPU toggles).
+
+### Theoretical implications
+
+Distributed training does not change the **definitions** of the Stackelberg game or Wasserstein ambiguity; it changes compute and variance of empirical estimates. Align random seeds and data protocol across nodes or split results explicitly—otherwise you mix distributions in a way a single empirical law $\hat{P}_N$ in the thesis does not describe.
+
+---
+
+## 12. Evaluation, artifacts, and audit trail
+
+**Benchmarks:** `make benchmark*` sweeps tiers and $\alpha$; CLI includes robustness knobs (see default `BENCHMARK_ARGS` in `submit_ray_job.sh`: `--robust-radius`, `--lambda-coi`, `--eta-ux`, etc.).
+
+**Audit trail:** Store `git` SHA, CLI argv, non-secret `.env.sweep` keys, and W&B run IDs with published tables. For scientific claims, cite **Chapters 4–5 (Results, Discussion)** in the thesis PDF.
+
+### Theoretical implications
+
+Evaluation quality equals **simulator fidelity** plus **contamination modeling**. Separate theorem statements (assumption-based) from empirical curves (`engine`-dependent).
+
+---
+
+## 13. Operational suggestions
+
+- **Staging:** Non-production namespaces; separate Kafka topics and Supabase projects where possible.
+- **Rate limits / abuse:** Protect ingest endpoints; respect participant privacy.
+- **Human vs agent sessions:** Comparable cohorts; record experimental condition in metadata.
+- **Contracts:** `tests/e2e/` encodes minimal flows—use when APIs change.
+
+### Theoretical implications
+
+Non-stationary noise $\epsilon_t$ and drifting $\alpha$ confound benchmark interpretation. **Chapter 3** discusses mixture identification: isolate treatments when possible and document confounders when not.
+
+---
+
+## 14. Roadmap / gaps (honesty)
+
+**Relatively turnkey:** Local dockerized stack, demo verticals, engine benchmarks, documented env and paths.
+
+**Typically custom:** Production catalog without Supabase, identity/fraud layers, legal review of logging, Kafka/Airflow SLAs, hardening the pricing provider for real money.
+
+**Thesis vs code:** The PDF is the **spec**; not every robustness term or large-catalog kernel construction is production-verified—see caveats in **Chapter 3**.
+
+### Theoretical implications
+
+Theorems in the thesis can be **stronger** than what observational firm logs support. The COI result assumes a clean experimental reading of the pricing policy; live market data may only support weaker claims.
+
+---
+
+## 15. Theory and thesis cross-references (quick index)
+
+Use the **PDF table of contents** with these anchors:
+
+
+| Topic                                                                      | Thesis location                                       |
+| -------------------------------------------------------------------------- | ----------------------------------------------------- |
+| Research questions (margin, distinguishability, contamination, mitigation) | **Introduction**                                      |
+| Sessions, events, $\hat{q}$, mixture $Q(p)$, $\alpha$                      | **Chapter 3** — Problem Formalization, mixture demand |
+| COI definition and erosion theorem                                         | **Chapter 3** — COI framework                         |
+| Transition kernels, MLE, $\mathcal{G}(\alpha)$                             | **Chapter 3**                                         |
+| DR-RL, ambiguity sets, Stackelberg                                         | **Chapter 3**                                         |
+| Symbol glossary (COI leakage, $f(\tau)$, UX, surrogates)                   | **Appendix — Terminology**                            |
+| Empirical results and limitations                                          | **Chapters 4–5**                                      |
+
+
+---
+
+## 16. Quick file index (code)
+
+
+| File                                                                               | Role                                               |
+| ---------------------------------------------------------------------------------- | -------------------------------------------------- |
+| `[engine/lib/coi.py](engine/lib/coi.py)`                                           | KL-style trajectory comparison; agent probability. |
+| `[engine/engine.py](engine/engine.py)`                                             | `MarketEngine`, mixture, demand proxy path.        |
+| `[lib/agent_probability.py](lib/agent_probability.py)`                             | Divergence → probability score.                    |
+| `[lib/config.py](lib/config.py)`                                                   | Paths and ports for artifacts.                     |
+| `[engine/train.py](engine/train.py)`, `[engine/benchmark.py](engine/benchmark.py)` | CLI entrypoints.                                   |
+| `[tpu_orchestration/](tpu_orchestration/)`                                         | TPU configs and helpers.                           |
+
+
+You do **not** need a running storefront for many **offline** benchmarks if the research Python environment is installed; you **do** need aligned instrumentation to connect production trajectories to kernel estimation.
\ No newline at end of file
diff --git a/docs/index.html b/docs/index.html
index 8eb6bd9..89062f6 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -183,6 +183,14 @@
           </div>
           <i class="fas fa-external-link-alt"></i>
         </a>
+        <a href="documentation/" class="work-item">
+          <div class="work-info">
+            <h5>Documentation</h5>
+            <p>Operator setup, configuration, architecture, and research pipeline (MkDocs).</p>
+            <span class="work-venue">Platform</span>
+          </div>
+          <i class="fas fa-book"></i>
+        </a>
         <a href="https://github.com/velocitatem/p4p" class="work-item" target="_blank">
           <div class="work-info">
             <h5>P4P Interaction Layer</h5>
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
new file mode 100644
index 0000000..01f94e1
--- /dev/null
+++ b/docs/mkdocs.yml
@@ -0,0 +1,62 @@
+# MkDocs configuration for the PHANTOM operator documentation site.
+# docs_dir and site_dir are relative to this file: sources in docs/src, build output in docs/documentation.
+site_name: PHANTOM Platform
+site_description: Operator and research documentation for the PHANTOM dynamic pricing research platform.
+site_url: https://velocitatem.github.io/PHANTOM/documentation/
+site_author: Daniel Rösel
+
+repo_url: https://github.com/velocitatem/PHANTOM
+repo_name: velocitatem/PHANTOM
+
+docs_dir: src
+site_dir: documentation
+strict: true
+
+theme:
+  name: material
+  palette:
+    - scheme: default
+      primary: indigo
+      toggle:
+        icon: material/brightness-7
+        name: Switch to dark mode
+    - scheme: slate
+      primary: indigo
+      toggle:
+        icon: material/brightness-4
+        name: Switch to light mode
+  features:
+    - navigation.instant
+    - navigation.tracking
+    - content.code.copy
+    - search.suggest
+    - search.highlight
+
+nav:
+  - Home: index.md
+  - Setup: platform-setup.md
+  - Business overview: business.md
+  - Architecture: architecture.md
+  - Configuration: configuration.md
+  - Glossary: glossary.md
+  - Roadmap & implementation notes: roadmap.md
+
+markdown_extensions:
+  # NOTE(review): snippets appears to resolve base_path against the working directory, not this
+  # file; confirm `..` still reaches the repo root when built via `make docs.platform`.
+  - pymdownx.snippets:
+      base_path:
+        - ..
+  # Register a `mermaid` fence so ```mermaid blocks render as diagrams instead of plain code.
+  - pymdownx.superfences:
+      custom_fences:
+        - name: mermaid
+          class: mermaid
+          format: !!python/name:pymdownx.superfences.fence_code_format
+  - admonition
+  - tables
+  - toc:
+      permalink: true
+
+plugins:
+  - search
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..d14bca3
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1 @@
+mkdocs-material>=9.5,<10
diff --git a/docs/src/architecture.md b/docs/src/architecture.md
new file mode 100644
index 0000000..9da03b3
--- /dev/null
+++ b/docs/src/architecture.md
@@ -0,0 +1,30 @@
+# Architecture
+
+## System map
+
+```mermaid
+flowchart LR
+  U[Human / Agent Browser] --> W[Next.js Web App]
+  W -->|Price requests| P[Pricing Provider]
+  W -->|Interaction events| B[Backend Ingest API]
+  B --> K[Kafka]
+  K --> A[Airflow + Worker Jobs]
+  A --> R[Redis Model Registry]
+  P -->|Session/global prices| W
+  E[Research Engine + Experiments] --> A
+  E --> R
+```
+
+
+
+## Event and training path (conceptual)
+
+1. **Online:** The browser emits events; the backend publishes to **Kafka**; schedulers and workers consume for ETL and model registry updates.
+2. **Offline:** Notebooks and scripts under `experiments/` transform logs; **`engine/`** runs simulations, training, and benchmarks; artifacts land under paths from [`lib/config.py`](https://github.com/velocitatem/PHANTOM/blob/main/lib/config.py).
+3. **Feedback:** Trained or rule-based policies surface through the **pricing provider** to the web app.
+
+## Where to read more
+
+- Ports and health checks: [README](https://github.com/velocitatem/PHANTOM/blob/main/README.md) and [Configuration](configuration.md).
+- Formal notation for sessions, $\hat{q}$, and mixture demand: **Chapter 3 (Methodology)** in the thesis PDF.
+
diff --git a/docs/src/business.md b/docs/src/business.md
new file mode 100644
index 0000000..a6dc8bb
--- /dev/null
+++ b/docs/src/business.md
@@ -0,0 +1,21 @@
+# Business overview
+
+PHANTOM targets **platform operators and researchers** who need to:
+
+1. **Observe** session-level behavior and price quotes together (trajectories and policies—not just clicks).
+2. **Separate** human-driven demand signals from agent-mediated reconnaissance where possible (distinguishability and contamination $\alpha$ in the thesis).
+3. **Evaluate** pricing policies that remain useful when **Cost of Information (COI)** is under pressure from automated querying (formal COI framework and theorem in the thesis PDF).
+
+## What this product is not
+
+- A drop-in fraud API that returns “bot score” for every request without your event schema.
+- A certified compliance guarantee for regulated pricing: it is a **research stack** with configurable experiments.
+- A hosted SaaS: you run the stack (or adapt components) under your infrastructure policy.
+
+## Self-service story (ideal path)
+
+A team connects their **catalog** (today: Supabase-backed flows in this repo), streams **interaction events** through the ingest path, runs **labeled or weak-labeled** human vs agent sessions, estimates **behavioral kernels**, varies **contamination** in simulation, and **trains or benchmarks** robust policies via `engine/`. Steps and caveats are in [Setup](platform-setup.md) (same content as root `SETUP.md`).
+
+## Thesis link
+
+Problem statement, contributions, and research questions: **Introduction** and abstract in the [thesis PDF](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf).
\ No newline at end of file
diff --git a/docs/src/configuration.md b/docs/src/configuration.md
new file mode 100644
index 0000000..73d7438
--- /dev/null
+++ b/docs/src/configuration.md
@@ -0,0 +1,63 @@
+# Configuration reference
+
+This page condenses tables from [`README.md`](https://github.com/velocitatem/PHANTOM/blob/main/README.md) and points to code. Authoritative env templates: [`.env.example`](https://github.com/velocitatem/PHANTOM/blob/main/.env.example), [`.env.sweep.example`](https://github.com/velocitatem/PHANTOM/blob/main/.env.sweep.example).
+
+## Core runtime (`.env`)
+
+
+| Variable                        | Purpose                        | Typical value           |
+| ------------------------------- | ------------------------------ | ----------------------- |
+| `STORE_MODE`                    | Web mode (`hotel` / `airline`) | `hotel`                 |
+| `BACKEND_PORT`                  | Backend API                    | `5000`                  |
+| `PROVIDER_PORT`                 | Pricing provider               | `5001`                  |
+| `KAFKA_HOST`                    | Kafka broker host              | `localhost`             |
+| `KAFKA_PORT`                    | Kafka port                     | `9092`                  |
+| `REDIS_PORT`                    | Redis port                     | `6377`                  |
+| `REDPANDA_CONSOLE_PORT`         | Kafka UI                       | `8084` (see compose)    |
+| `NEXT_PUBLIC_SUPABASE_URL`      | Catalog / data                 | required for full stack |
+| `NEXT_PUBLIC_SUPABASE_ANON_KEY` | Catalog / data                 | required                |
+| `AIRFLOW_FERNET_KEY`            | Airflow                        | required                |
+| `AIRFLOW_SECRET_KEY`            | Airflow web                    | required                |
+
+
+Web client validation: [`web/src/lib/config.ts`](https://github.com/velocitatem/PHANTOM/blob/main/web/src/lib/config.ts).
+
+## Training / sweeps (`.env.sweep`)
+
+
+| Variable        | Purpose                                         |
+| --------------- | ----------------------------------------------- |
+| `WANDB_API_KEY` | Weights & Biases                                |
+| `WANDB_ENTITY`  | Optional override                               |
+| `WANDB_PROJECT` | Project name (default `capstone`)               |
+| `GITHUB_TOKEN`  | Bootstrap / workers                             |
+| `SWEEP_ID`      | Sweep agents (`train.agent`, `benchmark.agent`) |
+
+
+## Path overrides (`PHANTOM_*`)
+
+Defined in [`lib/config.py`](https://github.com/velocitatem/PHANTOM/blob/main/lib/config.py):
+
+
+| Variable                     | Default (conceptual)                |
+| ---------------------------- | ----------------------------------- |
+| `PHANTOM_DATA_DIR`           | `data/`                             |
+| `PHANTOM_EXPERIMENTS_DIR`    | `experiments/`                      |
+| `PHANTOM_SIM_RUNS_DIR`       | `sim/rl/runs`                       |
+| `PHANTOM_MODEL_REGISTRY_DIR` | `data/models`                       |
+| `PHANTOM_COLLECTED_DATA_DIR` | `experiments/agents/collected_data` |
+
+
+## Makefile entrypoints
+
+
+| Goal             | Command                                     |
+| ---------------- | ------------------------------------------- |
+| Platform up/down | `make platform.up` / `make platform.down`   |
+| Web dev          | `make web.dev`                              |
+| Train            | `make train` (+ `LOCAL_TRAIN_ARGS`)         |
+| Benchmark        | `make benchmark` (+ `LOCAL_BENCHMARK_ARGS`) |
+| Docs site        | `make docs.platform`                        |
+
+
+See `make help` for the full list.
\ No newline at end of file
diff --git a/docs/src/glossary.md b/docs/src/glossary.md
new file mode 100644
index 0000000..5774101
--- /dev/null
+++ b/docs/src/glossary.md
@@ -0,0 +1,17 @@
+# Glossary
+
+Short definitions point to the thesis **Terminology** appendix in the [PDF](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf) for full precision.
+
+| Term | Meaning (operational) |
+| --- | --- |
+| **COI (Cost of Information)** | Expected price premium above a floor under the platform’s policy; thesis KPI for pricing power. |
+| **Trajectory \(\tau_s\)** | Ordered session events used as the behavioral record. |
+| **Demand proxy \(\hat{q}\)** | Weighted aggregation of actions—what the platform observes instead of true demand. |
+| **Contamination \(\alpha\)** | Agent share in the mixture demand model (thesis); not automatically “% of bots” in raw logs. |
+| **Transition kernel \(\hat{\mathcal{T}}\)** | MLE Markov model over behavioral states / events for class \(H\) or \(A\). |
+| **\(\Delta_H,\Delta_A\)** | Divergence scores vs human/agent prototypes (thesis notation). |
+| **\(f(\tau)\)** | Weak agent probability from trajectory (implementation: `engine/lib/coi.py`). |
+| **\(\mathcal{G}(\alpha)\)** | Contamination generator: synthetic agent trajectories to reach mixture level \(\alpha\). |
+| **DR-RL** | Distributionally robust reinforcement learning training narrative in the thesis. |
+| **Ambiguity set / Wasserstein** | Robust optimization neighborhood around an empirical demand law. |
+| **Kappa–Lambda architecture** | Thesis term for streaming (online) vs batch/offline learning loops. |
diff --git a/docs/src/index.md b/docs/src/index.md
new file mode 100644
index 0000000..caa59e9
--- /dev/null
+++ b/docs/src/index.md
@@ -0,0 +1,21 @@
+# PHANTOM platform documentation
+
+Welcome. This site mirrors the **operator and research** documentation for the PHANTOM repository: a research platform for studying **dynamic pricing** under **LLM-mediated browsing and transaction orchestration**, with ties to the academic thesis.
+
+## Start here
+
+| Document | What it covers |
+| --- | --- |
+| [Setup](platform-setup.md) | Full walkthrough: Docker/web/ingest, kernels, contamination, RL training, and audit—content from `SETUP.md` in the repo. |
+| [Configuration reference](configuration.md) | Env vars, paths, and Makefile entrypoints in one place. |
+| [Roadmap & implementation notes](roadmap.md) | What is turnkey vs research-grade; thesis vs code. |
+
+## Canonical sources in the repo
+
+- Thesis PDF: [thesis-latest.pdf](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf)
+- Root onboarding: single file [`SETUP.md`](https://github.com/velocitatem/PHANTOM/blob/main/SETUP.md) (included on this site via snippets—edit that file to change content).
+- Quick start and command tables: [`README.md`](https://github.com/velocitatem/PHANTOM/blob/main/README.md)
+
+## Academic project page
+
+The research landing page (figures, abstract, links) is the site root on GitHub Pages: [velocitatem.github.io/PHANTOM/](https://velocitatem.github.io/PHANTOM/). Open **Documentation** in the Project Links menu there to return to this subsite.
diff --git a/docs/src/platform-setup.md b/docs/src/platform-setup.md
new file mode 100644
index 0000000..682f010
--- /dev/null
+++ b/docs/src/platform-setup.md
@@ -0,0 +1,5 @@
+# Setup
+
+The content below is included from the repository root file `SETUP.md` (single source of truth: platform bring-up, kernels, contamination, RL training, and thesis pointers by chapter).
+
+--8<-- "SETUP.md"
diff --git a/docs/src/roadmap.md b/docs/src/roadmap.md
new file mode 100644
index 0000000..d16f496
--- /dev/null
+++ b/docs/src/roadmap.md
@@ -0,0 +1,26 @@
+# Roadmap & implementation notes
+
+This page is the **honesty pass** from the documentation plan: what clients can expect today versus what remains research-heavy.
+
+## Turnkey in this repository
+
+- **Local stack:** Docker Compose services for backend, Kafka, Redis, Airflow, pricing provider, etc.; Next.js via `make web.dev` (see [Platform setup](platform-setup.md)).
+- **Demo verticals:** `hotel` and `airline` storefront modes.
+- **Engine:** Benchmarks and training entrypoints (`make train`, `make benchmark`), KL-based agent scoring in [`engine/lib/coi.py`](https://github.com/velocitatem/PHANTOM/blob/main/engine/lib/coi.py), simulator mixing in [`engine/engine.py`](https://github.com/velocitatem/PHANTOM/blob/main/engine/engine.py).
+- **Orchestration hooks:** Ray/TPU scripts (`submit_ray_job.sh`, `make tpu.ray.*`), W&B sweep agents, Docker trainer publish target.
+
+## Usually requires custom engineering
+
+- **Non-Supabase catalog** or checkout flows: these need the web + backend contracts adapted.
+- **Production SLAs** on Kafka, schema registry, or PII boundaries for your jurisdiction.
+- **Tight coupling** to a legacy pricing engine: its API must first be mapped to the provider abstraction.
+
+## Thesis vs code
+
+- The **thesis** states theorems and constructions (COI erosion, kernels, $\mathcal{G}(\alpha)$, DR-RL).
+- The **codebase** implements a **subset** of that story for experiments: verify CLI flags and simulator assumptions before claiming 1:1 equivalence with every equation.
+- **Catalog-scale kernel expansion** is discussed in **Chapter 3** with explicit validation caveats—do not assume row-stochasticity and Markov structure are automatically preserved at full product cardinality without review.
+
+## Suggested client messaging
+
+Position PHANTOM as a **reproducible research and evaluation stack** for agent-aware pricing, with a path to custom integration—not as a black-box “turn on anti-agent pricing” product without data and engineering investment.
\ No newline at end of file
diff --git a/paper/src/chapters/mdp_agent.pdf b/paper/src/chapters/mdp_agent.pdf
index 17d299e68d5e4207c1cb1833a0049fb74da537c1..b0911f18c069e6ea57ee7992c27bf88ad3495015 100644
GIT binary patch
delta 322
zcmV-I0logTRkKyFeJFoXYlAQph2Q-Z=VeS8Xkx9kMfy;MGR8n`-^Lz73|1IMk~r3X
zzeKBJ<mKLvdvea@QV@@(*b*-W-BOS4u-T<vo+G;62Z5+!t<WBT=&frsJ`jVT#^;0z
zIrawBJ02%RrDQX5q2P1YDG3_VEKK0vj2ZMHiYD4BHrzU{L#cn-Aeg^Vf|*YCB{Kqk
zd5B4zl7wW`7BqPw@iQU8V)7RYPuAM$E2f0Y;gU_@T{Im#$sZe+*cWHX9yF!<HFj^T
zKqFZdXC=o0dF~q5&n^>P(8W^4dObKdeqZn_j2gw#6?Uo=-yIkA`Q)HY?RYJ;92FND
zo7MATh1|^HwY?R?zXc0s*q3Y{``H^j*0V#u_8)IVlaVPO0yjC6vMJ6CML;ploeT^B
U<*f-ilZ+}L2r&vJB}Gq03ayiqdjJ3c

delta 323
zcmV-J0lfaRRkT&GeJFolYlAQp#ozrD=VeS8Xs)&P57LLWP{tTY+qbcY5Q7zlktB}w
z+b_}T7<sxs?#VfaOJD)zSh64mEh(TC7OOlc3MAKiCy|uAmL>p@yfu}<2U0Mc@j0M_
z>RSs6cDq4QYE|_@YJ`F}T7hC(#~H$#wVl}}$v|t(yIUhntaN`JMdLSGa@(l($gM<p
z+@>teS;p2x%N3&+mOe8UO(uV_@Z_w$zT{f^96s3q-euMBz4~!+nS1Fv+kkS~%&}YR
z1P0lxIBC^)C<;ILd2|`*gf5P1-kQ$4iTi|KVRbL3>EO1G^1Bi8eA;{HQhR=uM)g`q
zgT?H5aYA9o@WvM1;a|Z6xBN@hkACh3k9uzD7yFHGNt2K%9|AZqld&nz48=e(&0P!(
V0Ohy|Jd=wmAP6!FB_%~qMhZNeon`<4

diff --git a/paper/src/chapters/mdp_human.pdf b/paper/src/chapters/mdp_human.pdf
index af63cd53d332f9ca87871d6a0d72294ce431bc4a..cced37da2848d24b54e43f309be7bb7eee4c105e 100644
GIT binary patch
delta 291
zcmV+;0o?wvU9nxTdn|v!YQr!PMDPBJxs=8fY)Nr!H<%ofKq#dUlHN)WLKcppSVodd
z^Y>lZaZ1r;_pzgyWjR>HbF5e-z*jut1(rn~RTa|rYp;<WWusjLAbo9XhYtuaobWlK
zof$e09&I+G;%H4hC~dJ&vb6?0;boFxaq+%)>ogr{V`YD7l}mq&ZQ^+P#%k$X)9t0#
zSnSt=iJWDu<W%tNiHS$X;@RXc79Pkt=#SFskRv1;!JDpIxivo&m(df?c?prPihJzN
zlfWUnD-PBSJ*p}+p`TnvI-@gDD?8T*H+7xy3%nil-0f&;ufJQRPls)QlsfQ}c4n|j
pJ1p;>XA&wug?AQohkpz1q?cc&x$Q?cxUEM+zxo$%NwbeFA_1cki~s-t

delta 291
zcmV+;0o?wvU9nxTdn|v=YQr!PgztWexs*B-Y)SE7gUKNdgi;D7>8<o2WZ@W!WhA*Y
zZ(qrdU5ZZov7?z`DM&ywq$FUVDGg|jZJq^XiTM3CNW?X7r3nDUuU%vC0S1CIJ||SE
zvA3YXem^N{t(s9tjZpATE6|7*QG)Pl?O=9sJkeJ3;o1okDcyf=HuE=Ha@(o?$gM<p
z+%dv3l8}PZG$-_lut!2Ri^*RsJUDBwuep{!hfg+vH`#RjpnhCj;vRd>3a~8A*Vw&v
z0)u2#oU|GTl%*g1JiAPEK^I3g@6F)d#BISZuzHlsRJpCA{O*K2pAH_n)SjQEQKJ^p
pV7q!=oKV;~ym1zH_*byvmVc@C-p}3OUe7K40{?|?PP30KA^{6VkLv&c

diff --git a/paper/src/main.tex b/paper/src/main.tex
index 2046342..c3422cc 100644
--- a/paper/src/main.tex
+++ b/paper/src/main.tex
@@ -18,7 +18,7 @@
 \end{titlepage}
 
 \begin{abstract}
-With accelerated growth of Lager Language Model agents in e-commerce a novel adversarial dynamic to digital markets emerges. This paper address the vulnerability of dynamic pricing systems to AI intermediaries that decouple the information gather stages from the transaction execution. By conducing reconnaissance isolates sessions, agents circumvent the ``Cost of Information'' (COI) defined as the accumulated price premium typically thought demand expression estimators.
+With the accelerated growth of Large Language Model agents in e-commerce, a novel adversarial dynamic in digital markets emerges. This paper addresses the vulnerability of dynamic pricing systems to AI intermediaries that decouple the information-gathering stages from the transaction execution. By conducting reconnaissance in isolated sessions, agents circumvent the ``Cost of Information'' (COI), defined as the accumulated price premium typically imposed through demand expression estimators.
 We formally define this phenomenon and derive the Cost of Information Theorem, proving that as the saturation of independent, utility-maximizing agents increases, the platform’s ability to sustain a COI converges to zero, rendering standard dynamic pricing mechanisms incentive-incompatible.
 To respond to this threat we propose a defensive framework which integrates behavioral economics with Adversarially Distributionally Robust Optimization (DRO). We introduce a custom e-commerce research platform built on hybrid Kappa-Lambda architecture, designed to capture and simulate high-fidelity controlled interaction trajectories. We further demonstrate through modeling that human and agent behaviors exhibit distinct transition probability kernels, enabling the construction of discriminative models based on Kullback-Leibler divergence.
 These behavioral signals serve as inputs for a Distributionally Robust Reinforcement Learning (DR-RL) agent. We formulate the pricing problem as a Stackelberg game where the learner optimizes against an ambiguity set of demand distributions defined by the Wasserstein distance. This approach allows the pricing policy to remain robust against non-stationary contamination without overfitting to deterministic demand curves. The research validates a mechanism for preserving margin integrity and market equilibrium in an agent-mediated economy, while minimizing degradation to the legitimate human user experience (UX).