% PHANTOM/paper/src/main.tex

% -*- TeX-master: t -*-
\documentclass[12pt,letterpaper]{article}

\input{preamble}

\begin{document}

\begin{titlepage}
    \centering
    \includegraphics[width=\textwidth]{graphics/banner.png}\\[0.8cm]
    \LARGE\textbf{PHANTOM: Pricing Heuristics Against Non-human Transaction Orchestration Mechanisms}\\[0.5cm]
    \Large\textbf{Daniel Rösel}\\
    \large\textit{Bachelor of Computer Science \& Artificial Intelligence}\\[0.5cm]
    \Large\textit{Supervised by:}\\
    \Large\textbf{Alberto Martín Izquierdo}\\
    \large\textit{IE University, Madrid, Spain}\\[1cm]
    \large\today
\end{titlepage}

\begin{abstract}
With the accelerated growth of Large Language Model (LLM) agents in e-commerce, a novel adversarial dynamic emerges in digital markets. This paper addresses the vulnerability of dynamic pricing systems to AI intermediaries that decouple the information-gathering stage from transaction execution. By conducting reconnaissance in isolated sessions, agents circumvent the ``Cost of Information'' (COI), defined as the accumulated price premium typically imposed through demand expression estimators.
We formally define this phenomenon and derive the Cost of Information Theorem, proving that as the saturation of independent, utility-maximizing agents increases, the platform’s ability to sustain a COI converges to zero, rendering standard dynamic pricing mechanisms incentive-incompatible.
To respond to this threat, we propose a defensive framework which integrates behavioral economics with Adversarially Distributionally Robust Optimization (DRO). We introduce a custom e-commerce research platform built on a hybrid Kappa--Lambda architecture, designed to capture and simulate high-fidelity controlled interaction trajectories. We further demonstrate through modeling that human and agent behaviors exhibit distinct transition probability kernels, enabling the construction of discriminative models based on Kullback--Leibler divergence.
These behavioral signals serve as inputs for a Distributionally Robust Reinforcement Learning (DR-RL) agent. We formulate the pricing problem as a Stackelberg game where the learner optimizes against an ambiguity set of demand distributions defined by the Wasserstein distance. This approach allows the pricing policy to remain robust against non-stationary contamination without overfitting to deterministic demand curves. The research validates a mechanism for preserving margin integrity and market equilibrium in an agent-mediated economy, while minimizing degradation to the legitimate human user experience (UX).
\end{abstract}

\noindent\textbf{Keywords:} Dynamic Pricing, LLM Agents, Adversarial Machine Learning, E-commerce, Behavioral Detection, Reinforcement Learning

\vspace{1em}
\noindent\textbf{Acknowledgments:} This research was supported by the TPU Research Cloud program, which provided access to Google Cloud TPU accelerators (including TPU v4, v5e, and v6e).

\vspace{0.5em}
\noindent\textbf{Project page:} \url{https://velocitatem.github.io/PHANTOM/}

\clearpage
\input{chapters/01-intro}
\input{chapters/02-literature-review}
\input{chapters/03-methodology}
\input{chapters/04-results}
\input{chapters/05-discussion}
\input{chapters/06-conclusion}

\printbibliography

\clearpage
\appendix
\section{Terminology}
\begin{description}
\item[Agent $A$] An actor of non-human nature, powered by an LLM.
\item[Human $H$] An individual human with some job to be done.
\item[Actor $\theta$] The type of an acting entity, which is either Agent or Human; an actor is capable of carrying out actions on a web platform.
\item[Platform] Any web-based platform which serves an interface to a collection of items that can be purchased, each at some price $p_i$.
\item[Behavioral Model] A mathematical model predicting what action comes after a series of prior actions.
\item[LLM] Large Language Model served by some provider with the abstracted capability of tool calling.
\item[TPU] Tensor Processing Unit, a specialized chip architecture developed by Google for accelerating machine-learning workloads.
\item[Trajectory] Defined as a series of unspecified length, collecting data on states of some object over time.
% TODO: maybe define other things in a similarly succinct manner
\end{description}

\section{Aggregate Compute Budget Derivation}
\label{app:compute_budget}

The claimed peak throughput of approximately 160\,PFLOPS follows from multiplying the per-chip BF16 peak (from official Google Cloud TPU documentation) by the number of chips in each allocation tier and summing across generations.

\begin{table}[ht]
\centering
\caption{Per-generation contribution to aggregate BF16 throughput.}
\label{tab:compute_derivation}
\begin{tabular}{@{}lrrr@{}}
\toprule
\textbf{TPU Gen.} & \textbf{Chips} & \textbf{Peak BF16/chip (TFLOPS)} & \textbf{Subtotal (TFLOPS)} \\
\midrule
v6e (Trillium) & 128 & 918 & $128 \times 918 = 117{,}504$ \\
v5e            & 128 & 197 & $128 \times 197 = 25{,}216$  \\
v4             &  64 & 275 & $64  \times 275 = 17{,}600$  \\
\midrule
\textbf{Total} & \textbf{320} & & $\mathbf{160{,}320}$ \\
\bottomrule
\end{tabular}
\end{table}

Converting to petaFLOPS: $160{,}320\;\text{TFLOPS} = 160.32\;\text{PFLOPS} \approx 160\;\text{PFLOPS}$. This is the theoretical peak under sustained BF16 arithmetic; realized throughput depends on memory bandwidth utilization and inter-chip communication overhead, but the figure serves as a useful upper bound for provisioning decisions.

\section{Full Slope-Test Derivation: Revenue vs. Contamination}
\label{app:alpha_revenue_slope}

This appendix gives the full ordinary least squares computation for the linear effect of contamination on mean revenue. Let
\[
x_i = \texttt{study/alpha}_i, \qquad y_i = \texttt{eval/revenue\_mean}_i,
\]
and fit
\[
y_i = \beta_0 + \beta_1 x_i + \varepsilon_i, \qquad i=1,\dots,n.
\]
The slope test is
\[
H_0: \beta_1 = 0 \qquad \text{vs.} \qquad H_1: \beta_1 \neq 0.
\]

\subsection{Sample moments and least-squares coefficients}

From the data:
\[
n=95, \qquad \bar{x}=0.3810526316, \qquad \bar{y}=303{,}774.6096.
\]
Define
\[
S_{xx}=\sum_{i=1}^{n}(x_i-\bar{x})^2, \qquad S_{xy}=\sum_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y}).
\]
Numerically,
\[
S_{xx}=7.0508947368, \qquad S_{xy}=-427{,}509.4691.
\]
The least-squares slope and intercept are
\[
\hat{\beta}_1 = \frac{S_{xy}}{S_{xx}} = \frac{-427{,}509.4691}{7.0508947368} = -60{,}631.9460,
\]
\[
\hat{\beta}_0 = \bar{y} - \hat{\beta}_1\bar{x} = 303{,}774.6096 - (-60{,}631.9460)(0.3810526316) = 326{,}878.5722.
\]
So the fitted line is
\[
\hat{y} = 326{,}878.5722 - 60{,}631.9460\,x.
\]

\subsection{Residual variance and standard error of the slope}

For each observation, $\hat{y}_i = \hat{\beta}_0 + \hat{\beta}_1 x_i$ and $e_i = y_i - \hat{y}_i$. The residual sum of squares is
\[
\mathrm{SSE} = \sum_{i=1}^{n} e_i^2 = 35{,}721{,}896{,}352.27375.
\]
With $df=n-2=93$,
\[
\mathrm{MSE} = \frac{\mathrm{SSE}}{n-2} = \frac{35{,}721{,}896{,}352.27375}{93} = 384{,}106{,}412.3900.
\]
The slope standard error is
\[
SE(\hat{\beta}_1) = \sqrt{\frac{\mathrm{MSE}}{S_{xx}}} = \sqrt{\frac{384{,}106{,}412.3900}{7.0508947368}} = 7{,}380.8038.
\]

\subsection{t-statistic, p-value, and confidence interval}

Under $H_0: \beta_1=0$,
\[
t = \frac{\hat{\beta}_1}{SE(\hat{\beta}_1)} = \frac{-60{,}631.9460}{7{,}380.8038} = -8.2148,
\]
with $df=93$. The two-sided p-value is
\[
p = 2\,\Pr\left(T_{93} \ge |t|\right) = 1.2038\times 10^{-12}.
\]
The 95\% confidence interval is
\[
\hat{\beta}_1 \pm t_{0.975,93}\,SE(\hat{\beta}_1)
= -60{,}631.9460 \pm (1.9858)(7{,}380.8038)
= [-75{,}288.7597,\,-45{,}975.1324].
\]

\subsection{Effect size and fit statistics}

The sample correlation is $r=-0.64846$, so
\[
R^2 = r^2 = 0.4205.
\]
Hence, 42.05\% of the variation in \texttt{eval/revenue\_mean} is explained by a linear trend in \texttt{study/alpha}.

The slope interpretation is direct:
\[
\hat{\beta}_1 = -60{,}631.9460 \quad \Rightarrow \quad \Delta y \approx -6{,}063.19 \text{ for } \Delta x = +0.1.
\]
From $\alpha=0$ to $\alpha=0.8$, the fitted drop is
\[
0.8\times (-60{,}631.9460) = -48{,}505.5568,
\]
so the model predicts roughly $48{,}506$ lower revenue units on average.

\subsection{Conclusion of the slope test}

The estimated model is
\[
\hat{y}=326{,}878.57-60{,}631.95\,x,
\]
with
\[
t(93)=-8.2148, \qquad p=1.2038\times 10^{-12}, \qquad 95\%\,\text{CI}=[-75{,}288.76,\,-45{,}975.13].
\]
The slope is therefore strongly negative and statistically different from zero.

% \input{../build/concatenated_code}

\end{document}