From 7eaf19924110531225c90fb256b8a385b10a291a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Alves=20R=C3=B6sel?=
 <60182044+velocitatem@users.noreply.github.com>
Date: Tue, 30 Dec 2025 02:51:55 +0000
Subject: [PATCH] refactor: enhance Cost of Information framework with
 additional context and illustration

---
 paper/src/chapters/03-methodology.tex | 37 ++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/paper/src/chapters/03-methodology.tex b/paper/src/chapters/03-methodology.tex
index 604edfa..f7bb93b 100644
--- a/paper/src/chapters/03-methodology.tex
+++ b/paper/src/chapters/03-methodology.tex
@@ -35,7 +35,7 @@ where $\alpha \in [0, 1]$ represents the contamination parameter (proportion of
 
 \subsection{Cost of Information (COI) Framework}
 
-The \textit{Cost of Information} (COI) represents the markup a pricing policy $\pi$ attempts to extract from the market by leveraging demand signals. We define COI as the expected premium over the minimum viable price $\underline{p}$ (or marginal cost).
+The \textit{Cost of Information} (COI) represents the markup a pricing policy $\pi$ attempts to extract from the market by leveraging demand signals. We define COI as the expected premium over the minimum viable price $\underline{p}$ (or marginal cost). The COI thereby also reflects the financial urgency that arises from the information asymmetry between the platform and the actors.
 
 \begin{definition}[Cost of Information]
 Let $\pi(\tau)$ be a pricing policy mapping interaction histories to prices. The COI is defined as:
@@ -46,6 +46,38 @@ Let $\pi(\tau)$ be a pricing policy mapping interaction histories to prices. The
 where $F_\pi(p)$ is the cumulative distribution function of prices generated by $\pi$ under standard operating conditions.
 \end{definition}
 
+\begin{figure}[htbp]
+    \centering
+    % Schematic: bell-shaped price density with the COI shown as the gap between
+    % the price floor and the mean price. Shape parameters are illustrative only.
+    \begin{tikzpicture}[
+        scale=1.2,
+        declare function={bellcurve(\x)=1.5*exp(-0.5*((\x-2)/0.6)^2);}
+    ]
+        % Key abscissae and shared guide/arrow height; figure-specific names avoid macro clashes.
+        \newcommand*{\coiPmin}{0.8}
+        \newcommand*{\coiMean}{2}
+        \newcommand*{\coiTop}{2.0}
+
+        % Axes
+        \draw[->, thick] (0, 0) -- (4.5, 0) node[right] {$p$};
+        \draw[->, thick] (0, 0) -- (0, 2) node[above] {Density};
+
+        % Density curve f_pi(p), centred at the mean
+        \draw[thick, smooth, samples=100] plot[domain=0:4] (\x, {bellcurve(\x)});
+        \node at (3.2, 1.2) {$f_\pi(p)$};
+
+        % Dashed guides at the price floor and at the mean, labelled below the axis
+        \draw[dashed] (\coiPmin, 0) node[below] {$\underline{p}$} -- (\coiPmin, \coiTop);
+        \draw[dashed] (\coiMean, 0) node[below] {$\mathbb{E}[p]$} -- (\coiMean, \coiTop);
+
+        % COI: horizontal distance between the two guides
+        \draw[<->, thick, red] (\coiPmin, \coiTop) -- (\coiMean, \coiTop) node[midway, above] {COI};
+    \end{tikzpicture}
+    \caption{Illustration of the Cost of Information (COI). The COI is defined as the difference between the expected price $\mathbb{E}[p]$ realized by the policy and the minimum viable price $\underline{p}$.}
+    \label{fig:coi_illustration}
+\end{figure}
+
 We now formally demonstrate that standard dynamic pricing mechanisms are not incentive-compatible with high-frequency agentic traffic. As the number of independent competitive agents $N$ querying the system grows, the platform's ability to sustain a COI vanishes.
 
 \begin{theorem}[COI Erosion in the Limit]
@@ -181,3 +213,6 @@ where $R(p, d)$ is the revenue function and $\lambda$ weighs the penalty for inf
 
 \subsubsection{Actor Implementation}
 In our simulation, the "Follower" is implemented as a set of Actors. Each Actor is initialized with a type $\theta$ which samples a specific demand curve $d(p; \theta)$ from the latent distribution. This formalization ensures that our DR-RL agent does not overfit to a single deterministic demand function but learns a policy robust to the distributional uncertainty defined by $\mathcal{U}_\epsilon$.
+
+
+As part of our reward engineering, we consider a UX factor ($\mathit{UX} \in [0,1]$), which is our proxy for user experience degradation. It is computed as a mixture of contributions from the separability model metric $\frac{1}{\text{Specificity}}$.