From 7eaf19924110531225c90fb256b8a385b10a291a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Alves=20R=C3=B6sel?= <60182044+velocitatem@users.noreply.github.com> Date: Tue, 30 Dec 2025 02:51:55 +0000 Subject: [PATCH] refactor: enhance Cost of Information framework with additional context and illustration --- paper/src/chapters/03-methodology.tex | 37 ++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/paper/src/chapters/03-methodology.tex b/paper/src/chapters/03-methodology.tex index 604edfa..f7bb93b 100644 --- a/paper/src/chapters/03-methodology.tex +++ b/paper/src/chapters/03-methodology.tex @@ -35,7 +35,7 @@ where $\alpha \in [0, 1]$ represents the contamination parameter (proportion of \subsection{Cost of Information (COI) Framework} -The \textit{Cost of Information} (COI) represents the markup a pricing policy $\pi$ attempts to extract from the market by leveraging demand signals. We define COI as the expected premium over the minimum viable price $\underline{p}$ (or marginal cost). +The \textit{Cost of Information} (COI) represents the markup a pricing policy $\pi$ attempts to extract from the market by leveraging demand signals. We define COI as the expected premium over the minimum viable price $\underline{p}$ (or marginal cost). The COI also captures the financial urgency that arises as a consequence of the information asymmetry between the platform and the actors. \begin{definition}[Cost of Information] Let $\pi(\tau)$ be a pricing policy mapping interaction histories to prices. The COI is defined as: @@ -46,6 +46,38 @@ Let $\pi(\tau)$ be a pricing policy mapping interaction histories to prices. The where $F_\pi(p)$ is the cumulative distribution function of prices generated by $\pi$ under standard operating conditions. 
\end{definition} +\begin{figure}[ht] + \centering + \begin{tikzpicture}[scale=1.2] + % Define the Gaussian function: centered at 2 + \def\bellcurve(#1){1.5 * exp(-0.5*((#1-2)/0.6)^2)} + + % Draw the main axis + \draw[->, thick] (0, 0) -- (4.5, 0) node[right] {$p$}; + \draw[->, thick] (0, 0) -- (0, 2) node[above] {Density}; + + \draw[thick, smooth, samples=100] plot[domain=0:4] (\x, {\bellcurve(\x)}); + \node at (3.2, 1.2) {$f_\pi(p)$}; + + % Define p_min and E[p] + \def\pmin{0.8} + \def\mean{2} + + % Vertical lines + \draw[dashed] (\pmin, 0) -- (\pmin, 2.0); + \draw[dashed] (\mean, 0) -- (\mean, 2.0); + + % Labels on axis + \node[below] at (\pmin, 0) {$\underline{p}$}; + \node[below] at (\mean, 0) {$\mathbb{E}[p]$}; + + \draw[<->, thick, red] (\pmin, 2.0) -- (\mean, 2.0) node[midway, above] {COI}; + + \end{tikzpicture} + \caption{Illustration of the Cost of Information (COI). The COI is defined as the difference between the expected price $\mathbb{E}[p]$ realized by the policy and the minimum viable price $\underline{p}$.} + \label{fig:coi_illustration} +\end{figure} + We now formally demonstrate that standard dynamic pricing mechanisms are not incentive-compatible with high-frequency agentic traffic. As the number of independent competitive agents $N$ querying the system grows, the platform's ability to sustain a COI vanishes. \begin{theorem}[COI Erosion in the Limit] @@ -181,3 +213,6 @@ where $R(p, d)$ is the revenue function and $\lambda$ weighs the penalty for inf \subsubsection{Actor Implementation} In our simulation, the "Follower" is implemented as a set of Actors. Each Actor is initialized with a type $\theta$ which samples a specific demand curve $d(p; \theta)$ from the latent distribution. This formalization ensures that our DR-RL agent does not overfit to a single deterministic demand function but learns a policy robust to the distributional uncertainty defined by $\mathcal{U}_\epsilon$. 
+ + +As part of our reward engineering, we consider the UX factor ($\mathit{UX} \in [0,1]$), which is our proxy for user experience degradation. It is computed as a mixture of contributions from the separability model metric $\frac{1}{\text{Specificity}}$.