From 73f5dc711982b3919e6233e510f007ffe0e78978 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Mon, 26 Jan 2026 12:18:08 +0100 Subject: [PATCH] lit review document setup --- paper/src/auto/main.el | 5 ---- paper/src/bib/references.bib | 19 +++++++++---- paper/src/chapters/02-literature-review.tex | 31 +++++++++++++++------ paper/src/chapters/03-methodology.tex | 5 ++-- paper/src/main.tex | 18 ++++++------ paper/src/preamble.tex | 1 + 6 files changed, 48 insertions(+), 31 deletions(-) diff --git a/paper/src/auto/main.el b/paper/src/auto/main.el index 6738458..08d7b15 100644 --- a/paper/src/auto/main.el +++ b/paper/src/auto/main.el @@ -12,11 +12,6 @@ "preamble" "chapters/01-intro" "chapters/02-literature-review" - "chapters/03-methodology" - "chapters/04-results" - "chapters/05-discussion" - "chapters/06-conclusion" - "../build/concatenated_code" "article" "art12")) :latex) diff --git a/paper/src/bib/references.bib b/paper/src/bib/references.bib index a4ba6c7..4689742 100644 --- a/paper/src/bib/references.bib +++ b/paper/src/bib/references.bib @@ -323,10 +323,11 @@ Volume: 21}, file = {PDF:/home/velocitatem/Zotero/storage/N9JNXFJW/live-1333-2265-jair.pdf:application/pdf}, } -@techreport{shoham_multiagent_nodate, +@techreport{shoham_multiagent_2009, title = {Multiagent {Systems}: {Algorithmic}, {Game}-{Theoretic}, and {Logical} {Foundations}}, url = {http://www.masfoundations.org.}, author = {Shoham, Yoav and Leyton-Brown, Kevin}, + year = {2009}, keywords = {algorithms, auctions, communication, competition, cooperation, distributed problem solving, game theory, learning, logic, mechanism design, social choice}, file = {PDF:/home/velocitatem/Zotero/storage/QZVYS7V9/shoham09a.pdf:application/pdf}, } @@ -393,7 +394,7 @@ Volume: 21}, month = dec, year = {2007}, note = {Publication Title: Operations Research}, - keywords = {asymptotic analysis, estimation, exploration-exploitation, learning, pricing, Revenue management, value of information}, + keywords = {learning, 
asymptotic analysis, estimation, exploration-exploitation, pricing, Revenue management, value of information}, file = {PDF:/home/velocitatem/Zotero/storage/SBAIB4V2/Dp_wo_demand_risk_ob_az_posted.pdf:application/pdf}, } @@ -486,7 +487,6 @@ Volume: 21}, journal = {Economica}, author = {Coase, R. H.}, year = {1937}, - note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1111/j.1468-0335.1937.tb00002.x}, pages = {386--405}, file = {Full Text PDF:/home/velocitatem/Zotero/storage/TABLLPEU/Coase - 1937 - The Nature of the Firm.pdf:application/pdf;Snapshot:/home/velocitatem/Zotero/storage/Q5RFW9LJ/j.1468-0335.1937.tb00002.html:text/html}, } @@ -503,7 +503,7 @@ Volume: 21}, month = sep, year = {2025}, note = {arXiv:2404.00806 [econ]}, - keywords = {Computer Science - Artificial Intelligence, Computer Science - Computer Science and Game Theory, Economics - General Economics}, + keywords = {Computer Science - Computer Science and Game Theory, Computer Science - Artificial Intelligence, Economics - General Economics}, file = {PDF:/home/velocitatem/Zotero/storage/QHWVISCZ/Fish et al. - 2025 - Algorithmic Collusion by Large Language Models.pdf:application/pdf}, } @@ -535,7 +535,7 @@ Volume: 21}, month = jun, year = {2024}, note = {arXiv:2307.04055 [stat]}, - keywords = {Computer Science - Artificial Intelligence, Computer Science - Computer Science and Game Theory, Computer Science - Machine Learning, Statistics - Machine Learning}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Computer Science and Game Theory, Computer Science - Artificial Intelligence}, file = {PDF:/home/velocitatem/Zotero/storage/MVJNULK3/Liu et al. - 2024 - Contextual Dynamic Pricing with Strategic Buyers.pdf:application/pdf}, } @@ -553,3 +553,12 @@ Volume: 21}, year = {2025}, note = {Num Pages: 13}, } + +@misc{noauthor_amazoncom_2026, + title = {Amazon.com {Services} {LLC} v. 
{Perplexity} {AI}, {Inc}}, + language = {en}, + month = jan, + year = {2026}, + note = {No. 3:25-cv-09514-MMC}, + file = {PDF:/home/velocitatem/Zotero/storage/4JWZSTXJ/Posner - UNITED STATES DISTRICT COURT NORTHERN DISTRICT OF CALIFORNIA SAN FRANCISCO DIVISION.pdf:application/pdf}, +} diff --git a/paper/src/chapters/02-literature-review.tex b/paper/src/chapters/02-literature-review.tex index af50df6..b75624a 100644 --- a/paper/src/chapters/02-literature-review.tex +++ b/paper/src/chapters/02-literature-review.tex @@ -1,6 +1,6 @@ \section{Literature Review} -To better understand all wedges of the current works, we must start by exploring the nature of agents, agentic computer use and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover, leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. The introduction of these mediating actor entities into economic systems, is further creating a threat of false-name bidding \parencite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems, demonstrate whitewashing in AI agents which can ignore defensive mechanisms by re-entry with different identities \parencite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for separating non-human reconnaissance from genuine human demand expression and integrating that separation into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap, is what our contribution aims to address, particularly for the aforementioned stakeholder groups. 
+To better understand all wedges of the current works, we must start by exploring the nature of agents, agentic computer use and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. The former case of agents shopping is currently the subject of pending legal action in \textcite{noauthor_amazoncom_2026}, where it is alleged to violate the Computer Fraud and Abuse Act. The introduction of these mediating actor entities into economic systems is further creating a threat of false-name bidding \parencite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems demonstrates whitewashing in AI agents, which can ignore defensive mechanisms by re-entry with different identities \parencite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for separating non-human reconnaissance from genuine human demand expression and integrating that separation into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap is what our contribution aims to address, particularly for the aforementioned stakeholder groups. 
\subsection{Agent Taxonomy and Definitions} @@ -9,7 +9,7 @@ A specific class or taxon of this \textit{machina economicus}, the Large Languag We must however acknowledge the current SOTA as presented by OSWORLD simulations by \textcite{xie_osworld_2024} have demonstrated that multi-modal tasks across desktop and web interaction modes, have a top-performing score of only 12.24\% success, whereas humans have a higher 72\% success rate; this is linked to the lack of grounding of these agents and their inability of handling unexpected errors. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems. -We model an agent session as producing some events with lower in-session conversion levels relative to humans, this we state in our assumption that $P(\text{purchase} \vert A) \ll P(\text{purchase} \vert H)$ but with a potentially higher volatility in $\hat{q}$, which we observe through the look-to-book metrics in our simulation. +We model an agent session as producing some events with lower in-session conversion levels relative to humans, this we state in our assumption that $P(\text{purchase} \vert A) < P(\text{purchase} \vert H)$ but with a potentially higher volatility in $\hat{q}$, which we observe through the look-to-book metrics in our simulation. \subsection{Economic Agents: From Homo Economicus to Machina Economicus} @@ -26,8 +26,6 @@ The statistical issue of contamination in dynamic pricing systems that observe d When dynamic pricing algorithms operate on highly contaminated or noisy data, the risk grows significantly in creating inaccurate price inferences. 
The emergent mitigation driven by un-informed reward and regret signals might lead to price suppression for sales continuity which results in harming margins and resulting in a revenue loss. System that poorly fit undesired behavior might result in price gouging, which calls for strong guardrails while preserving targeted business strategy \parencite{mullapudi_reinforcement_2025}. - - %Documented instances of agent-driven market disruptions - Quantitative evidence of pricing manipulation - Case studies from affected industries \subsection{Theoretical Foundations: Economic Parallels} @@ -35,13 +33,12 @@ When dynamic pricing algorithms operate on highly contaminated or noisy data, th Early hints of exploration of prices in a standard English auction explored by \textcite{varian_economic_1995} which hints at exploration of prices in a sequential manner, which leads to a marginally different cost to the bidder than the reservation price of the seller. This is a setting in which there is no cost incured by the buyer for their actions or exploring prices in the market. They propose that any agent responsable for the pricing of a good must be imune to dynamic strategies which might extract private information from a market. A key take-away which relates to the Vickery auction mechanism (also called a \textit{direct mechanism}) suggests that not only would defenses against such exploitation be necessary, but the construction of a mechanism in which revelation of the true willingness to pay is the dominant strategy for commerce. -Like in classical revenue-maximizing auctions \parencite{roughgarden_cs364a_2013} we assume that the human actor in our system has a private valuation $v$ which we formally draw from later defined distributions. The important note here is that the agent proxy does not have a mechanism to convey this private information into the demand data which directly impacts the pricing systems. 
+Like in classical revenue-maximizing auctions \parencite{roughgarden_cs364a_2013} we assume that the human actor in our system has a private valuation $v$ which we formally draw from intrinsically defined distributions. The important note here is that the agent proxy does not have a mechanism to convey this private information into the demand data which directly impacts the pricing systems. The key component of this mediation between agents and commercial platforms lays in the transaction costs related to information gathering and negotiation. As proposed by \textcite{shahidi_coasean_2025} these costs are bound to collapse towards zero (which we demonstrate mathematically), calling for a re-evaluation of the boundaries between firms and markets. As argued by \textcite{coase_nature_1937}, the market participation and time associated with that participation, is critical part of the Coasean transaction cost logic which includes the discovery or relevant pricing within a given market. This process of price discovery without the presence of AI Agents can be time consuming and resource intensive. To build on top of this work we provide a proof of optimal conditions theorised by Coaes as an extension to AI-mediated markets. % Economic foundations: relating the problem to options pricing theory. Cost of Information (COI) concept and its relevance -% Link Coasean Singularity and other economic market theory and highlight specific information of supra competitive pricing. \subsection{Landscape of Existing Work} @@ -50,7 +47,25 @@ Explorations of the algorithmic collusion by LLMs \parencite{fish_algorithmic_20 Our effort to combat contamination stems from research by \textcite{hardt_strategic_2015} on strategic classification, in conjunction with \textcite{liu_contextual_2024} who demonstrate a linear regret if contamination is ignored. 
The strategic classification adversarial effect comes from an effort to manipulate some representative features used in a learning pipeline, which can result in lower prices on loans or lower prices from dynamic pricing algorithms. -To bridge the gap between detection and robust pricing, we look at work in Distributionally Robust Optimization (DRO). As defined by \textcite{kuhn_wasserstein_2024}, DRO provides a framework for decision-making under ambiguity, where the true data distribution is unknown but lies within a ``Wasserstein ball'' of a target distribution. In our context, the ``ambiguity set'' represents the uncertainty introduced by agentic reconnaissance. By optimizing for the worst-case distribution within this set, pricing mechanisms can become resilient to the distributional shifts caused by non-human actors, effectively robustifying the revenue function against the contamination described in our problem statement. +To bridge the gap between detection and robust pricing, we look at work in Distributionally Robust Optimization (DRO). As defined by \textcite{kuhn_wasserstein_2024}, DRO provides a framework for decision-making under ambiguity, where the true data distribution is unknown but lies within a ``Wasserstein ball'' of a target distribution. In our context, the ``ambiguity set'' represents the uncertainty introduced by agentic reconnaissance. By optimizing for the worst-case distribution within this set, pricing mechanisms can become resilient to the distributional shifts such as the ones caused by non-human actors, effectively robustifying the revenue function against the contamination described in our problem statement. 
-%Previous efforts in adversarial computer use LLM agents, show how multi-faceted the whole problem is +In order to create an environment in which prices can be tested against a demand estimate generated by some behavioral model, we take inspiration from the architecture proposed by \textcite{ie_recsim_2019} in the RecSim platform built for recommendation systems. By modeling the distinct user behavior as POMDPs we can generate faithful interactions which allow us to generalize, past the constraint which is also present in recommendation systems, of rarely having enough experience with individual actors' interactions for good recommendations without generalization. The key inspiration comes from the user choice modeling which we translate to a user transition model for each distinct actor type (agent or human). We further consider the possibility of modeling our quantitative research platform using dynamic Bayesian networks for the sake of tractability within the system. The contribution of RecSim enables researchers to better understand learning algorithms in fixed environments, a gap we identify as needing to be bridged within the space of dynamic pricing. + +We also acknowledge the difficulty in similarly affected fields such as authorship, where \textcite{ganie_uncertainty_2025} demonstrate the theoretical limits of the distributional divergence between text authored by a human and by a large language model. Their approach of computing the divergence between two distributions demonstrates purely theoretically that no classifier can outperform random guessing on their particular task. This is yet another factor to take into consideration when exploring the potential mitigation strategies. + +The setting of our work is quite complex and covers a wide range of topics, each with its own set of issues that further complicate the task at hand. There is, however, promise in the field of reinforcement learning and adversarial robustness to combat these problems. 
We can summarize the characteristics learned from the review of our environment as: +\begin{enumerate*}[label=(\roman*)] +\item non-stationary demand with temporal noise $\epsilon_t$; +\item contaminated behavioral signals from mixed human-agent traffic with unknown mixing ratio $\alpha$; +\item partial observability where only demand proxies $\hat{q}$ are available, not true demand $d(\cdot)$; +\item strategic actors capable of feature manipulation to influence pricing outcomes; +\item information asymmetry with private valuations $v$ drawn from unknown distributions; +\item session-based interactions modeled as POMDPs with trajectories $\tau_s$; +\item low conversion probability for agents: $P(\text{purchase} \mid A) < P(\text{purchase} \mid H)$; +\item distributional uncertainty requiring robust optimization within Wasserstein ambiguity sets; +\item potential for adversarial exploitation through false-name bidding and identity whitewashing. +\end{enumerate*} + + +%Previous efforts in adversarial computer use LLM agents, show how multi-faceted the whole problem is %Here we can show a market visualization (venn-like-diagram) diff --git a/paper/src/chapters/03-methodology.tex b/paper/src/chapters/03-methodology.tex index 0c81ceb..0c68240 100644 --- a/paper/src/chapters/03-methodology.tex +++ b/paper/src/chapters/03-methodology.tex @@ -175,8 +175,7 @@ Our approach can be well summarized by a three-stage division, first we intend t \caption{Overview of the Dynamic Pricing Tasks.} \end{figure} -% TODO: cite google recism here -Our web platform (developed in similar patterns as the RecSim by Google) allows us to setup a controled environment in which we assign tasks to human and agentic actors which are then carried out. Each actor gets a browser assigned experiment identification which is persistent across possibly multiple session identifiers. We then group by experiments and extract all the session interactions (trajectories) which follow the schema formalized below. 
+Our web platform (developed in similar patterns as the RecSim by \textcite{ie_recsim_2019}) allows us to set up a controlled environment in which we assign tasks to human and agentic actors which are then carried out. Each actor gets a browser-assigned experiment identification which is persistent across possibly multiple session identifiers. We then group by experiments and extract all the session interactions (trajectories) which follow the schema formalized below. \subsubsection{Interaction Schema} @@ -298,7 +297,7 @@ The robust policy $\pi^*$ is obtained by solving the maximin problem: \end{equation} where $R(p, d)$ is the revenue function and $\lambda$ weighs the penalty for information leakage (COI). We previously defined $\text{COI}$, however to properly connect this concept into the reward structure we need to define a parametrized version which informs us of the leakage of said structure with $\text{COI}(p)$. -Another proposed formulation of the optimal policy would be to adjust the ambiguity set dyanmically over the live computed divergence where $\epsilon(\Delta_H)$ to adjust the ball around or estimator according to each behavioral signal emited through a given trajctory. We state this as a possibility but do not peruse it due to literature suggesting that wesserstine methods do not require absolute continuity and are better with ``black swans'' ( Kuhn et al. - 2024 - Wasserstein Distributionally Robust Optimization Theory and Applications in Machine Learning.pdf ). % TODO: cite this properly +Another proposed formulation of the optimal policy would be to adjust the ambiguity set dynamically over the live computed divergence, using $\epsilon(\Delta_H)$ to adjust the ball around our estimator according to each behavioral signal emitted through a given trajectory. 
We state this as a possibility but do not pursue it due to literature suggesting that Wasserstein methods do not require absolute continuity and are better with ``black swans'' \parencite{kuhn_wasserstein_2024}. \subsubsection{Actor Implementation} In our simulation, the "Follower" is implemented as a set of Actors. Each Actor is initialized with a type $\theta$ which samples a specific demand curve $d(p; \theta)$ from the latent distribution. This formalization ensures that our DR-RL agent does not overfit to a single deterministic demand function but learns a policy robust to the distributional uncertainty defined by $\mathcal{U}_\epsilon$. diff --git a/paper/src/main.tex b/paper/src/main.tex index 450db4c..fe38b14 100644 --- a/paper/src/main.tex +++ b/paper/src/main.tex @@ -26,18 +26,16 @@ These behavioral signals serve as inputs for a Distributionally Robust Reinforce \noindent\textbf{Keywords:} Dynamic Pricing, LLM Agents, Adversarial Machine Learning, E-commerce, Behavioral Detection, Reinforcement Learning +\vspace{1em} +\noindent\textbf{Acknowledgments:} Eugene Bykovets, PhD - ETH for helping with problem formulation. This research was supported by the TPU Research Cloud program. +\clearpage \input{chapters/01-intro} \input{chapters/02-literature-review} -\input{chapters/03-methodology} -\input{chapters/04-results} -\input{chapters/05-discussion} -\input{chapters/06-conclusion} - - -\section*{Acknowledgments} -Eugene Bykovets, PhD - ETH for helping with problem formulation. -This research was supported by the TPU Research Cloud program. +% \input{chapters/03-methodology} +% \input{chapters/04-results} +% \input{chapters/05-discussion} +% \input{chapters/06-conclusion} \printbibliography @@ -48,6 +46,6 @@ This research was supported by the TPU Research Cloud program. \item[Agent $A$] An actor of non-human nature, powered by an LLM. \item[Human $H$] An individual human with some job to be done. 
\end{description} -\input{../build/concatenated_code} +% \input{../build/concatenated_code} \end{document} diff --git a/paper/src/preamble.tex b/paper/src/preamble.tex index a8ef0d5..ad633de 100644 --- a/paper/src/preamble.tex +++ b/paper/src/preamble.tex @@ -5,6 +5,7 @@ \usepackage{amsmath} \usepackage{amsthm} \usepackage{appendix} +\usepackage[inline]{enumitem} % Define theorem environments \newtheorem{theorem}{Theorem}