refined lit review and sources

2026-07-16 01:53:37 +00:00 · 2026-01-20 16:23:51 +01:00
parent ce0026a61e
commit 49d898f457
3 changed files with 130 additions and 8 deletions
--- a/paper/src/bib/references.bib
+++ b/paper/src/bib/references.bib
@@ -364,10 +364,12 @@ Volume: 21},
 	file = {PDF:/home/velocitatem/Zotero/storage/QNXZJLRM/S2444883425000038.pdf:application/pdf},
 }

-@misc{ghaffary_amazon_nodate,
+@misc{ghaffary_amazon_2025,
 	title = {Amazon {Sues} to {Stop} {Perplexity} {From} {Using} {AI} {Tool} to {Buy} {Stuff}},
 	url = {https://www.bloomberg.com/news/articles/2025-11-04/amazon-demands-perplexity-stop-ai-agent-from-making-purchases},
 	author = {Ghaffary, Shirin and Day, Matt},
+	month = nov,
+	year = {2025},
 	file = {PDF:/home/velocitatem/Zotero/storage/IQL6FPWE/Amazon Sues to Stop Perplexity From Using AI Tool to Buy Stuff - Bloomberg.pdf:application/pdf},
 }

@@ -423,3 +425,113 @@ Volume: 21},
 	keywords = {Computer Science - Computation and Language},
 	file = {PDF:/home/velocitatem/Zotero/storage/3Z2XK4QC/Ganie - 2025 - Uncertainty in Authorship Why Perfect AI Detection Is Mathematically Impossible.pdf:application/pdf},
 }
+
+@article{shi_distributionally_nodate,
+	title = {Distributionally {Robust} {Model}-{Based} {Offline} {Reinforcement} {Learning} with {Near}-{Optimal} {Sample} {Complexity}},
+	abstract = {This paper concerns the central issues of model robustness and sample efficiency in offline reinforcement learning (RL), which aims to learn to perform decision making from history data without active exploration. Due to uncertainties and variabilities of the environment, it is critical to learn a robust policy—with as few samples as possible—that performs well even when the deployed environment deviates from the nominal one used to collect the history dataset. We consider a distributionally robust formulation of offline RL, focusing on tabular robust Markov decision processes with an uncertainty set specified by the Kullback-Leibler divergence in both finite-horizon and infinite-horizon settings. To combat with sample scarcity, a model-based algorithm that combines distributionally robust value iteration with the principle of pessimism in the face of uncertainty is proposed, by penalizing the robust value estimates with a carefully designed data-driven penalty term. Under a mild and tailored assumption of the history dataset that measures distribution shift without requiring full coverage of the state-action space, we establish the finite-sample complexity of the proposed algorithms. We further develop an information-theoretic lower bound, which suggests that learning RMDPs is at least as hard as the standard MDPs when the uncertainty level is sufficient small, and corroborates the tightness of our upper bound up to polynomial factors of the (effective) horizon length for a range of uncertainty levels. To the best our knowledge, this provides the first provably near-optimal robust offline RL algorithm that learns under model uncertainty and partial coverage.},
+	language = {en},
+	author = {Shi, Laixi and Chi, Yuejie},
+	file = {PDF:/home/velocitatem/Zotero/storage/K56G4EIP/Shi and Chi - Distributionally Robust Model-Based Oﬄine Reinforcement Learning with Near-Optimal Sample Complexity.pdf:application/pdf},
+}
+
+@article{dutting_mechanism_nodate,
+	title = {Mechanism {Design} for {Large} {Language} {Models} ({Extended} {Abstract})},
+	abstract = {We investigate auction mechanisms for AI-generated content, focusing on applications like ad creative generation. In our model, agents’ preferences over stochastically generated content are encoded as large language models (LLMs). We propose an auction format that operates on a token-by-token basis, and allows LLM agents to influence content creation through single dimensional bids. We formulate two desirable incentive properties and prove their equivalence to a monotonicity condition on output aggregation. This equivalence enables a second-price rule design, even absent explicit agent valuation functions. Our design is supported by demonstrations on a publicly available LLM.},
+	language = {en},
+	author = {Dütting, Paul and Mirrokni, Vahab and Paes Leme, Renato and Xu, Haifeng and Zuo, Song},
+	file = {PDF:/home/velocitatem/Zotero/storage/2ABDEYDN/Dütting et al. - Mechanism Design for Large Language Models (Extended Abstract).pdf:application/pdf},
+}
+
+@misc{fcmi_machine_2025,
+	title = {Machine {Speed} {Markets}: {AI} {Agent} {Market} {Strategy} \& {Growth}},
+	shorttitle = {Machine {Speed} {Markets}},
+	url = {https://www.360strategy.co.uk/post/machine-speed-markets-ai-agents},
+	abstract = {Recent research by NBER economists suggests these AI agents in particular, could drive a "Coasean singularity," a point where transaction costs fall towards zero, radically reshaping how markets function. In essence, tasks like finding information, negotiating deals, and enforcing contracts which are traditionally costly frictions in commerce, may become nearly instantaneous and costless.},
+	language = {en},
+	urldate = {2026-01-20},
+	howpublished = {360 Strategy},
+	author = {Evans, Mark},
+	month = nov,
+	year = {2025},
+	file = {Snapshot:/home/velocitatem/Zotero/storage/Z22P9JJH/machine-speed-markets-ai-agents.html:text/html},
+}
+
+@article{coase_nature_1937,
+	title = {The {Nature} of the {Firm}},
+	volume = {4},
+	issn = {1468-0335},
+	url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1468-0335.1937.tb00002.x},
+	doi = {10.1111/j.1468-0335.1937.tb00002.x},
+	language = {en},
+	number = {16},
+	urldate = {2026-01-20},
+	journal = {Economica},
+	author = {Coase, R. H.},
+	month = nov,
+	year = {1937},
+	pages = {386--405},
+	file = {Full Text PDF:/home/velocitatem/Zotero/storage/TABLLPEU/Coase - 1937 - The Nature of the Firm.pdf:application/pdf;Snapshot:/home/velocitatem/Zotero/storage/Q5RFW9LJ/j.1468-0335.1937.tb00002.html:text/html},
+}
+
+@misc{fish_algorithmic_2025,
+	author = {Fish, Sara and Gonczarowski, Yannai A. and Shorrer, Ran I.},
+	title = {Algorithmic {Collusion} by {Large} {Language} {Models}},
+	year = {2025},
+	month = sep,
+	publisher = {arXiv},
+	note = {arXiv:2404.00806 [econ]},
+	doi = {10.48550/arXiv.2404.00806},
+	url = {http://arxiv.org/abs/2404.00806},
+	urldate = {2026-01-20},
+	language = {en},
+	keywords = {Computer Science - Artificial Intelligence, Computer Science - Computer Science and Game Theory, Economics - General Economics},
+	abstract = {The rise of algorithmic pricing raises concerns of algorithmic collusion. We conduct experiments with algorithmic pricing agents based on Large Language Models (LLMs). We find that LLM-based pricing agents quickly and autonomously reach supracompetitive prices and profits in oligopoly settings and that variation in seemingly innocuous phrases in LLM instructions (“prompts”) may substantially influence the degree of supracompetitive pricing. Off-path analysis using novel techniques uncovers price-war concerns as contributing to these phenomena. Our results extend to auction settings. Our findings uncover unique challenges to any future regulation of LLM-based pricing agents, and AI-based pricing agents more broadly.},
+	file = {PDF:/home/velocitatem/Zotero/storage/QHWVISCZ/Fish et al. - 2025 - Algorithmic Collusion by Large Language Models.pdf:application/pdf},
+}
+
+@misc{hardt_strategic_2015,
+	title = {Strategic {Classification}},
+	url = {http://arxiv.org/abs/1506.06980},
+	doi = {10.48550/arXiv.1506.06980},
+	abstract = {Machine learning relies on the assumption that unseen test instances of a classification problem follow the same distribution as observed training data. However, this principle can break down when machine learning is used to make important decisions about the welfare (employment, education, health) of strategic individuals. Knowing information about the classifier, such individuals may manipulate their attributes in order to obtain a better classification outcome. As a result of this behavior—often referred to as gaming—the performance of the classifier may deteriorate sharply. Indeed, gaming is a well-known obstacle for using machine learning methods in practice; in financial policy-making, the problem is widely known as Goodhart’s law. In this paper, we formalize the problem, and pursue algorithms for learning classifiers that are robust to gaming.},
+	language = {en},
+	urldate = {2026-01-20},
+	publisher = {arXiv},
+	author = {Hardt, Moritz and Megiddo, Nimrod and Papadimitriou, Christos and Wootters, Mary},
+	month = nov,
+	year = {2015},
+	note = {arXiv:1506.06980 [cs]},
+	keywords = {Computer Science - Machine Learning},
+	file = {PDF:/home/velocitatem/Zotero/storage/HNCDYGWS/Hardt et al. - 2015 - Strategic Classification.pdf:application/pdf},
+}
+
+@misc{liu_contextual_2024,
+	title = {Contextual {Dynamic} {Pricing} with {Strategic} {Buyers}},
+	url = {http://arxiv.org/abs/2307.04055},
+	doi = {10.48550/arXiv.2307.04055},
+	abstract = {Personalized pricing, which involves tailoring prices based on individual characteristics, is commonly used by firms to implement a consumer-specific pricing policy. In this process, buyers can also strategically manipulate their feature data to obtain a lower price, incurring certain manipulation costs. Such strategic behavior can hinder firms from maximizing their profits. In this paper, we study the contextual dynamic pricing problem with strategic buyers. The seller does not observe the buyer's true feature, but a manipulated feature according to buyers' strategic behavior. In addition, the seller does not observe the buyers' valuation of the product, but only a binary response indicating whether a sale happens or not. Recognizing these challenges, we propose a strategic dynamic pricing policy that incorporates the buyers' strategic behavior into the online learning to maximize the seller's cumulative revenue. We first prove that existing non-strategic pricing policies that neglect the buyers' strategic behavior result in a linear $\Omega(T)$ regret with $T$ the total time horizon, indicating that these policies are not better than a random pricing policy. We then establish that our proposed policy achieves a sublinear regret upper bound of $O(\sqrt{T})$. Importantly, our policy is not a mere amalgamation of existing dynamic pricing policies and strategic behavior handling algorithms. Our policy can also accommodate the scenario when the marginal cost of manipulation is unknown in advance. To account for it, we simultaneously estimate the valuation parameter and the cost parameter in the online pricing policy, which is shown to also achieve an $O(\sqrt{T})$ regret bound. Extensive experiments support our theoretical developments and demonstrate the superior performance of our policy compared to other pricing policies that are unaware of the strategic behaviors.},
+	language = {en},
+	urldate = {2026-01-20},
+	publisher = {arXiv},
+	author = {Liu, Pangpang and Yang, Zhuoran and Wang, Zhaoran and Sun, Will Wei},
+	month = jun,
+	year = {2024},
+	note = {arXiv:2307.04055 [stat]},
+	keywords = {Computer Science - Artificial Intelligence, Computer Science - Computer Science and Game Theory, Computer Science - Machine Learning, Statistics - Machine Learning},
+	file = {PDF:/home/velocitatem/Zotero/storage/MVJNULK3/Liu et al. - 2024 - Contextual Dynamic Pricing with Strategic Buyers.pdf:application/pdf},
+}
+
+@techreport{dhir_http_2025,
+	type = {Internet {Draft}},
+	title = {{HTTP} {Agent} {Profile} ({HAP}): {Authenticated} and {Monetized} {Agent} {Traffic} on the {Web}},
+	shorttitle = {{HTTP} {Agent} {Profile} ({HAP})},
+	url = {https://datatracker.ietf.org/doc/draft-dhir-http-agent-profile},
+	abstract = {Autonomous agents such as LLM-powered crawlers, browser-integrated assistants, and task-oriented bots are rapidly becoming first-class HTTP clients on the Web. Today’s infrastructure largely assumes a human behind a browser and monetizes content through advertising and coarse subscriptions. Automated agents consume content at scale without rendering pages or viewing ads, exacerbating bot-mitigation arms races and economic misalignment between content providers and AI systems. This document describes an HTTP Agent Profile (HAP) that enables: (1) cryptographic authentication of agent traffic using HTTP Message Signatures; (2) clear separation between human and agent traffic using privacy-preserving human tokens; and (3) protocol-level value exchange for agents via HTTP status code 402 ("Payment Required") and pluggable micropayment mechanisms. The profile reuses existing HTTP features and is designed for incremental deployment via reverse proxies, CDNs, and agent libraries.},
+	number = {draft-dhir-http-agent-profile-00},
+	urldate = {2026-01-20},
+	institution = {Internet Engineering Task Force},
+	author = {Dhir, Sanat},
+	month = nov,
+	year = {2025},
+	pagetotal = {13},
+}
--- a/paper/src/chapters/01-intro.tex
+++ b/paper/src/chapters/01-intro.tex
@@ -18,7 +18,7 @@ The current innovation boom in generative artificial intelligence and its applic

 The key stakeholders affected by the threat of increasing agent-driven traffic include online businesses and platform operators (especially in bot-heavy sectors like retail, travel, and financial services), their security, fraud, and engineering teams, end users whose accounts and data are exposed and whose experience degrades, regulators and legal stakeholders responding to breaches and fraud, and the attackers or bot operators driving the automation \parencite{imperva_rapid_2025}.

-The industry has already seen legal action in cases like Amazon against Perplexity \parencite{ghaffary_amazon_nodate}, stemming from the difficulty of identifying traffic from hybrid systems like the Commet browser. This paper explores such systems to better understand what the interaction data looks like and what it means for dynamic pricing and recommendation systems downstream. This observed impact indicates a need for prevention of secondary negative effects on the ``legacy'' systems which power modern revenue sources for many companies. Dynamic pricing algorithms rely on directly translating demand features $q$ to new price assignments $\hat{p}$ across a catalogue of products of size $N$. This opens opportunities to design a \textit{tabula rasa} of digital market mechanisms that will shape the future of commerce in the age of artificial intelligence.
+The industry has already seen legal action in cases like Amazon against Perplexity \parencite{ghaffary_amazon_2025}, stemming from the difficulty of identifying traffic from hybrid systems like the Comet browser. This paper explores such systems to better understand what the interaction data looks like and what it means for dynamic pricing and recommendation systems downstream. This observed impact indicates a need for prevention of secondary negative effects on the ``legacy'' systems which power modern revenue sources for many companies. Dynamic pricing algorithms rely on directly translating demand features $q$ to new price assignments $\hat{p}$ across a catalogue of products of size $N$. This opens opportunities to design a \textit{tabula rasa} of digital market mechanisms that will shape the future of commerce in the age of artificial intelligence.

 \subsection{Solution Space Overview}
 Dynamic pricing systems, as presented by \textcite{mueller_low-rank_2019}, often deal with sparse low-rank data of demand signals which, combined with contamination from agents, creates complex interactions that impact pricing. To further complicate the problem, certain commercial settings such as the one presented by \textcite{amjad_censored_2017} must address the true demand of products under censored observations. This provides a formulation for handling demand in our case with multiple kinds of commercial mediators: $\hat{q} \gets q_A + q_H$ where $q_A$ represents the distribution of demand generated by agentic mediators and $q_H$ represents that of true human demand, these are two distinct populations with divergent objective functions.
--- a/paper/src/chapters/02-literature-review.tex
+++ b/paper/src/chapters/02-literature-review.tex
@@ -1,22 +1,23 @@
 \section{Literature Review}

-To better understand all wedges of the work, we must start by exploring the nature of agents and agentic computer use and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover, leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. The introduction of these mediating actor entities into economic systems, is further creating a threat of false-name bidding \parencite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems, demonstrate whitewashing in AI agents which can ignore defensive mechanisms by re-entry with different identities \parencite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for separating non-human reconnaissance from genuine human demand expression and integrating that separation into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap, is what our contribution aims to address, particularly for the aforementioned stakeholder groups.
+To better understand all wedges of the current body of work, we must start by exploring the nature of agents, agentic computer use, and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. The introduction of these mediating actor entities into economic systems is further creating a threat of false-name bidding \parencite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems demonstrates whitewashing in AI agents, which can bypass defensive mechanisms by re-entering under different identities \parencite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for separating non-human reconnaissance from genuine human demand expression and integrating that separation into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap is what our contribution aims to address, particularly for the aforementioned stakeholder groups.

 \subsection{Agent Taxonomy and Definitions}

 An agent in the context of artificial intelligence is generally defined by anything that can reason and act upon observations of its environments (collected through some sensory inputs) and carry out actions through effectors. Moreover, a rational agent is an entity that is capable of perceiving the world around them and taking actions to advance specified goals. This definition by \textcite{russell_artificial_nodate} is further developed in an economic context by \textcite{parkes_economic_2015}, suggesting AI research attempts to construct a synthetic \textit{homo economicus}, which may also be termed \textit{machina economicus}.
 A specific class or taxon of this \textit{machina economicus}, the Large Language Model (LLM) agent, is defined as an autonomous system capable of achieving goals and adapting post-training, often without needing explicit code or fundamental model changes \parencite{xia_evaluation-driven_2025}.

-We must however acknowledge the current SOTA as presented by OSWORLD simulations by \textcite{xie_osworld_nodate} have demonstrated that multi-modal tasks across desktop and web interaction modes, have a top-performing score of only 12.24\% success, whereas humans have a higher 72\% success rate. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems.
+We must, however, acknowledge that the current SOTA, as presented in the OSWorld simulations by \textcite{xie_osworld_nodate}, shows that on multi-modal tasks across desktop and web interaction modes the top-performing agent achieves only 12.24\% success, whereas humans achieve a higher 72\% success rate; this is linked to these agents' lack of grounding and their inability to handle unexpected errors. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems.

 We model an agent session as producing some events with lower in-session conversion levels relative to humans, this we state in our assumption that $P(\text{purchase} \vert A) \ll P(\text{purchase} \vert H)$ but with a potentially higher volatility in $\hat{q}$, which we observe through the look-to-book metrics in our simulation.

 \subsection{Economic Agents: From Homo Economicus to Machina Economicus}

-Existing behavioral economic models tend to be criticized for the assumption of rational behavior, as is embodied in the term of homo economicus. The definition of a machina economicus by \textcite{parkes_economic_2015} is quite appropriate for our case, particularly because these assumptions of rationality have been argued to be a very adequate reference for AI research by \textcite{varian_economic_1995}. For modeling this behavior, the trajectories of these agents can be formally defined to be partially observable Markov decision processes \parencite{xie_osworld_nodate}. Agents are however not to be confused with web-bots which have previously been known as automated software applications or scrapers which are set with a purpose of carrying out specific tasks on the internet, without a higher level of internal judgement \parencite{imperva_rapid_2025}. In our research, we refer to this actor simply as an Agent belonging to the distribution $A$.
+Existing behavioral economic models tend to be criticized for the assumption of rational behavior, as embodied in the term \textit{homo economicus}. The definition of a \textit{machina economicus} by \textcite{parkes_economic_2015} is quite appropriate for our case, particularly because these assumptions of rationality have been argued by \textcite{varian_economic_1995} to be a very adequate reference for AI research, given the expected-utility-maximizing nature of such agents. For modeling this behavior, the trajectories of these agents can be formally defined to be partially observable Markov decision processes \parencite{xie_osworld_nodate}. Agents are, however, not to be confused with web bots, which have previously been known as automated software applications or scrapers that are set up to carry out specific tasks on the internet without a higher level of internal judgement \parencite{imperva_rapid_2025}. In our research, we refer to this actor simply as an Agent belonging to the distribution $A$.

 This economic framing also helps separate two related but distinct phenomena of agents as buyers (changing market demand composition), and agents as information gatherers (changing the observed interactions used by pricing/recommendation systems). The thesis focuses on the second, where information acquisition strategically precedes purchase execution. We do not however dismiss the proposed expectation that existing economic systems serving humans, will not be populated by AIs across multiple channels and with various possibly misaligned goals as stated by \textcite{parkes_economic_2015}.

+An HTTP Agent Profile (HAP) protocol has been proposed as an Internet-Draft by \textcite{dhir_http_2025} in an effort to separate agentic and human internet traffic; however, adoption by a majority of both sellers and agent providers would be required for such a solution to be implemented.

 \subsection{Problem Evidence and Market Impact}

@@ -25,16 +26,19 @@ The statistical issue of contamination in dynamic pricing systems that observe d
 When dynamic pricing algorithms operate on highly contaminated or noisy data, the risk grows significantly in creating inaccurate price inferences. The emergent mitigation driven by un-informed reward and regret signals might lead to price suppression for sales continuity which results in harming margins and resulting in a revenue loss. System that poorly fit undesired behavior might result in price gouging, which calls for strong guardrails while preserving targeted business strategy \parencite{mullapudi_reinforcement_nodate}.


+
+
 %Documented instances of agent-driven market disruptions - Quantitative evidence of pricing manipulation - Case studies from affected industries

 \subsection{Theoretical Foundations: Economic Parallels}


-
 Early hints of exploration of prices in a standard English auction explored by \textcite{varian_economic_1995} which hints at exploration of prices in a sequential manner, which leads to a marginally different cost to the bidder than the reservation price of the seller. This is a setting in which there is no cost incured by the buyer for their actions or exploring prices in the market. They propose that any agent responsable for the pricing of a good must be imune to dynamic strategies which might extract private information from a market. A key take-away which relates to the Vickery auction mechanism (also called a \textit{direct mechanism}) suggests that not only would defenses against such exploitation be necessary, but the construction of a mechanism in which revelation of the true willingness to pay is the dominant strategy for commerce.

 Like in classical revenue-maximizing auctions \parencite{roughgarden_cs364a_2013} we assume that the human actor in our system has a private valuation $v$ which we formally draw from later defined distributions. The important note here is that the agent proxy does not have a mechanism to convey this private information into the demand data which directly impacts the pricing systems.

+The key component of this mediation between agents and commercial platforms lies in the transaction costs related to information gathering and negotiation. As proposed by \textcite{shahidi_coasean_nodate}, these costs are bound to collapse towards zero (which we demonstrate mathematically), calling for a re-evaluation of the boundaries between firms and markets. As argued by \textcite{coase_nature_1937}, market participation and the time associated with that participation are a critical part of the Coasean transaction-cost logic, which includes the discovery of relevant pricing within a given market. This process of price discovery without the presence of AI agents can be time-consuming and resource-intensive. To build on this work, we provide a proof of the optimal conditions theorised by Coase as an extension to AI-mediated markets.
+
 % Economic foundations: relating the problem to options pricing theory. Cost of Information (COI) concept and its relevance

 % Link Coasean Singularity and other economic market theory and highlight specific information of supra competitive pricing.
@@ -42,5 +46,11 @@ Like in classical revenue-maximizing auctions \parencite{roughgarden_cs364a_2013

 \subsection{Landscape of Existing Work}

-Previous efforts in adversarial computer use LLM agents, show how multi-faceted the whole problem is
-Here we can show a market visualization (venn-like-diagram)
+Explorations of algorithmic collusion by LLMs \parencite{fish_algorithmic_2025} have demonstrated a cross-model tendency towards market division, with a strong sensitivity to instructions provided in the ``system prompt''. If a dynamic pricing algorithm that is trained to respond to market signals learns to coordinate with competitor agents (or becomes manipulated by those agents), the market equilibrium is under threat of destabilization. This is particularly true for Q-learning pricing learners as demonstrated by \textcite{calvano_artificial_2018}.
+
+Our effort to combat contamination stems from research by \textcite{hardt_strategic_2015} on strategic classification, in conjunction with \textcite{liu_contextual_2024} who demonstrate a linear regret if contamination is ignored. The strategic classification adversarial effect comes from an effort to manipulate some representative features used in a learning pipeline, which can result in lower prices on loans or lower prices from dynamic pricing algorithms.
+
+To bridge the gap between detection and robust pricing, we look at work in Distributionally Robust Optimization (DRO). As defined by \textcite{kuhn_wasserstein_2024}, DRO provides a framework for decision-making under ambiguity, where the true data distribution is unknown but lies within a ``Wasserstein ball'' of a target distribution. In our context, the ``ambiguity set'' represents the uncertainty introduced by agentic reconnaissance. By optimizing for the worst-case distribution within this set, pricing mechanisms can become resilient to the distributional shifts caused by non-human actors, effectively robustifying the revenue function against the contamination described in our problem statement.
+
+%Previous efforts in adversarial computer use LLM agents, show how multi-faceted the whole problem is
+%Here we can show a market visualization (venn-like-diagram)