From 0d6e9d3a3da41a3a25684a30f6dbe30643a05bd2 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Sat, 13 Dec 2025 16:12:28 +0100
Subject: [PATCH] stronger lit review and more sources

---
 paper/src/bib/references.bib                | 39 +++++++++++++++++++++
 paper/src/chapters/02-literature-review.tex | 20 +++++++++--
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/paper/src/bib/references.bib b/paper/src/bib/references.bib
index a7c07b9..ebdf0fc 100644
--- a/paper/src/bib/references.bib
+++ b/paper/src/bib/references.bib
@@ -81,3 +81,42 @@
    isbn = {978-1-292-40117-1},
    title = {Artificial Intelligence A Modern Approach Fourth Edition Global Edition}
 }
+@article{Parkes2015,
+   abstract = {The field of artificial intelligence (AI) strives to build rational agents capable of perceiving the world around them and taking actions to advance specified goals. Put another way, AI researchers aim to construct a synthetic homo economicus, the mythical perfectly rational agent of neoclassical economics. We review progress toward creating this new species of machine, machina economicus, and discuss some challenges in designing AIs that can reason effectively in economic contexts. Supposing that AI succeeds in this quest, or at least comes close enough that it is useful to think about AIs in rationalistic terms, we ask how to design the rules of interaction in multi-agent systems that come to represent an economy of AIs. Theories of normative design from economics may prove more relevant for artificial agents than human agents, with AIs that better respect idealized assumptions of rationality than people, interacting through novel rules and incentive systems quite distinct from those tailored for people.},
+   author = {David C. Parkes and Michael P. Wellman},
+   doi = {10.1126/science.aaa8403},
+   issn = {1095-9203},
+   number = {6245},
+   journal = {Science},
+   month = jul,
+   pages = {267--272},
+   pmid = {26185245},
+   publisher = {American Association for the Advancement of Science},
+   title = {Economic reasoning and artificial intelligence},
+   volume = {349},
+   year = {2015}
+}
+
+@misc{Xia2025,
+   abstract = {Large Language Models (LLMs) have enabled the emergence of LLM agents, systems capable of pursuing under-specified goals and adapting after deployment. Evaluating such agents is challenging because their behavior is open ended, probabilistic, and shaped by system-level interactions over time. Traditional evaluation methods, built around fixed benchmarks and static test suites, fail to capture emergent behaviors or support continuous adaptation across the lifecycle. To ground a more systematic approach, we conduct a multivocal literature review (MLR) synthesizing academic and industrial evaluation practices. The findings directly inform two empirically derived artifacts: a process model and a reference architecture that embed evaluation as a continuous, governing function rather than a terminal checkpoint. Together they constitute the evaluation-driven development and operations (EDDOps) approach, which unifies offline (development-time) and online (runtime) evaluation within a closed feedback loop. By making evaluation evidence drive both runtime adaptation and governed redevelopment, EDDOps supports safer, more traceable evolution of LLM agents aligned with changing objectives, user needs, and governance constraints.},
+   author = {Boming Xia and Qinghua Lu and Liming Zhu and Zhenchang Xing and Dehai Zhao and Hao Zhang},
+   month = nov,
+   title = {Evaluation-Driven Development and Operations of {LLM} Agents: A Process Model and Reference Architecture},
+   url = {https://arxiv.org/abs/2411.13768},
+   year = {2025}
+}
+
+@techReport{Varian,
+   abstract = {The field of economic mechanism design has been an active area of research in economics for at least 20 years. This field uses the tools of economics and game theory to design ``rules of interaction'' for economic transactions that will, in principle, yield some desired outcome. In this paper I provide an overview of this subject for an audience interested in applications to electronic commerce and discuss some special problems that arise in this context.},
+   author = {Hal R Varian},
+   title = {Economic Mechanism Design for Computerized Agents}
+}
+@article{Mullapudi,
+   abstract = {Dynamic pricing represents a critical strategic challenge in modern e-commerce, where firms must navigate fluctuating demand, inventory constraints, and aggressive competitor actions. Traditional static and heuristic-based pricing models often fail to capture the complex, non-linear dynamics of competitive digital markets, leading to suboptimal profitability. This paper proposes a model-free reinforcement learning (RL) framework to address this challenge. Specifically, we design, implement, and evaluate a Q-learning agent capable of learning an optimal, state-dependent pricing policy. The agent is trained and evaluated within a simulated market environment constructed from the publicly available "Retail Price Optimization" dataset from Kaggle, which provides a rich feature set including historical sales, product characteristics, seasonality, and, crucially, competitor pricing data. The problem is formulated as a Markov Decision Process (MDP), where the agent's state incorporates its price position relative to competitors, competitor price trends, and seasonal factors. The agent's performance is benchmarked against three baseline strategies: static pricing, a reactive "follow-the-leader" heuristic, and random pricing. The results demonstrate that the Q-learning agent achieves a substantial increase in total cumulative profit over the evaluation period, outperforming all baselines by learning a nuanced policy that strategically balances price adjustments in response to market conditions. This work provides a practical and reproducible blueprint for applying reinforcement learning to optimize pricing decisions in a simulated yet realistic competitive retail environment, highlighting the potential of RL to automate complex strategic decision-making.},
+   author = {Pavan Mullapudi},
+   number = {4},
+   journal = {International Journal on Science and Technology},
+   keywords = {Dynamic Pricing,Markov Decision Process,Price Optimization,Q-Learning,Reinforcement Learning,Retail Analytics},
+   title = {A Reinforcement Learning Approach to Dynamic Pricing},
+   volume = {16}
+}
diff --git a/paper/src/chapters/02-literature-review.tex b/paper/src/chapters/02-literature-review.tex
index 2896ea9..fc9d0e4 100644
--- a/paper/src/chapters/02-literature-review.tex
+++ b/paper/src/chapters/02-literature-review.tex
@@ -4,14 +4,28 @@ To better understand all wedges of the work, we must start by exploring the natu
 
 \subsection{Agent Taxonomy and Definitions}
 
-describe agents as defined in \cite{Russell}
+An agent in the context of artificial intelligence is generally defined as anything that can reason and act upon observations of its environment (collected through some sensory inputs) and carry out actions through effectors. Moreover, a rational agent is an entity that is capable of perceiving the world around it and taking actions to advance specified goals. This definition by \cite{Russell} is further developed in an economic context by \cite{Parkes2015}, who suggest that AI research attempts to construct a synthetic \textit{homo economicus}, which may also be termed \textit{machina economicus}.
+A specific class or taxon of this \textit{machina economicus}, the Large Language Model (LLM) agent, is defined as an autonomous system capable of achieving goals and adapting post-training, often without needing explicit code or fundamental model changes \cite{Xia2025}.
+
+We must, however, acknowledge that the current state of the art (SOTA), as presented by the OSWorld simulations in \cite{Xie}, demonstrates that on multi-modal tasks across desktop and web interaction modes the top-performing model achieves a success rate of only 12.24\%, whereas humans achieve a considerably higher success rate of 72\%. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems.
 
 \subsection{Economic Agents: From Homo Economicus to Machina Economicus}
 
-Existing behvarioal economic models tend to be criticized for the assumption of rational behavior, as is embodied in the term of homo economicus. The definition of a machina economicus by \cite{Parkes2015} is quite apropriate for our case...
-What is the taxonomy and definition of an agent and an actor in this case, a bit more about interaction models in sessions and about dynamic pricing algorithms.
+Existing behavioral economic models tend to be criticized for the assumption of rational behavior, as is embodied in the term \textit{homo economicus}. The definition of a \textit{machina economicus} by \cite{Parkes2015} is quite appropriate for our case, particularly because these assumptions of rationality have been argued to be a very adequate reference for AI research by \cite{Varian}. For modeling this behavior, the trajectories of these agents can be formally defined to be partially observable Markov decision processes \cite{Xie}. Agents are, however, not to be confused with web-bots, which have previously been known as automated software applications or scrapers that are set up to carry out specific tasks on the internet, without a higher level of internal judgement \cite{Imperva2025}. In our research, we refer to this actor simply as an Agent belonging to the distribution $A$.
+
+This economic framing also helps separate two related but distinct phenomena: agents as buyers (changing market demand composition), and agents as information gatherers (changing the observed interactions used by pricing/recommendation systems). The thesis focuses on the second, where information acquisition strategically precedes purchase execution.
+
 
 \subsection{Problem Evidence and Market Impact}
+
+% For dynamic pricing systems that map observed demand features into updated prices, contamination is not only a security issue but a statistical one: automated reconnaissance can distort session-level funnels (view-to-cart, look-to-book), inflate demand proxies, and bias elasticity estimates. The practical consequence is mispricing—either supra-competitive outcomes driven by inflated demand signals, or defensive price suppression that harms margin and legitimate customer experience.
+
+The statistical issue of contamination in dynamic pricing systems that observe demand features as a means to update prices has been documented in various previous contexts. The airline industry (which has accounted for 24\% of observed disruptions) has seen malicious activity with a measurable impact on key performance indicators such as the look-to-book ratio. Excessive reconnaissance traffic inflates search volume without corresponding completed bookings, thereby skewing demand forecasts and disrupting dynamic pricing models. Distorted demand proxies have also been observed to pose a significant threat to inventory management by creating artificial scarcity that distorts the demand-supply relationships in the enterprise model. Censored demand, as shown in \cite{Amjad2017}, can also be observed in low-bias demand under-estimation caused by a distortion effect coming from non-human traffic data \cite{Imperva2025}.
+
+% • Defensive Price Suppression (Harm to Margin): When algorithms operate on heavily contaminated or noisy data, the risk associated with inaccurate price setting increases. To mitigate the unknown risk introduced by bad data, some systems may default to defensive price suppression to ensure sales continuity, thereby unnecessarily harming margins and resulting in lost revenue. Furthermore, systems that are poorly constrained can learn undesirable behaviors like price gouging in pursuit of short-term rewards if not properly monitored.
+\cite{Mullapudi}
+
+
 Documented instances of agent-driven market disruptions - Quantitative evidence of pricing manipulation - Case studies from affected industries
 
 \subsection{Theoretical Foundations: Economic Prallels}