diff --git a/paper/src/bib/references.bib b/paper/src/bib/references.bib
index a738969..a4ba6c7 100644
--- a/paper/src/bib/references.bib
+++ b/paper/src/bib/references.bib
@@ -26,7 +26,7 @@
 	file = {PDF:/home/velocitatem/Zotero/storage/Q7J5EBEJ/3447815.pdf:application/pdf},
 }
 
-@phdthesis{salassa_politecnico_nodate,
+@phdthesis{salassa_politecnico_2024,
 	title = {Politecnico di {Torino} {Algorithmic} {Pricing} in the digital age "{Ethical} considerations on its economic and social implications, and an analysis of possible solutions to overcome its critical issues" {Tutor}: {Candidate}},
 	abstract = {Algorithmic pricing is an emerging business practice that uses computational algorithms to determine
 the prices of products and services based on a number of dynamic factors. The aim of this thesis is to
@@ -50,6 +50,8 @@ laws, for fair and non-discriminatory use.},
 	urldate = {2025-11-12},
 	school = {Politecnico di Torino},
 	author = {Salassa, Fabio and Pautassi, Paolo},
+	month = apr,
+	year = {2024},
 	file = {PDF:/home/velocitatem/Zotero/storage/L95WYQ8B/m-api-06aad998-d926-0d59-5593-82fdce5a678b.pdf:application/pdf},
 }
 
@@ -62,11 +64,12 @@ laws, for fair and non-discriminatory use.},
 	file = {PDF:/home/velocitatem/Zotero/storage/IZD3C5SR/m-api-26f6207c-cc89-4aed-29b6-34629f18fe9b.pdf:application/pdf},
 }
 
-@article{shahidi_coasean_nodate,
+@article{shahidi_coasean_2025,
 	title = {The {Coasean} {Singularity}? {Demand}, {Supply}, and {Market} {Design} with {AI} {Agents}},
 	abstract = {AI agents—autonomous systems that perceive, reason, and act on behalf of human principals—are poised to transform digital markets by dramatically reducing transaction costs. This chapter evaluates the economic implications of this transition, adopting a consumeroriented view of agents as market participants that can search, negotiate, and transact directly. From the demand side, agent adoption reflects derived demand: users trade off decision quality against effort reduction, with outcomes mediated by agent capability and task context. On the supply side, firms will design, integrate, and monetize agents, with outcomes hinging on whether agents operate within or across platforms. At the market level, agents create efficiency gains from lower search, communication, and contracting costs, but also introduce frictions such as congestion and price obfuscation. By lowering the costs of preference elicitation, contract enforcement, and identity verification, agents expand the feasible set of market designs but also raise novel regulatory challenges. While the net welfare effects remain an empirical question, the rapid onset of AI-mediated transactions presents a unique opportunity for economic research to inform real-world policy and market design.},
 	language = {en},
 	author = {Shahidi, Peyman and Rusak, Gili and Manning, Benjamin S and Fradkin, Andrey and Horton, John J},
+	year = {2025},
 	file = {PDF:/home/velocitatem/Zotero/storage/TQCAPJDP/Shahidi et al. - The Coasean Singularity Demand, Supply, and Market Design with AI Agents.pdf:application/pdf},
 }
 
@@ -84,10 +87,14 @@ laws, for fair and non-discriminatory use.},
 	file = {PDF:/home/velocitatem/Zotero/storage/ZLJQ4DQ9/Byrnes - 2025 - Intro to Brain-Like-AGI Safety.pdf:application/pdf},
 }
 
-@article{shannon_mathematical_nodate,
+@article{shannon_mathematical_1948,
 	title = {A {Mathematical} {Theory} of {Communication}},
+	volume = {27},
 	language = {en},
+	journal = {Bell System Technical Journal},
 	author = {Shannon, C E},
+	month = oct,
+	year = {1948},
 	file = {PDF:/home/velocitatem/Zotero/storage/FJRFRWK2/Shannon - A Mathematical Theory of Communication.pdf:application/pdf},
 }
 
@@ -96,11 +103,13 @@ laws, for fair and non-discriminatory use.},
 	file = {PDF:/home/velocitatem/Zotero/storage/D3QRGY9Z/order_stats.pdf:application/pdf},
 }
 
-@article{devine_nonlinear_nodate,
+@article{devine_nonlinear_2017,
 	title = {Nonlinear {Pricing} with {Costly} {Information} {Acquisition}},
 	abstract = {This paper examines a nonlinear pricing model where the ﬁrm can choose to acquire costly information prior to oﬀering contract menus to consumers; such as paying a consultant or investing in machine learning technologies. Information provides the ﬁrm with a signal about consumers types, whose accuracy increases as the ﬁrm acquires larger amounts of information. We show that the ﬁrm chooses to acquire information, only if it can purchase a suﬃcient amount that could alter its initial prior beliefs. Relative to standard settings where ﬁrms cannot acquire information, we identify how information acquisition changes optimal contract oﬀers, equilibrium proﬁts, information rents, and welfare. A better-informed ﬁrm increases its expected proﬁts, but it can also increase expected utility when the cost of information is intermediate. Our results recommend balanced online privacy laws.},
 	language = {en},
 	author = {Devine, Brett R and Munoz-Garcia, Felix},
+	month = nov,
+	year = {2017},
 	file = {PDF:/home/velocitatem/Zotero/storage/GQ28KVBF/Devine and Munoz-Garcia - Nonlinear Pricing with Costly Information Acquisition.pdf:application/pdf},
 }
 
@@ -202,10 +211,11 @@ laws, for fair and non-discriminatory use.},
 	file = {PDF:/home/velocitatem/Zotero/storage/U7A5Q78V/Karten et al. - 2025 - LLM Economist Large Population Models and Mechanism Design in Multi-Agent Generative Simulacra.pdf:application/pdf},
 }
 
-@techreport{mullapudi_reinforcement_nodate,
+@techreport{mullapudi_reinforcement_2025,
 	title = {A {Reinforcement} {Learning} {Approach} to {Dynamic} {Pricing}},
 	abstract = {Dynamic pricing represents a critical strategic challenge in modern e-commerce, where firms must navigate fluctuating demand, inventory constraints, and aggressive competitor actions. Traditional static and heuristic-based pricing models often fail to capture the complex, non-linear dynamics of competitive digital markets, leading to suboptimal profitability. This paper proposes a model-free reinforcement learning (RL) framework to address this challenge. Specifically, we design, implement, and evaluate a Q-learning agent capable of learning an optimal, state-dependent pricing policy. The agent is trained and evaluated within a simulated market environment constructed from the publicly available "Retail Price Optimization" dataset from Kaggle, which provides a rich feature set including historical sales, product characteristics, seasonality, and, crucially, competitor pricing data. The problem is formulated as a Markov Decision Process (MDP), where the agent's state incorporates its price position relative to competitors, competitor price trends, and seasonal factors. The agent's performance is benchmarked against three baseline strategies: static pricing, a reactive "follow-the-leader" heuristic, and random pricing. The results demonstrate that the Q-learning agent achieves a substantial increase in total cumulative profit over the evaluation period, outperforming all baselines by learning a nuanced policy that strategically balances price adjustments in response to market conditions. This work provides a practical and reproducible blueprint for applying reinforcement learning to optimize pricing decisions in a simulated yet realistic competitive retail environment, highlighting the potential of RL to automate complex strategic decision-making.},
 	author = {Mullapudi, Pavan},
+	year = {2025},
 	note = {Publication Title: International Journal on Science and Technology (IJSAT) IJSAT25049558
 Volume: 16
 Issue: 4},
@@ -294,10 +304,11 @@ Issue: 4},
 	file = {PDF:/home/velocitatem/Zotero/storage/S8635QX6/varian95a.pdf:application/pdf},
 }
 
-@book{russell_artificial_nodate,
+@book{russell_artificial_2021,
 	title = {Artificial {Intelligence} {A} {Modern} {Approach} {Fourth} {Edition} {Global} {Edition}},
 	isbn = {978-1-292-40117-1},
 	author = {Russell, Stuart and Norvig, Peter},
+	year = {2021},
 	file = {PDF:/home/velocitatem/Zotero/storage/6B8W8S27/efdd4d1d4c2087fe1cbe03d9ced67f34.pdf:application/pdf},
 }
 
@@ -331,11 +342,13 @@ Volume: 21},
 	file = {PDF:/home/velocitatem/Zotero/storage/H8IS64AW/2411.13768v2.pdf:application/pdf},
 }
 
-@techreport{xie_osworld_nodate,
+@techreport{xie_osworld_2024,
 	title = {{OSWORLD}: {Benchmarking} {Multimodal} {Agents} for {Open}-{Ended} {Tasks} in {Real} {Computer} {Environments}},
 	url = {https://os-world.github.io},
 	abstract = {Autonomous agents that accomplish complex computer tasks with minimal human interventions have the potential to transform human-computer interaction, significantly enhancing accessibility and productivity. However, existing benchmarks either lack an interactive environment or are limited to environments specific to certain applications or domains, failing to reflect the diverse and complex nature of real-world computer use, thereby limiting the scope of tasks and agent scalability. To address this issue, we introduce OSWORLD, the first-of-its-kind scalable, real computer environment for multimodal agents, supporting task setup, execution-based evaluation, and interactive learning across various operating systems such as Ubuntu, Windows, and macOS. OSWORLD can serve as a unified, integrated computer environment for assessing open-ended computer tasks that involve arbitrary applications. Building upon OSWORLD, we create a benchmark of 369 computer tasks involving real web and desktop apps in open domains, OS file I/O, and workflows spanning multiple applications. Each task example is derived from real-world computer use cases and includes a detailed initial state setup configuration and a custom execution-based evaluation script for reliable, reproducible evaluation. Extensive evaluation of state-of-the-art LLM/VLM-based agents on OSWORLD reveals significant deficiencies in their ability to serve as computer assistants. While humans can accomplish over 72.36\% of the tasks, the best model achieves only 12.24\% success, primarily struggling with GUI grounding and operational knowledge. Comprehensive analysis using OSWORLD provides valuable insights for developing multimodal generalist agents that were not possible with previous benchmarks. Our code, environment, baseline models, and data are publicly available at https://os-world.github.io.},
 	author = {Xie, Tianbao and Zhang, Danyang and Chen, Jixuan and Li, Xiaochuan and Zhao, Siheng and Cao, Ruisheng and Jing Hua, Toh and Cheng, Zhoujun and Shin, Dongchan and Lei, Fangyu and Liu, Yitao and Xu, Yiheng and Zhou, Shuyan and Savarese, Silvio and Xiong, Caiming and Zhong, Victor and Yu, Tao},
+	month = may,
+	year = {2024},
 	note = {arXiv: 2404.07972v2},
 	file = {PDF:/home/velocitatem/Zotero/storage/LLRKXIC7/full-text.pdf:application/pdf},
 }
@@ -373,12 +386,14 @@ Volume: 21},
 	file = {PDF:/home/velocitatem/Zotero/storage/IQL6FPWE/Amazon Sues to Stop Perplexity From Using AI Tool to Buy Stuff - Bloomberg.pdf:application/pdf},
 }
 
-@techreport{besbes_dynamic_nodate,
+@techreport{besbes_dynamic_2007,
 	title = {Dynamic {Pricing} {Without} {Knowing} the {Demand} {Function}: {Risk} {Bounds} and {Near}-{Optimal} {Algorithms} *},
 	abstract = {We consider a single product revenue management problem where, given an initial inventory, the objective is to dynamically adjust prices over a finite sales horizon to maximize expected revenues. Realized demand is observed over time, but the underlying functional relationship between price and mean demand rate that governs these observations (otherwise known as the demand function or demand curve), is not known. We consider two instances of this problem: i.) a setting where the demand function is assumed to belong to a known parametric family with unknown parameter values; and ii.) a setting where the demand function is assumed to belong to a broad class of functions that need not admit any parametric representation. In each case we develop policies that learn the demand function "on the fly," and optimize prices based on that. The performance of these algorithms is measured in terms of the regret: the revenue loss relative to the maximal revenues that can be extracted when the demand function is known prior to the start of the selling season. We derive lower bounds on the regret that hold for any admissible pricing policy, and then show that our proposed algorithms achieve a regret that is "close" to this lower bound. The magnitude of the regret can be interpreted as the economic value of prior knowledge on the demand function; manifested as the revenue loss due to model uncertainty.},
 	author = {Besbes, Omar and Zeevi, Assaf},
+	month = dec,
+	year = {2007},
 	note = {Publication Title: Operations Research},
-	keywords = {learning, asymptotic analysis, estimation, exploration-exploitation, pricing, Revenue management, value of information},
+	keywords = {asymptotic analysis, estimation, exploration-exploitation, learning, pricing, Revenue management, value of information},
 	file = {PDF:/home/velocitatem/Zotero/storage/SBAIB4V2/Dp_wo_demand_risk_ob_az_posted.pdf:application/pdf},
 }
 
@@ -426,19 +441,22 @@ Volume: 21},
 	file = {PDF:/home/velocitatem/Zotero/storage/3Z2XK4QC/Ganie - 2025 - Uncertainty in Authorship Why Perfect AI Detection Is Mathematically Impossible.pdf:application/pdf},
 }
 
-@article{shi_distributionally_nodate,
+@article{shi_distributionally_2024,
 	title = {Distributionally {Robust} {Model}-{Based} {Oﬄine} {Reinforcement} {Learning} with {Near}-{Optimal} {Sample} {Complexity}},
 	abstract = {This paper concerns the central issues of model robustness and sample eﬃciency in oﬄine reinforcement learning (RL), which aims to learn to perform decision making from history data without active exploration. Due to uncertainties and variabilities of the environment, it is critical to learn a robust policy—with as few samples as possible—that performs well even when the deployed environment deviates from the nominal one used to collect the history dataset. We consider a distributionally robust formulation of oﬄine RL, focusing on tabular robust Markov decision processes with an uncertainty set speciﬁed by the Kullback-Leibler divergence in both ﬁnite-horizon and inﬁnite-horizon settings. To combat with sample scarcity, a model-based algorithm that combines distributionally robust value iteration with the principle of pessimism in the face of uncertainty is proposed, by penalizing the robust value estimates with a carefully designed data-driven penalty term. Under a mild and tailored assumption of the history dataset that measures distribution shift without requiring full coverage of the state-action space, we establish the ﬁnite-sample complexity of the proposed algorithms. We further develop an informationtheoretic lower bound, which suggests that learning RMDPs is at least as hard as the standard MDPs when the uncertainty level is suﬃcient small, and corroborates the tightness of our upper bound up to polynomial factors of the (eﬀective) horizon length for a range of uncertainty levels. To the best our knowledge, this provides the ﬁrst provably near-optimal robust oﬄine RL algorithm that learns under model uncertainty and partial coverage.},
 	language = {en},
 	author = {Shi, Laixi and Chi, Yuejie},
+	month = jun,
+	year = {2024},
 	file = {PDF:/home/velocitatem/Zotero/storage/K56G4EIP/Shi and Chi - Distributionally Robust Model-Based Oﬄine Reinforcement Learning with Near-Optimal Sample Complexity.pdf:application/pdf},
 }
 
-@article{dutting_mechanism_nodate,
+@article{dutting_mechanism_2025,
 	title = {Mechanism {Design} for {Large} {Language} {Models} ({Extended} {Abstract})},
 	abstract = {We investigate auction mechanisms for AIgenerated content, focusing on applications like ad creative generation. In our model, agents’ preferences over stochastically generated content are encoded as large language models (LLMs). We propose an auction format that operates on a tokenby-token basis, and allows LLM agents to inﬂuence content creation through single dimensional bids. We formulate two desirable incentive properties and prove their equivalence to a monotonicity condition on output aggregation. This equivalence enables a second-price rule design, even absent explicit agent valuation functions. Our design is supported by demonstrations on a publicly available LLM.},
 	language = {en},
 	author = {Dütting, Paul and Mirrokni, Vahab and Leme, Renato Paes and Xu, Haifeng and Zuo, Song},
+	year = {2025},
 	file = {PDF:/home/velocitatem/Zotero/storage/2ABDEYDN/Dütting et al. - Mechanism Design for Large Language Models (Extended Abstract).pdf:application/pdf},
 }
 
diff --git a/paper/src/chapters/01-intro.tex b/paper/src/chapters/01-intro.tex
index 40beacd..2df2f27 100644
--- a/paper/src/chapters/01-intro.tex
+++ b/paper/src/chapters/01-intro.tex
@@ -14,7 +14,7 @@ This research effort touches a large variety of domains, spanning behavioral eco
 
 \subsection{Motivation and Market Context}
 
-The current innovation boom in generative artificial intelligence and its applications to knowledge-based work tasks has brought many competing technologies for browser-use automation, with benchmarks and evaluations \parencite{xia_evaluation-driven_2025} motivating the development of capabilities focused on commercial research, understanding, and transaction execution \parencite{xie_osworld_nodate}. The ``AI Agent'' market is forecasted to grow from around USD 5-8 billion in 2025 to USD 42-52 billion by 2030. This surge reflects adoption in e-commerce, customer service, and enterprise automation, where agents handle interactions previously done by humans, raising the question of how these systems should be designed for future robustness as well as how to maintain a competitive edge in the analytical components of e-commerce platforms \parencite{markntel_advisors_global_2025}.
+The current innovation boom in generative artificial intelligence and its applications to knowledge-based work tasks has brought many competing technologies for browser-use automation, with benchmarks and evaluations \parencite{xia_evaluation-driven_2025} motivating the development of capabilities focused on commercial research, understanding, and transaction execution \parencite{xie_osworld_2024}. The ``AI Agent'' market is forecast to grow from around USD 5--8 billion in 2025 to USD 42--52 billion by 2030. This surge reflects adoption in e-commerce, customer service, and enterprise automation, where agents handle interactions previously done by humans, raising the question of how these systems should be designed for future robustness as well as how to maintain a competitive edge in the analytical components of e-commerce platforms \parencite{markntel_advisors_global_2025}.
 
 The key stakeholders affected by the threat of increasing agent-driven traffic include online businesses and platform operators (especially in bot-heavy sectors like retail, travel, and financial services), their security, fraud, and engineering teams, end users whose accounts and data are exposed and whose experience degrades, regulators and legal stakeholders responding to breaches and fraud, and the attackers or bot operators driving the automation \parencite{imperva_rapid_2025}.
 
diff --git a/paper/src/chapters/02-literature-review.tex b/paper/src/chapters/02-literature-review.tex
index 539afd3..af50df6 100644
--- a/paper/src/chapters/02-literature-review.tex
+++ b/paper/src/chapters/02-literature-review.tex
@@ -4,16 +4,16 @@ To better understand all wedges of the current works, we must start by exploring
 
 \subsection{Agent Taxonomy and Definitions}
 
-An agent in the context of artificial intelligence is generally defined by anything that can reason and act upon observations of its environments (collected through some sensory inputs) and carry out actions through effectors. Moreover, a rational agent is an entity that is capable of perceiving the world around them and taking actions to advance specified goals. This definition by \textcite{russell_artificial_nodate} is further developed in an economic context by \textcite{parkes_economic_2015}, suggesting AI research attempts to construct a synthetic \textit{homo economicus}, which may also be termed \textit{machina economicus}.
+An agent in the context of artificial intelligence is generally defined as anything that can reason and act upon observations of its environment (collected through some sensory inputs) and carry out actions through effectors. Moreover, a rational agent is an entity that is capable of perceiving the world around it and taking actions to advance specified goals. This definition by \textcite{russell_artificial_2021} is further developed in an economic context by \textcite{parkes_economic_2015}, suggesting AI research attempts to construct a synthetic \textit{homo economicus}, which may also be termed \textit{machina economicus}.
 A specific class or taxon of this \textit{machina economicus}, the Large Language Model (LLM) agent, is defined as an autonomous system capable of achieving goals and adapting post-training, often without needing explicit code or fundamental model changes \parencite{xia_evaluation-driven_2025}.
 
-We must however acknowledge the current SOTA as presented by OSWORLD simulations by \textcite{xie_osworld_nodate} have demonstrated that multi-modal tasks across desktop and web interaction modes, have a top-performing score of only 12.24\% success, whereas humans have a higher 72\% success rate; this is linked to the lack of grounding of these agents and their inability of handling unexpected errors. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems.
+We must, however, acknowledge the current state of the art: the OSWORLD simulations by \textcite{xie_osworld_2024} have demonstrated that, on multi-modal tasks across desktop and web interaction modes, the top-performing model achieves only 12.24\% success, whereas humans achieve a far higher 72\% success rate; this is linked to the lack of grounding of these agents and their inability to handle unexpected errors. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems.
 
 We model an agent session as producing some events with lower in-session conversion levels relative to humans, this we state in our assumption that $P(\text{purchase} \vert A) \ll P(\text{purchase} \vert H)$ but with a potentially higher volatility in $\hat{q}$, which we observe through the look-to-book metrics in our simulation.
 
 \subsection{Economic Agents: From Homo Economicus to Machina Economicus}
 
-Existing behavioral economic models tend to be criticized for the assumption of rational behavior, as is embodied in the term of homo economicus. The definition of a machina economicus by \textcite{parkes_economic_2015} is quite appropriate for our case, particularly because these assumptions of rationality have been argued to be a very adequate reference for AI research by \textcite{varian_economic_1995} due to its expected utility maximizing nature. For modeling this behavior, the trajectories of these agents can be formally defined to be partially observable Markov decision processes \parencite{xie_osworld_nodate}. Agents are however not to be confused with web-bots which have previously been known as automated software applications or scrapers which are set with a purpose of carrying out specific tasks on the internet, without a higher level of internal judgement \parencite{imperva_rapid_2025}. In our research, we refer to this actor simply as an Agent belonging to the distribution $A$.
+Existing behavioral economic models tend to be criticized for the assumption of rational behavior, as embodied in the term \textit{homo economicus}. The definition of a \textit{machina economicus} by \textcite{parkes_economic_2015} is quite appropriate for our case, particularly because these assumptions of rationality have been argued by \textcite{varian_economic_1995} to be a very adequate reference for AI research due to its expected-utility-maximizing nature. For modeling this behavior, the trajectories of these agents can be formally defined as partially observable Markov decision processes \parencite{xie_osworld_2024}. Agents are, however, not to be confused with web bots, which have previously been known as automated software applications or scrapers set up to carry out specific tasks on the internet without a higher level of internal judgement \parencite{imperva_rapid_2025}. In our research, we refer to this actor simply as an Agent belonging to the distribution $A$.
 
 This economic framing also helps separate two related but distinct phenomena of agents as buyers (changing market demand composition), and agents as information gatherers (changing the observed interactions used by pricing/recommendation systems). The thesis focuses on the second, where information acquisition strategically precedes purchase execution. We do not however dismiss the proposed expectation that existing economic systems serving humans, will not be populated by AIs across multiple channels and with various possibly misaligned goals as stated by \textcite{parkes_economic_2015}.
 
@@ -23,7 +23,7 @@ A HAP (HTTP Agent Profile) protocol has been developed as an internet draft by \
 
 The statistical issue of contamination in dynamic pricing systems that observe demand features as a means to update prices has been documented in various previous contexts. The airline industry (which has accounted for 24\% of observed disruptions) has seen malicious activity with a measureable impact on skewing key performance indicators by behavior visible in the look-to-book metrics. Excessive reconnaissance traffic inflates search volume without corresponding completed bookings, thereby skewing demand forecasts and disrupting dynamic pricing models. Demand proxies have also been observed to cause significant threat to inventory management by creating artificial scarcity that distorts the demand-supply relationships in the enterprise model. Censored demand as shown by \textcite{amjad_censored_2017} can also be observed in low-bias demand under-estimation caused by a distortion effect coming from non-human traffic data \parencite{imperva_rapid_2025}.
 
-When dynamic pricing algorithms operate on highly contaminated or noisy data, the risk grows significantly in creating inaccurate price inferences. The emergent mitigation driven by un-informed reward and regret signals might lead to price suppression for sales continuity which results in harming margins and resulting in a revenue loss. System that poorly fit undesired behavior might result in price gouging, which calls for strong guardrails while preserving targeted business strategy \parencite{mullapudi_reinforcement_nodate}.
+When dynamic pricing algorithms operate on highly contaminated or noisy data, the risk of creating inaccurate price inferences grows significantly. The emergent mitigation driven by uninformed reward and regret signals might lead to price suppression for sales continuity, which harms margins and results in a revenue loss. Systems that poorly fit undesired behavior might result in price gouging, which calls for strong guardrails while preserving the targeted business strategy \parencite{mullapudi_reinforcement_2025}.
 
 
 
@@ -37,7 +37,7 @@ Early hints of exploration of prices in a standard English auction explored by \
 
 Like in classical revenue-maximizing auctions \parencite{roughgarden_cs364a_2013} we assume that the human actor in our system has a private valuation $v$ which we formally draw from later defined distributions. The important note here is that the agent proxy does not have a mechanism to convey this private information into the demand data which directly impacts the pricing systems.
 
-The key component of this mediation between agents and commercial platforms lays in the transaction costs related to information gathering and negotiation. As proposed by \textcite{shahidi_coasean_nodate} these costs are bound to collapse towards zero (which we demonstrate mathematically), calling for a re-evaluation of the boundaries between firms and markets. As argued by \textcite{coase_nature_1937}, the market participation and time associated with that participation, is critical part of the Coasean transaction cost logic which includes the discovery or relevant pricing within a given market. This process of price discovery without the presence of AI Agents can be time consuming and resource intensive. To build on top of this work we provide a proof of optimal conditions theorised by Coaes as an extension to AI-mediated markets.
+The key component of this mediation between agents and commercial platforms lies in the transaction costs related to information gathering and negotiation. As proposed by \textcite{shahidi_coasean_2025}, these costs are bound to collapse towards zero (which we demonstrate mathematically), calling for a re-evaluation of the boundaries between firms and markets. As argued by \textcite{coase_nature_1937}, market participation, and the time associated with that participation, is a critical part of the Coasean transaction-cost logic, which includes the discovery of relevant pricing within a given market. This process of price discovery without the presence of AI Agents can be time-consuming and resource-intensive. To build on top of this work, we provide a proof of optimal conditions theorised by Coase as an extension to AI-mediated markets.
 
 % Economic foundations: relating the problem to options pricing theory. Cost of Information (COI) concept and its relevance