refactoring bibliography

2026-07-15 17:43:36 +00:00 · 2026-01-09 14:06:48 +01:00
parent 7b3572fe1b
commit f2d29f578a
4 changed files with 440 additions and 147 deletions
--- a/paper/src/bib/references.bib
+++ b/paper/src/bib/references.bib
@@ -1,137 +1,425 @@
-@techReport{Imperva2025,
-   author = {Imperva},
-   title = {The Rapid Rise of Bots and the Unseen Risk for Business \#2025BADBOTREPORT},
-   year = {2025}
+
+@article{arnoud_v_den_boer_dynamic_2015,
+	title = {Dynamic pricing and learning: {Historical} origins, current research, and new directions},
+	volume = {20},
+	url = {https://www.sciencedirect.com/science/article/pii/S1876735415000021},
+	doi = {10.1016/j.sorms.2015.03.001},
+	number = {1},
+	journal = {Surveys in Operations Research and Management Science},
+	author = {{Arnoud V. den Boer}},
+	month = jun,
+	year = {2015},
+	pages = {1--18},
+	file = {PDF:/home/velocitatem/Zotero/storage/NUAGDYER/memo2025.pdf:application/pdf},
 }

-@techReport{Xie,
-   abstract = {Autonomous agents that accomplish complex computer tasks with minimal human interventions have the potential to transform human-computer interaction, significantly enhancing accessibility and productivity. However, existing benchmarks either lack an interactive environment or are limited to environments specific to certain applications or domains, failing to reflect the diverse and complex nature of real-world computer use, thereby limiting the scope of tasks and agent scalability. To address this issue, we introduce OSWORLD, the first-of-its-kind scalable, real computer environment for multimodal agents, supporting task setup, execution-based evaluation, and interactive learning across various operating systems such as Ubuntu, Windows, and macOS. OSWORLD can serve as a unified, integrated computer environment for assessing open-ended computer tasks that involve arbitrary applications. Building upon OSWORLD, we create a benchmark of 369 computer tasks involving real web and desktop apps in open domains, OS file I/O, and workflows spanning multiple applications. Each task example is derived from real-world computer use cases and includes a detailed initial state setup configuration and a custom execution-based evaluation script for reliable, reproducible evaluation. Extensive evaluation of state-of-the-art LLM/VLM-based agents on OSWORLD reveals significant deficiencies in their ability to serve as computer assistants. While humans can accomplish over 72.36% of the tasks, the best model achieves only 12.24% success, primarily struggling with GUI grounding and operational knowledge. Comprehensive analysis using OSWORLD provides valuable insights for developing multimodal generalist agents that were not possible with previous benchmarks. Our code, environment, baseline models, and data are publicly available at https://os-world.github.io.},
-   author = {Tianbao Xie and Danyang Zhang and Jixuan Chen and Xiaochuan Li and Siheng Zhao and Ruisheng Cao and Toh Jing Hua and Zhoujun Cheng and Dongchan Shin and Fangyu Lei and Yitao Liu and Yiheng Xu and Shuyan Zhou and Silvio Savarese and Caiming Xiong and Victor Zhong and Tao Yu},
-   title = {OSWORLD: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments},
-   url = {https://os-world.github.io}
+@article{iliou_detection_2021,
+	title = {Detection of {Advanced} {Web} {Bots} by {Combining} {Web} {Logs} with {Mouse} {Behavioural} {Biometrics}},
+	volume = {2},
+	url = {https://dl.acm.org/doi/10.1145/3447815},
+	doi = {10.1145/3447815},
+	number = {3},
+	journal = {Digital Threats: Research and Practice},
+	author = {Iliou, Christos and Kostoulas, Theodoros and Tsikrika, Theodora and Katos, Vasilis and Vrochidis, Stefanos and Kompatsiaris, Ioannis},
+	year = {2021},
+	pages = {1--26},
+	file = {PDF:/home/velocitatem/Zotero/storage/Q7J5EBEJ/3447815.pdf:application/pdf},
 }

-@techReport{MarkNtelAdvisors2025,
-   author = {MarkNtel Advisors},
-   institution = {MarkNtel Advisors},
-   title = {Global AI Agent Market Research Report: Forecast (2026–2032)},
-   url = {https://www.marknteladvisors.com/research-library/ai-agent-market.html},
-   year = {2025}
+@phdthesis{salassa_politecnico_nodate,
+	title = {Politecnico di {Torino} {Algorithmic} {Pricing} in the digital age "{Ethical} considerations on its economic and social implications, and an analysis of possible solutions to overcome its critical issues" {Tutor}: {Candidate}},
+	abstract = {Algorithmic pricing is an emerging business practice that uses computational algorithms to determine
+the prices of products and services based on a number of dynamic factors. The aim of this thesis is to
+draw attention to the existence of these business practices, and the ethical and social implications that
+derive from them, and then focus on what could be effective solutions to increase the well-being of
+the community.
+In Chapter 2 of the thesis, a general introduction to the topic will be made, starting from its history
+and its evolution over the years; Chapter 3 will examine the different types of pricing algorithms.
+Subsequently, in Chapter 4 we will analyze the sectors in which they are most applicable, and the
+relative advantages and disadvantages they bring with them, with a critical analysis of the trade-offs
+generated. The effect of algorithmic pricing on competition will be studied, considering how the
+ability of algorithms to adapt quickly to market conditions can foster anti-competitive practices, such
+as price discrimination. Later, in Chapter 5, we will look at the issue of price transparency and how
+the opacity of algorithms can make it difficult for consumers to understand the pricing process and
+assess whether they are receiving fair treatment.
+To address these ethical issues, several possible solutions will be brought to light, described in
+Chapter 6, which will focus on the role of the government, as a regulatory, of the end consumer, who
+must be encouraged to educate and inform himself about the use of these practices, and of the
+company, as responsible for making its customers aware and acting in compliance with government
+laws, for fair and non-discriminatory use.},
+	urldate = {2025-11-12},
+	school = {Politecnico di Torino},
+	author = {Salassa, Fabio and Pautassi, Paolo},
+	file = {PDF:/home/velocitatem/Zotero/storage/L95WYQ8B/m-api-06aad998-d926-0d59-5593-82fdce5a678b.pdf:application/pdf},
 }

-@article{Xia2025,
-   abstract = {Large Language Models (LLMs) have enabled the emergence of LLM agents, systems capable of pursuing under-specified goals and adapting after deployment. Evaluating such agents is challenging because their behavior is open ended, probabilistic, and shaped by system-level interactions over time. Traditional evaluation methods, built around fixed benchmarks and static test suites, fail to capture emergent behaviors or support continuous adaptation across the lifecycle. To ground a more systematic approach, we conduct a multivocal literature review (MLR) synthesizing academic and industrial evaluation practices. The findings directly inform two empirically derived artifacts: a process model and a reference architecture that embed evaluation as a continuous, governing function rather than a terminal checkpoint. Together they constitute the evaluation-driven development and operations (EDDOps) approach, which unifies offline (development-time) and online (runtime) evaluation within a closed feedback loop. By making evaluation evidence drive both runtime adaptation and governed redevelopment, EDDOps supports safer, more traceable evolution of LLM agents aligned with changing objectives, user needs, and governance constraints.},
-   author = {Boming Xia and Qinghua Lu and Liming Zhu and Zhenchang Xing and Dehai Zhao and Hao Zhang},
-   month = {11},
-   title = {Evaluation-Driven Development and Operations of LLM Agents: A Process Model and Reference Architecture},
-   url = {http://arxiv.org/abs/2411.13768},
-   year = {2025}
+@inproceedings{mueller_low-rank_2019,
+	title = {Low-{Rank} {Bandit} {Methods} for {High}-{Dimensional} {Dynamic} {Pricing}},
+	booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 32 ({NeurIPS} 2019)},
+	author = {Mueller, Jonas W and Syrgkanis, Vasilis and Taddy, Matt},
+	year = {2019},
+	pages = {15442--15452},
+	file = {PDF:/home/velocitatem/Zotero/storage/IZD3C5SR/m-api-26f6207c-cc89-4aed-29b6-34629f18fe9b.pdf:application/pdf},
 }

-
-@misc{AmazonvsPerplexity,
-   author = {Shirin Ghaffary and Matt Day},
-   note = {Updated 2025-11-05},
-   title = {Amazon Sues to Stop Perplexity From Using AI Tool to Buy Stuff},
-   url = {https://www.bloomberg.com/news/articles/2025-11-04/amazon-demands-perplexity-stop-ai-agent-from-making-purchases}
+@article{shahidi_coasean_nodate,
+	title = {The {Coasean} {Singularity}? {Demand}, {Supply}, and {Market} {Design} with {AI} {Agents}},
+	abstract = {AI agents—autonomous systems that perceive, reason, and act on behalf of human principals—are poised to transform digital markets by dramatically reducing transaction costs. This chapter evaluates the economic implications of this transition, adopting a consumeroriented view of agents as market participants that can search, negotiate, and transact directly. From the demand side, agent adoption reflects derived demand: users trade off decision quality against effort reduction, with outcomes mediated by agent capability and task context. On the supply side, firms will design, integrate, and monetize agents, with outcomes hinging on whether agents operate within or across platforms. At the market level, agents create efficiency gains from lower search, communication, and contracting costs, but also introduce frictions such as congestion and price obfuscation. By lowering the costs of preference elicitation, contract enforcement, and identity verification, agents expand the feasible set of market designs but also raise novel regulatory challenges. While the net welfare effects remain an empirical question, the rapid onset of AI-mediated transactions presents a unique opportunity for economic research to inform real-world policy and market design.},
+	language = {en},
+	author = {Shahidi, Peyman and Rusak, Gili and Manning, Benjamin S and Fradkin, Andrey and Horton, John J},
+	file = {PDF:/home/velocitatem/Zotero/storage/TQCAPJDP/Shahidi et al. - The Coasean Singularity Demand, Supply, and Market Design with AI Agents.pdf:application/pdf},
 }

-
-@inproceedings{Mueller2019,
-   author = {Jonas W Mueller and Vasilis Syrgkanis and Matt Taddy},
-   booktitle = {Advances in Neural Information Processing Systems 32 (NeurIPS 2019)},
-   pages = {15442-15452},
-   title = {Low-Rank Bandit Methods for High-Dimensional Dynamic Pricing},
-   url = {https://proceedings.neurips.cc/paper/2019/file/0a3df70393993583a13c0dd6686f3f32-Paper.pdf},
-   year = {2019}
+@misc{byrnes_intro_2025,
+	title = {Intro to {Brain}-{Like}-{AGI} {Safety}},
+	url = {https://osf.io/fe36n_v1},
+	doi = {10.31219/osf.io/fe36n_v1},
+	abstract = {Suppose we someday build an Artificial General Intelligence (AGI) algorithm using similar principles of learning and cognition as the human brain. How would we use such an algorithm safely? I argue that this is an open technical problem, and my goal is to bring readers with no prior knowledge all the way up to the front-line of unsolved problems. Chapter 1 has background and motivation; Chapters 2-7 are on neuroscience, arguing for a picture of the brain that combines large-scale learning algorithms (e.g. in the cortex) and specific evolved reflexes (e.g. in the hypothalamus and brainstem); and Chapters 8-15 apply those neuroscience ideas to AGI safety. A major theme is the idea that the brain has something like a reinforcement learning reward function, which says that pain is bad, eating-when-hungry is good, etc. I argue that this reward function is centered around the hypothalamus and brainstem, and that all human desires—even "higher" desires for things like compassion and justice—come directly or indirectly from that innate reward function. If future programmers build brain-like AGI, they will likewise have a reward function slot in their source code, in which they can put whatever they want. If they put the wrong thing, the resulting AGI will wind up callously indifferent to human welfare. How might they avoid that? That's an open technical problem, but I will review some ideas and research directions.},
+	language = {en},
+	urldate = {2025-12-31},
+	publisher = {Open Science Framework},
+	author = {Byrnes, Steven J.},
+	month = mar,
+	year = {2025},
+	file = {PDF:/home/velocitatem/Zotero/storage/ZLJQ4DQ9/Byrnes - 2025 - Intro to Brain-Like-AGI Safety.pdf:application/pdf},
 }

-@article{Amjad2017,
-   abstract = { In this paper, the question of interest is estimating true demand of a product at a given store location and time period in the retail environment based on a single noisy and potentially censored observation. To address this question, we introduce a %non-parametric framework to make inference from multiple time series. Somewhat surprisingly, we establish that the algorithm introduced for the purpose of "matrix completion" can be used to solve the relevant inference problem. Specifically, using the Universal Singular Value Thresholding (USVT) algorithm [7], we show that our estimator is consistent: the average mean squared error of the estimated average demand with respect to the true average demand goes to 0 as the number of store locations and time intervals increase to $\infty$. We establish naturally appealing properties of the resulting estimator both analytically as well as through a sequence of instructive simulations. Using a real dataset in retail (Walmart), we argue for the practical relevance of our approach. },
-   author = {Muhammad J. Amjad and Devavrat Shah},
-   doi = {10.1145/3154489},
-   issue = {2},
-   journal = {Proceedings of the ACM on Measurement and Analysis of Computing Systems},
-   month = {12},
-   pages = {1-28},
-   publisher = {Association for Computing Machinery (ACM)},
-   title = {Censored Demand Estimation in Retail},
-   volume = {1},
-   url = {https://par.nsf.gov/servlets/purl/10066022},
-   year = {2017}
+@article{shannon_mathematical_nodate,
+	title = {A {Mathematical} {Theory} of {Communication}},
+	language = {en},
+	author = {Shannon, C E},
+	file = {PDF:/home/velocitatem/Zotero/storage/FJRFRWK2/Shannon - A Mathematical Theory of Communication.pdf:application/pdf},
 }
-@misc{Parkes2015,
-   abstract = {The field of artificial intelligence (AI) strives to build rational agents capable of perceiving the world around them and taking actions to advance specified goals. Put another way, AI researchers aim to construct a synthetic homo economicus, the mythical perfectly rational agent of neoclassical economics.We review progress toward creating this new species of machine, machina economicus, and discuss some challenges in designing AIs that can reason effectively in economic contexts. Supposing that AI succeeds in this quest, or at least comes close enough that it is useful to think about AIs in rationalistic terms, we ask how to design the rules of interaction in multi-agent systems that come to represent an economy of AIs.Theories of normative design from economics may prove more relevant for artificial agents than human agents, with AIs that better respect idealized assumptions of rationality than people, interacting through novel rules and incentive systems quite distinct from those tailored for people.},
-   author = {David C. Parkes and Michael P. Wellman},
-   doi = {10.1126/science.aaa8403},
-   issn = {10959203},
-   issue = {6245},
-   journal = {Science},
-   month = {7},
-   pages = {267-272},
-   pmid = {26185245},
-   publisher = {American Association for the Advancement of Science},
+
+@misc{noauthor_order_stats_nodate,
+	title = {order\_stats},
+	file = {PDF:/home/velocitatem/Zotero/storage/D3QRGY9Z/order_stats.pdf:application/pdf},
+}
+
+@article{devine_nonlinear_nodate,
+	title = {Nonlinear {Pricing} with {Costly} {Information} {Acquisition}},
+	abstract = {This paper examines a nonlinear pricing model where the ﬁrm can choose to acquire costly information prior to oﬀering contract menus to consumers; such as paying a consultant or investing in machine learning technologies. Information provides the ﬁrm with a signal about consumers types, whose accuracy increases as the ﬁrm acquires larger amounts of information. We show that the ﬁrm chooses to acquire information, only if it can purchase a suﬃcient amount that could alter its initial prior beliefs. Relative to standard settings where ﬁrms cannot acquire information, we identify how information acquisition changes optimal contract oﬀers, equilibrium proﬁts, information rents, and welfare. A better-informed ﬁrm increases its expected proﬁts, but it can also increase expected utility when the cost of information is intermediate. Our results recommend balanced online privacy laws.},
+	language = {en},
+	author = {Devine, Brett R and Munoz-Garcia, Felix},
+	file = {PDF:/home/velocitatem/Zotero/storage/GQ28KVBF/Devine and Munoz-Garcia - Nonlinear Pricing with Costly Information Acquisition.pdf:application/pdf},
+}
+
+@misc{wang_learning_2025,
+	title = {Learning {Optimal} {Distributionally} {Robust} {Stochastic} {Control} in {Continuous} {State} {Spaces}},
+	url = {http://arxiv.org/abs/2406.11281},
+	doi = {10.48550/arXiv.2406.11281},
+	abstract = {We study data-driven learning of robust stochastic control for infinite-horizon systems with potentially continuous state and action spaces. In many managerial settings–supply chains, finance, manufacturing, services, and dynamic games–the state-transition mechanism is determined by system design, while available data capture the distributional properties of the stochastic inputs from the environment. For modeling and computational tractability, a decision maker often adopts a Markov control model with i.i.d. environment inputs, which can render learned policies fragile to internal dependence or external perturbations. We introduce a distributionally robust stochastic control paradigm that promotes policy reliability by introducing adaptive adversarial perturbations to the environment input, while preserving the modeling, statistical, and computational tractability of the Markovian formulation. From a modeling perspective, we examine two adversary models–current-action-aware and current-action-unaware–leading to distinct dynamic behaviors and robust optimal policies. From a statistical learning perspective, we characterize optimal finite-sample minimax rates for uniform learning of the robust value function across a continuum of states under ambiguity sets defined by the fk-divergence and Wasserstein distance. To efficiently compute the optimal robust policies, we further propose algorithms inspired by deep reinforcement learning methodologies. Finally, we demonstrate the applicability of the framework to real managerial problems.},
+	language = {en},
+	urldate = {2025-12-29},
+	publisher = {arXiv},
+	author = {Wang, Shengbo and Meng, Jason and Si, Nian and Blanchet, Jose and Zhou, Zhengyuan},
+	month = nov,
+	year = {2025},
+	note = {arXiv:2406.11281 [stat]},
+	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
+	file = {PDF:/home/velocitatem/Zotero/storage/RQ8XDSSG/Wang et al. - 2025 - Learning Optimal Distributionally Robust Stochastic Control in Continuous State Spaces.pdf:application/pdf},
+}
+
+@misc{ie_recsim_2019,
+	title = {{RecSim}: {A} {Configurable} {Simulation} {Platform} for {Recommender} {Systems}},
+	shorttitle = {{RecSim}},
+	url = {http://arxiv.org/abs/1909.04847},
+	doi = {10.48550/arXiv.1909.04847},
+	abstract = {We propose RecSim, a configurable platform for authoring simulation environments for recommender systems (RSs) that naturally supports sequential interaction with users. RecSim allows the creation of new environments that reflect particular aspects of user behavior and item structure at a level of abstraction well-suited to pushing the limits of current reinforcement learning (RL) and RS techniques in sequential interactive recommendation problems. Environments can be easily configured that vary assumptions about: user preferences and item familiarity; user latent state and its dynamics; and choice models and other user response behavior. We outline how RecSim offers value to RL and RS researchers and practitioners, and how it can serve as a vehicle for academic-industrial collaboration.},
+	urldate = {2025-12-29},
+	publisher = {arXiv},
+	author = {Ie, Eugene and Hsu, Chih-wei and Mladenov, Martin and Jain, Vihan and Narvekar, Sanmit and Wang, Jing and Wu, Rui and Boutilier, Craig},
+	month = sep,
+	year = {2019},
+	note = {arXiv:1909.04847 [cs]},
+	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Human-Computer Interaction, Computer Science - Information Retrieval},
+	file = {Preprint PDF:/home/velocitatem/Zotero/storage/CJJI2VQF/Ie et al. - 2019 - RecSim A Configurable Simulation Platform for Recommender Systems.pdf:application/pdf;Snapshot:/home/velocitatem/Zotero/storage/8XJKJTHE/1909.html:text/html},
+}
+
+@misc{kuhn_wasserstein_2024,
+	title = {Wasserstein {Distributionally} {Robust} {Optimization}: {Theory} and {Applications} in {Machine} {Learning}},
+	shorttitle = {Wasserstein {Distributionally} {Robust} {Optimization}},
+	url = {http://arxiv.org/abs/1908.08729},
+	doi = {10.48550/arXiv.1908.08729},
+	abstract = {Many decision problems in science, engineering and economics are affected by uncertain parameters whose distribution is only indirectly observable through samples. The goal of data-driven decision-making is to learn a decision from finitely many training samples that will perform well on unseen test samples. This learning task is difficult even if all training and test samples are drawn from the same distribution—especially if the dimension of the uncertainty is large relative to the training sample size. Wasserstein distributionally robust optimization seeks data-driven decisions that perform well under the most adverse distribution within a certain Wasserstein distance from a nominal distribution constructed from the training samples. In this tutorial we will argue that this approach has many conceptual and computational benefits. Most prominently, the optimal decisions can often be computed by solving tractable convex optimization problems, and they enjoy rigorous out-of-sample and asymptotic consistency guarantees. We will also show that Wasserstein distributionally robust optimization has interesting ramifications for statistical learning and motivates new approaches for fundamental learning tasks such as classification, regression, maximum likelihood estimation or minimum mean square error estimation, among others.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Kuhn, Daniel and Esfahani, Peyman Mohajerin and Nguyen, Viet Anh and Shafieezadeh-Abadeh, Soroosh},
+	month = nov,
+	year = {2024},
+	note = {arXiv:1908.08729 [stat]},
+	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Mathematics - Optimization and Control},
+	file = {PDF:/home/velocitatem/Zotero/storage/FAWJEK6J/Kuhn et al. - 2024 - Wasserstein Distributionally Robust Optimization Theory and Applications in Machine Learning.pdf:application/pdf},
+}
+
+@misc{arunachaleswaran_learning_2025,
+	title = {Learning to {Play} {Against} {Unknown} {Opponents}},
+	url = {http://arxiv.org/abs/2412.18297},
+	doi = {10.48550/arXiv.2412.18297},
+	abstract = {We consider the problem of a learning agent who has to repeatedly play a general sum game against a strategic opponent who acts to maximize their own payoﬀ by optimally responding against the learner’s algorithm. The learning agent knows their own payoﬀ function, but is uncertain about the payoﬀ of their opponent (knowing only that it is drawn from some distribution D). What learning algorithm should the agent run in order to maximize their own total utility, either in expectation or in the worst-case over D? When the learning algorithm is constrained to be a no-regret algorithm, we demonstrate how to eﬃciently construct an optimal learning algorithm (asymptotically achieving the optimal utility) in polynomial time for both the in-expectation and worst-case problems, independent of any other assumptions. When the learning algorithm is not constrained to no-regret, we show how to construct an ε-optimal learning algorithm (obtaining average utility within ε of the optimal utility) for both the in-expectation and worst-case problems in time polynomial in the size of the input and 1/ε, when either the size of the game or the support of D is constant. Finally, for the special case of the maximin objective, where the learner wishes to maximize their minimum payoﬀ over all possible optimizer types, we construct a learner algorithm that runs in polynomial time in each step and guarantees convergence to the optimal learner payoﬀ. All of these results make use of recently developed machinery that converts the analysis of learning algorithms to the study of the class of corresponding geometric objects known as menus.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Arunachaleswaran, Eshwar Ram and Collina, Natalie and Schneider, Jon},
+	month = feb,
+	year = {2025},
+	note = {arXiv:2412.18297 [cs]},
+	keywords = {Computer Science - Machine Learning, Computer Science - Computer Science and Game Theory},
+	file = {PDF:/home/velocitatem/Zotero/storage/M6V9LLCS/Arunachaleswaran et al. - 2025 - Learning to Play Against Unknown Opponents.pdf:application/pdf},
+}
+
+@misc{li_distributionally_2025,
+	title = {Distributionally {Robust} {Optimization} with {Adversarial} {Data} {Contamination}},
+	url = {http://arxiv.org/abs/2507.10718},
+	doi = {10.48550/arXiv.2507.10718},
+	abstract = {Distributionally Robust Optimization (DRO) provides a framework for decision-making under distributional uncertainty, yet its effectiveness can be compromised by outliers in the training data. This paper introduces a principled approach to simultaneously address both challenges. We focus on optimizing Wasserstein-1 DRO objectives for generalized linear models with convex Lipschitz loss functions, where an \$ε\$-fraction of the training data is adversarially corrupted. Our primary contribution lies in a novel modeling framework that integrates robustness against training data contamination with robustness against distributional shifts, alongside an efficient algorithm inspired by robust statistics to solve the resulting optimization problem. We prove that our method achieves an estimation error of \$O({\textbackslash}sqrtε)\$ for the true DRO objective value using only the contaminated data under the bounded covariance assumption. This work establishes the first rigorous guarantees, supported by efficient computation, for learning under the dual challenges of data contamination and distributional shifts.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Li, Shuyao and Diakonikolas, Ilias and Diakonikolas, Jelena},
+	month = nov,
+	year = {2025},
+	note = {arXiv:2507.10718 [cs]},
+	keywords = {Computer Science - Machine Learning, Mathematics - Optimization and Control, Computer Science - Data Structures and Algorithms},
+	file = {PDF:/home/velocitatem/Zotero/storage/H6AXDTLX/Li et al. - 2025 - Distributionally Robust Optimization with Adversarial Data Contamination.pdf:application/pdf},
+}
+
+@misc{karten_llm_2025,
+	title = {{LLM} {Economist}: {Large} {Population} {Models} and {Mechanism} {Design} in {Multi}-{Agent} {Generative} {Simulacra}},
+	shorttitle = {{LLM} {Economist}},
+	url = {http://arxiv.org/abs/2507.15815},
+	doi = {10.48550/arXiv.2507.15815},
+	abstract = {We present the LLM Economist, a novel framework that uses agent-based modeling to design and assess economic policies in strategic environments with hierarchical decision-making. At the lower level, bounded rational worker agents—instantiated as persona-conditioned prompts sampled from U.S. Census-calibrated income and demographic statistics—choose labor supply to maximize text-based utility functions learned in-context. At the upper level, a planner agent employs in-context reinforcement learning to propose piecewise-linear marginal tax schedules anchored to the current U.S. federal brackets. This construction endows economic simulacra with three capabilities requisite for credible fiscal experimentation: (i) optimization of heterogeneous utilities, (ii) principled generation of large, demographically realistic agent populations, and (iii) mechanism design—the ultimate nudging problem—expressed entirely in natural language. Experiments with populations of up to one hundred interacting agents show that the planner converges near Stackelberg equilibria that improve aggregate social welfare relative to Saez solutions, while a periodic, persona-level voting procedure furthers these gains under decentralized governance. These results demonstrate that large language model-based agents can jointly model, simulate, and govern complex economic systems, providing a tractable test bed for policy evaluation at the societal scale to help build better civilizations.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Karten, Seth and Li, Wenzhe and Ding, Zihan and Kleiner, Samuel and Bai, Yu and Jin, Chi},
+	month = jul,
+	year = {2025},
+	note = {arXiv:2507.15815 [cs]},
+	keywords = {Computer Science - Machine Learning, Computer Science - Multiagent Systems},
+	file = {PDF:/home/velocitatem/Zotero/storage/U7A5Q78V/Karten et al. - 2025 - LLM Economist Large Population Models and Mechanism Design in Multi-Agent Generative Simulacra.pdf:application/pdf},
+}
+
+@techreport{mullapudi_reinforcement_nodate,
+	title = {A {Reinforcement} {Learning} {Approach} to {Dynamic} {Pricing}},
+	abstract = {Dynamic pricing represents a critical strategic challenge in modern e-commerce, where firms must navigate fluctuating demand, inventory constraints, and aggressive competitor actions. Traditional static and heuristic-based pricing models often fail to capture the complex, non-linear dynamics of competitive digital markets, leading to suboptimal profitability. This paper proposes a model-free reinforcement learning (RL) framework to address this challenge. Specifically, we design, implement, and evaluate a Q-learning agent capable of learning an optimal, state-dependent pricing policy. The agent is trained and evaluated within a simulated market environment constructed from the publicly available "Retail Price Optimization" dataset from Kaggle, which provides a rich feature set including historical sales, product characteristics, seasonality, and, crucially, competitor pricing data. The problem is formulated as a Markov Decision Process (MDP), where the agent's state incorporates its price position relative to competitors, competitor price trends, and seasonal factors. The agent's performance is benchmarked against three baseline strategies: static pricing, a reactive "follow-the-leader" heuristic, and random pricing. The results demonstrate that the Q-learning agent achieves a substantial increase in total cumulative profit over the evaluation period, outperforming all baselines by learning a nuanced policy that strategically balances price adjustments in response to market conditions. This work provides a practical and reproducible blueprint for applying reinforcement learning to optimize pricing decisions in a simulated yet realistic competitive retail environment, highlighting the potential of RL to automate complex strategic decision-making.},
+	author = {Mullapudi, Pavan},
+	note = {Publication Title: International Journal on Science and Technology (IJSAT) IJSAT25049558
+Volume: 16
+Issue: 4},
+	keywords = {Index Terms: Dynamic Pricing, Markov Decision Process, Price Optimization, Q-Learning, Reinforcement Learning, Retail Analytics},
+	file = {PDF:/home/velocitatem/Zotero/storage/G95TBLF7/9558.pdf:application/pdf},
+}
+
+@techreport{roughgarden_cs364a_2013,
+	title = {{CS364A}: {Algorithmic} {Game} {Theory} {Lecture} \#5: {Revenue}-{Maximizing} {Auctions} *},
+	author = {Roughgarden, Tim},
+	year = {2013},
+	file = {PDF:/home/velocitatem/Zotero/storage/C39VM7N9/l5.pdf:application/pdf},
+}
+
+@techreport{kuhn_distributionally_2025,
+	title = {Distributionally {Robust} {Optimization}},
+	abstract = {Distributionally robust optimization (DRO) studies decision problems under uncertainty where the probability distribution governing the uncertain problem parameters is itself uncertain. A key component of any DRO model is its ambiguity set, that is, a family of probability distributions consistent with any available structural or statistical information. DRO seeks decisions that perform best under the worst distribution in the ambiguity set. This worst case criterion is supported by findings in psychology and neuroscience, which indicate that many decision-makers have a low tolerance for distributional ambiguity. DRO is rooted in statistics, operations research and control theory, and recent research has uncovered its deep connections to regularization techniques and adversarial training in machine learning. This survey presents the key findings of the field in a unified and self-contained manner.},
+	author = {Kuhn, Daniel and Shafiee, Soroosh and Wiesemann, Wolfram},
+	year = {2025},
+	note = {arXiv: 2411.02549v3},
+	file = {PDF:/home/velocitatem/Zotero/storage/IXTTMD7G/full-text.pdf:application/pdf},
+}
+
+@article{parkes_economic_2015,
 	title = {Economic reasoning and artificial intelligence},
 	volume = {349},
-   year = {2015}
+	issn = {10959203},
+	doi = {10.1126/science.aaa8403},
+	abstract = {The field of artificial intelligence (AI) strives to build rational agents capable of perceiving the world around them and taking actions to advance specified goals. Put another way, AI researchers aim to construct a synthetic homo economicus, the mythical perfectly rational agent of neoclassical economics.We review progress toward creating this new species of machine, machina economicus, and discuss some challenges in designing AIs that can reason effectively in economic contexts. Supposing that AI succeeds in this quest, or at least comes close enough that it is useful to think about AIs in rationalistic terms, we ask how to design the rules of interaction in multi-agent systems that come to represent an economy of AIs.Theories of normative design from economics may prove more relevant for artificial agents than human agents, with AIs that better respect idealized assumptions of rationality than people, interacting through novel rules and incentive systems quite distinct from those tailored for people.},
+	number = {6245},
+	journal = {Science},
+	author = {Parkes, David C. and Wellman, Michael P.},
+	month = jul,
+	year = {2015},
+	pmid = {26185245},
+	note = {Publisher: American Association for the Advancement of Science},
+	pages = {267--272},
+	file = {PDF:/home/velocitatem/Zotero/storage/27KLNFRU/_aiEcon.pdf:application/pdf},
 }

-@book{Russell,
-   author = {Stuart Russell and Peter Norvig},
-   isbn = {978-1-292-40117-1},
-   title = {Artificial Intelligence A Modern Approach Fourth Edition Global Edition}
-}
-
-@techReport{Varian,
-   abstract = {The eeld of economic mechanism design has been an active area of research in economics for at least 20 years. This eld uses the tools of economics and game theory to design \rules of interaction" for economic transactions that will, in principle , yield some desired outcome. In this paper I provide an overview of this subject for an audience interested in applications to electronic commerce and discuss some special problems that arise in this context.},
-   author = {Hal R Varian},
-   title = {Economic Mechanism Design for Computerized Agents}
-}
-@techReport{Mullapudi,
-   abstract = {Dynamic pricing represents a critical strategic challenge in modern e-commerce, where firms must navigate fluctuating demand, inventory constraints, and aggressive competitor actions. Traditional static and heuristic-based pricing models often fail to capture the complex, non-linear dynamics of competitive digital markets, leading to suboptimal profitability. This paper proposes a model-free reinforcement learning (RL) framework to address this challenge. Specifically, we design, implement, and evaluate a Q-learning agent capable of learning an optimal, state-dependent pricing policy. The agent is trained and evaluated within a simulated market environment constructed from the publicly available "Retail Price Optimization" dataset from Kaggle, which provides a rich feature set including historical sales, product characteristics, seasonality, and, crucially, competitor pricing data. The problem is formulated as a Markov Decision Process (MDP), where the agent's state incorporates its price position relative to competitors, competitor price trends, and seasonal factors. The agent's performance is benchmarked against three baseline strategies: static pricing, a reactive "follow-the-leader" heuristic, and random pricing. The results demonstrate that the Q-learning agent achieves a substantial increase in total cumulative profit over the evaluation period, outperforming all baselines by learning a nuanced policy that strategically balances price adjustments in response to market conditions. This work provides a practical and reproducible blueprint for applying reinforcement learning to optimize pricing decisions in a simulated yet realistic competitive retail environment, highlighting the potential of RL to automate complex strategic decision-making.},
-   author = {Pavan Mullapudi},
-   issue = {4},
-   journal = {International Journal on Science and Technology (IJSAT) IJSAT25049558},
-   keywords = {Index Terms: Dynamic Pricing,Markov Decision Process,Price Optimization,Q-Learning,Reinforcement Learning,Retail Analytics},
-   title = {A Reinforcement Learning Approach to Dynamic Pricing},
-   volume = {16}
-}
-@techReport{Kuhn2025,
-   abstract = {Distributionally robust optimization (DRO) studies decision problems under uncertainty where the probability distribution governing the uncertain problem parameters is itself uncertain. A key component of any DRO model is its ambiguity set, that is, a family of probability distributions consistent with any available structural or statistical information. DRO seeks decisions that perform best under the worst distribution in the ambiguity set. This worst case criterion is supported by findings in psychology and neuroscience, which indicate that many decision-makers have a low tolerance for distributional ambiguity. DRO is rooted in statistics, operations research and control theory, and recent research has uncovered its deep connections to regularization techniques and adversarial training in machine learning. This survey presents the key findings of the field in a unified and self-contained manner.},
-   author = {Daniel Kuhn and Soroosh Shafiee and Wolfram Wiesemann},
-   title = {Distributionally Robust Optimization},
-   year = {2025}
-}
-@article{Yokoo2004,
-   abstract = {We examine the effect of false-name bids on combinatorial auction protocols. False-name bids are bids submitted by a single bidder using multiple identifiers such as multiple e-mail addresses. The obtained results are summarized as follows: (1) the Vickrey-Clarke-Groves (VCG) mechanism, which is strategy-proof and Pareto efficient when there exists no false-name bid, is not false-name-proof; (2) there exists no false-name-proof combinatorial auction protocol that satisfies Pareto efficiency; (3) one sufficient condition where the VCG mechanism is false-name-proof is identified, i.e., the concavity of a surplus function over bidders. © 2003 Elsevier Inc. All rights reserved.},
-   author = {Makoto Yokoo and Yuko Sakurai and Shigeo Matsubara},
-   doi = {10.1016/S0899-8256(03)00045-9},
-   issn = {08998256},
-   issue = {1},
-   journal = {Games and Economic Behavior},
-   keywords = {Auction,Mechanism design,Strategy-proof},
-   pages = {174-188},
-   publisher = {Academic Press Inc.},
-   title = {The effect of false-name bids in combinatorial auctions: New fraud in internet auctions},
+@article{yokoo_effect_2004,
+	title = {The effect of false-name bids in combinatorial auctions: {New} fraud in internet auctions},
 	volume = {46},
-   year = {2004}
+	issn = {08998256},
+	doi = {10.1016/S0899-8256(03)00045-9},
+	abstract = {We examine the effect of false-name bids on combinatorial auction protocols. False-name bids are bids submitted by a single bidder using multiple identifiers such as multiple e-mail addresses. The obtained results are summarized as follows: (1) the Vickrey-Clarke-Groves (VCG) mechanism, which is strategy-proof and Pareto efficient when there exists no false-name bid, is not false-name-proof; (2) there exists no false-name-proof combinatorial auction protocol that satisfies Pareto efficiency; (3) one sufficient condition where the VCG mechanism is false-name-proof is identified, i.e., the concavity of a surplus function over bidders. © 2003 Elsevier Inc. All rights reserved.},
+	number = {1},
+	journal = {Games and Economic Behavior},
+	author = {Yokoo, Makoto and Sakurai, Yuko and Matsubara, Shigeo},
+	year = {2004},
+	note = {Publisher: Academic Press Inc.},
+	keywords = {Auction, Mechanism design, Strategy-proof},
+	pages = {174--188},
+	file = {PDF:/home/velocitatem/Zotero/storage/LUVQV6WT/Yokoo04.pdf:application/pdf},
 }

-@inproceedings{Feldman2004,
-   abstract = {We develop a model to study the phenomenon of free-riding in peer-to-peer (P2P) systems. At the heart of our model is a user of a certain type, an intrinsic and private parameter that reflects the user's willingness to contribute resources to the system. A user decides whether to contribute or free-ride based on how the current contribution cost in the system compares to her type. When the societal generosity (i.e., the average type) is low, intervention is required in order to sustain the system. We present the effect of mechanisms that exclude low type users or, more realistic, penalize free-riders with degraded service. We also consider dynamic scenarios with arrivals and departures of users, and with whitewashers: users who leave the system and rejoin with new identities to avoid reputational penalties. We find that when penalty is imposed on all newcomers in order to avoid whitewashing, system performance degrades significantly only when the turnover rate among users is high.},
-   author = {Michal Feldman and Christos Papadimitriou and John Chuang and Ion Stoica},
-   doi = {10.1145/1016527.1016539},
-   isbn = {158113942X},
-   booktitle = {Proceedings of the ACM SIGCOMM 2004 Workshops},
-   keywords = {Cheap pseudonyms,Cooperation,Equilibrium,Exclusion,Free-riding,Identity cost,Incentives,Peer-to-peer,Whitewashing},
-   pages = {228-235},
-   publisher = {Association for Computing Machinery},
+@inproceedings{feldman_free-riding_2004,
 	title = {Free-riding and whitewashing in peer-to-peer systems},
-   year = {2004}
+	isbn = {1-58113-942-X},
+	doi = {10.1145/1016527.1016539},
+	abstract = {We develop a model to study the phenomenon of free-riding in peer-to-peer (P2P) systems. At the heart of our model is a user of a certain type, an intrinsic and private parameter that reflects the user's willingness to contribute resources to the system. A user decides whether to contribute or free-ride based on how the current contribution cost in the system compares to her type. When the societal generosity (i.e., the average type) is low, intervention is required in order to sustain the system. We present the effect of mechanisms that exclude low type users or, more realistic, penalize free-riders with degraded service. We also consider dynamic scenarios with arrivals and departures of users, and with whitewashers: users who leave the system and rejoin with new identities to avoid reputational penalties. We find that when penalty is imposed on all newcomers in order to avoid whitewashing, system performance degrades significantly only when the turnover rate among users is high.},
+	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2004 {Workshops}},
+	publisher = {Association for Computing Machinery},
+	author = {Feldman, Michal and Papadimitriou, Christos and Chuang, John and Stoica, Ion},
+	year = {2004},
+	keywords = {Cheap pseudonyms, Cooperation, Equilibrium, Exclusion, Free-riding, Identity cost, Incentives, Peer-to-peer, Whitewashing},
+	pages = {228--235},
+	file = {PDF:/home/velocitatem/Zotero/storage/K32WH6SB/1016527.1016539.pdf:application/pdf},
 }

-@techReport{Roughgarden2013,
-   author = {Tim Roughgarden},
-   title = {CS364A: Algorithmic Game Theory Lecture \#5: Revenue-Maximizing Auctions *},
-   year = {2013}
+@article{calvano_artificial_2018,
+	title = {Artificial {Intelligence}, {Algorithmic} {Pricing} and {Collusion}},
+	url = {https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3304991},
+	doi = {10.2139/ssrn.3304991},
+	journal = {SSRN Electronic Journal},
+	author = {Calvano, Emilio and Calzolari, Giacomo and Denicolo, Vincenzo and Pastorello, Sergio},
+	year = {2018},
+	file = {PDF:/home/velocitatem/Zotero/storage/WYTSSZBR/ssrn-3304991.pdf:application/pdf},
+}
+
+@techreport{varian_economic_1995,
+	title = {Economic {Mechanism} {Design} for {Computerized} {Agents}},
+	abstract = {The eeld of economic mechanism design has been an active area of research in economics for at least 20 years. This eld uses the tools of economics and game theory to design {\textbackslash}rules of interaction" for economic transactions that will, in principle , yield some desired outcome. In this paper I provide an overview of this subject for an audience interested in applications to electronic commerce and discuss some special problems that arise in this context.},
+	author = {Varian, Hal R},
+	year = {1995},
+	file = {PDF:/home/velocitatem/Zotero/storage/S8635QX6/varian95a.pdf:application/pdf},
+}
+
+@book{russell_artificial_nodate,
+	title = {Artificial {Intelligence} {A} {Modern} {Approach} {Fourth} {Edition} {Global} {Edition}},
+	isbn = {978-1-292-40117-1},
+	author = {Russell, Stuart and Norvig, Peter},
+	file = {PDF:/home/velocitatem/Zotero/storage/6B8W8S27/efdd4d1d4c2087fe1cbe03d9ced67f34.pdf:application/pdf},
+}
+
+@techreport{wellman_price_2004,
+	title = {Price {Prediction} in a {Trading} {Agent} {Competition} {Yevgeniy} {Vorobeychik}},
+	abstract = {The 2002 Trading Agent Competition (TAC) presented a challenging market game in the domain of travel shopping. One of the pivotal issues in this domain is uncertainty about hotel prices, which have a significant influence on the relative cost of alternative trip schedules. Thus, virtually all participants employ some method for predicting hotel prices. We survey approaches employed in the tournament, finding that agents apply an interesting diversity of techniques, taking into account differing sources of evidence bearing on prices. Based on data provided by entrants on their agents' actual predictions in the TAC-02 finals and semifinals, we analyze the relative efficacy of these approaches. The results show that taking into account game-specific information about flight prices is a major distinguishing factor. Machine learning methods effectively induce the relationship between flight and hotel prices from game data, and a purely analytical approach based on competitive equilibrium analysis achieves equal accuracy with no historical data. Employing a new measure of prediction quality, we relate absolute accuracy to bottom-line performance in the game.},
+	author = {Wellman, Michael P and Reeves, Daniel M and Lochner, Kevin M and Edu, Yvorobey@umich},
+	year = {2004},
+	note = {Publication Title: Journal of Artificial Intelligence Research
+Volume: 21},
+	pages = {19--36},
+	file = {PDF:/home/velocitatem/Zotero/storage/N9JNXFJW/live-1333-2265-jair.pdf:application/pdf},
+}
+
+@techreport{shoham_multiagent_nodate,
+	title = {Multiagent {Systems}: {Algorithmic}, {Game}-{Theoretic}, and {Logical} {Foundations}},
+	url = {http://www.masfoundations.org.},
+	author = {Shoham, Yoav and Leyton-Brown, Kevin},
+	keywords = {algorithms, auctions, communication, competition, cooperation, distributed problem solving, game theory, learning, logic, mechanism design, social choice},
+	file = {PDF:/home/velocitatem/Zotero/storage/QZVYS7V9/shoham09a.pdf:application/pdf},
+}
+
+@article{xia_evaluation-driven_2025,
+	title = {Evaluation-{Driven} {Development} and {Operations} of {LLM} {Agents}: {A} {Process} {Model} and {Reference} {Architecture}},
+	url = {http://arxiv.org/abs/2411.13768},
+	abstract = {Large Language Models (LLMs) have enabled the emergence of LLM agents, systems capable of pursuing under-specified goals and adapting after deployment. Evaluating such agents is challenging because their behavior is open ended, probabilistic, and shaped by system-level interactions over time. Traditional evaluation methods, built around fixed benchmarks and static test suites, fail to capture emergent behaviors or support continuous adaptation across the lifecycle. To ground a more systematic approach, we conduct a multivocal literature review (MLR) synthesizing academic and industrial evaluation practices. The findings directly inform two empirically derived artifacts: a process model and a reference architecture that embed evaluation as a continuous, governing function rather than a terminal checkpoint. Together they constitute the evaluation-driven development and operations (EDDOps) approach, which unifies offline (development-time) and online (runtime) evaluation within a closed feedback loop. By making evaluation evidence drive both runtime adaptation and governed redevelopment, EDDOps supports safer, more traceable evolution of LLM agents aligned with changing objectives, user needs, and governance constraints.},
+	author = {Xia, Boming and Lu, Qinghua and Zhu, Liming and Xing, Zhenchang and Zhao, Dehai and Zhang, Hao},
+	month = nov,
+	year = {2025},
+	note = {arXiv: 2411.13768},
+	file = {PDF:/home/velocitatem/Zotero/storage/H8IS64AW/2411.13768v2.pdf:application/pdf},
+}
+
+@techreport{xie_osworld_nodate,
+	title = {{OSWORLD}: {Benchmarking} {Multimodal} {Agents} for {Open}-{Ended} {Tasks} in {Real} {Computer} {Environments}},
+	url = {https://os-world.github.io},
+	abstract = {Autonomous agents that accomplish complex computer tasks with minimal human interventions have the potential to transform human-computer interaction, significantly enhancing accessibility and productivity. However, existing benchmarks either lack an interactive environment or are limited to environments specific to certain applications or domains, failing to reflect the diverse and complex nature of real-world computer use, thereby limiting the scope of tasks and agent scalability. To address this issue, we introduce OSWORLD, the first-of-its-kind scalable, real computer environment for multimodal agents, supporting task setup, execution-based evaluation, and interactive learning across various operating systems such as Ubuntu, Windows, and macOS. OSWORLD can serve as a unified, integrated computer environment for assessing open-ended computer tasks that involve arbitrary applications. Building upon OSWORLD, we create a benchmark of 369 computer tasks involving real web and desktop apps in open domains, OS file I/O, and workflows spanning multiple applications. Each task example is derived from real-world computer use cases and includes a detailed initial state setup configuration and a custom execution-based evaluation script for reliable, reproducible evaluation. Extensive evaluation of state-of-the-art LLM/VLM-based agents on OSWORLD reveals significant deficiencies in their ability to serve as computer assistants. While humans can accomplish over 72.36\% of the tasks, the best model achieves only 12.24\% success, primarily struggling with GUI grounding and operational knowledge. Comprehensive analysis using OSWORLD provides valuable insights for developing multimodal generalist agents that were not possible with previous benchmarks. Our code, environment, baseline models, and data are publicly available at https://os-world.github.io.},
+	author = {Xie, Tianbao and Zhang, Danyang and Chen, Jixuan and Li, Xiaochuan and Zhao, Siheng and Cao, Ruisheng and Jing Hua, Toh and Cheng, Zhoujun and Shin, Dongchan and Lei, Fangyu and Liu, Yitao and Xu, Yiheng and Zhou, Shuyan and Savarese, Silvio and Xiong, Caiming and Zhong, Victor and Yu, Tao},
+	note = {arXiv: 2404.07972v2},
+	file = {PDF:/home/velocitatem/Zotero/storage/LLRKXIC7/full-text.pdf:application/pdf},
+}
+
+@techreport{imperva_rapid_2025,
+	title = {The {Rapid} {Rise} of {Bots} and the {Unseen} {Risk} for {Business} \#{2025BADBOTREPORT}},
+	author = {{Imperva}},
+	year = {2025},
+	file = {PDF:/home/velocitatem/Zotero/storage/AWR9IQRD/2025-Bad-Bot-Report.pdf:application/pdf},
+}
+
+@article{perez-ricardo_exploring_2025,
+	title = {Exploring booking intentions through price elasticity of demand in tourism accommodations using large-scale data analytics},
+	volume = {31},
+	issn = {24448834},
+	doi = {10.1016/j.iedeen.2025.100271},
+	abstract = {The study aims to explore tourists' booking intentions by analyzing the price elasticity of demand in tourist accommodations. This analysis should reveal how changes in price affect booking behavior across different customer segments, using online booking records. A dataset was compiled from 106 hotels in Malaga, Spain, comprising 27,910 online bookings sourced exclusively from hotel websites. To understand the price elasticity of demand, a simple log-log regression was applied, segmenting the data based on key revenue-related variables. Subsequently, a cluster segmentation was performed using the Elbow method and K-means algorithm to identify distinct market segments. The findings highlighted that Family Travelers and Short Stay Travelers segments exhibited elastic demand, indicating higher sensitivity to price fluctuations. In contrast, Early Bookers and Mid-Season Long Stayers demonstrated inelastic demand, with lower responsiveness to changes in tourist accommodation prices. The number of variables analyzed in this study, along with the cluster analysis, represent a novelty and contribute to the existing literature on market segmentation and price elasticity of demand. This integration enriches both fields of research, offering mutual benefits and deeper insights that enhance the understanding of booking intention and pricing strategies.},
+	number = {1},
+	urldate = {2025-11-28},
+	journal = {European Research on Management and Business Economics},
+	author = {Pérez-Ricardo, Elizabeth del Carmen and García-Mestanza, Josefa},
+	month = jan,
+	year = {2025},
+	note = {Publisher: European Academy of Management and Business Economics},
+	keywords = {Booking intention, Price elasticity, Tourist segmentation},
+	file = {PDF:/home/velocitatem/Zotero/storage/QNXZJLRM/S2444883425000038.pdf:application/pdf},
+}
+
+@misc{ghaffary_amazon_nodate,
+	title = {Amazon {Sues} to {Stop} {Perplexity} {From} {Using} {AI} {Tool} to {Buy} {Stuff}},
+	url = {https://www.bloomberg.com/news/articles/2025-11-04/amazon-demands-perplexity-stop-ai-agent-from-making-purchases},
+	author = {Ghaffary, Shirin and Day, Matt},
+	file = {PDF:/home/velocitatem/Zotero/storage/IQL6FPWE/Amazon Sues to Stop Perplexity From Using AI Tool to Buy Stuff - Bloomberg.pdf:application/pdf},
+}
+
+@techreport{besbes_dynamic_nodate,
+	title = {Dynamic {Pricing} {Without} {Knowing} the {Demand} {Function}: {Risk} {Bounds} and {Near}-{Optimal} {Algorithms} *},
+	abstract = {We consider a single product revenue management problem where, given an initial inventory, the objective is to dynamically adjust prices over a finite sales horizon to maximize expected revenues. Realized demand is observed over time, but the underlying functional relationship between price and mean demand rate that governs these observations (otherwise known as the demand function or demand curve), is not known. We consider two instances of this problem: i.) a setting where the demand function is assumed to belong to a known parametric family with unknown parameter values; and ii.) a setting where the demand function is assumed to belong to a broad class of functions that need not admit any parametric representation. In each case we develop policies that learn the demand function "on the fly," and optimize prices based on that. The performance of these algorithms is measured in terms of the regret: the revenue loss relative to the maximal revenues that can be extracted when the demand function is known prior to the start of the selling season. We derive lower bounds on the regret that hold for any admissible pricing policy, and then show that our proposed algorithms achieve a regret that is "close" to this lower bound. The magnitude of the regret can be interpreted as the economic value of prior knowledge on the demand function; manifested as the revenue loss due to model uncertainty.},
+	author = {Besbes, Omar and Zeevi, Assaf},
+	note = {Publication Title: Operations Research},
+	keywords = {learning, asymptotic analysis, estimation, exploration-exploitation, pricing, Revenue management, value of information},
+	file = {PDF:/home/velocitatem/Zotero/storage/SBAIB4V2/Dp_wo_demand_risk_ob_az_posted.pdf:application/pdf},
+}
+
+@techreport{markntel_advisors_global_2025,
+	address = {Noida, Uttar Pradesh, India},
+	title = {Global {AI} {Agent} {Market} {Research} {Report}: {Forecast} (2026–2032)},
+	url = {https://www.marknteladvisors.com/research-library/ai-agent-market.html},
+	urldate = {2025-12-12},
+	institution = {MarkNtel Advisors},
+	author = {{MarkNtel Advisors}},
+	year = {2025},
+}
+
+@article{amjad_censored_2017,
+	title = {Censored {Demand} {Estimation} in {Retail}},
+	volume = {1},
+	url = {https://par.nsf.gov/servlets/purl/10066022},
+	doi = {10.1145/3154489},
+	abstract = {In this paper, the question of interest is estimating true demand of a product at a given store location and time period in the retail environment based on a single noisy and potentially censored observation. To address this question, we introduce a \%non-parametric framework to make inference from multiple time series. Somewhat surprisingly, we establish that the algorithm introduced for the purpose of "matrix completion" can be used to solve the relevant inference problem. Specifically, using the Universal Singular Value Thresholding (USVT) algorithm [7], we show that our estimator is consistent: the average mean squared error of the estimated average demand with respect to the true average demand goes to 0 as the number of store locations and time intervals increase to \${\textbackslash}infty\$. We establish naturally appealing properties of the resulting estimator both analytically as well as through a sequence of instructive simulations. Using a real dataset in retail (Walmart), we argue for the practical relevance of our approach.},
+	number = {2},
+	urldate = {2025-11-12},
+	journal = {Proceedings of the ACM on Measurement and Analysis of Computing Systems},
+	author = {Amjad, Muhammad J. and Shah, Devavrat},
+	month = dec,
+	year = {2017},
+	note = {Publisher: Association for Computing Machinery (ACM)},
+	pages = {1--28},
+	file = {PDF:/home/velocitatem/Zotero/storage/5ZYADDT4/10066022.pdf:application/pdf},
+}
+
+@misc{ganie_uncertainty_2025,
+	title = {Uncertainty in {Authorship}: {Why} {Perfect} {AI} {Detection} {Is} {Mathematically} {Impossible}},
+	shorttitle = {Uncertainty in {Authorship}},
+	url = {http://arxiv.org/abs/2509.11915},
+	doi = {10.48550/arXiv.2509.11915},
+	abstract = {As large language models (LLMs) become more advanced, it is increasingly difficult to distinguish between human-written and AI-generated text. This paper draws a conceptual parallel between quantum uncertainty and the limits of authorship detection in natural language. We argue that there is a fundamental trade-off: the more confidently one tries to identify whether a text was written by a human or an AI, the more one risks disrupting the text's natural flow and authenticity. This mirrors the tension between precision and disturbance found in quantum systems. We explore how current detection methods--such as stylometry, watermarking, and neural classifiers--face inherent limitations. Enhancing detection accuracy often leads to changes in the AI's output, making other features less reliable. In effect, the very act of trying to detect AI authorship introduces uncertainty elsewhere in the text. Our analysis shows that when AI-generated text closely mimics human writing, perfect detection becomes not just technologically difficult but theoretically impossible. We address counterarguments and discuss the broader implications for authorship, ethics, and policy. Ultimately, we suggest that the challenge of AI-text detection is not just a matter of better tools--it reflects a deeper, unavoidable tension in the nature of language itself.},
+	language = {en},
+	urldate = {2026-01-05},
+	publisher = {arXiv},
+	author = {Ganie, Aadil Gani},
+	month = sep,
+	year = {2025},
+	note = {arXiv:2509.11915 [cs]},
+	keywords = {Computer Science - Computation and Language},
+	file = {PDF:/home/velocitatem/Zotero/storage/3Z2XK4QC/Ganie - 2025 - Uncertainty in Authorship Why Perfect AI Detection Is Mathematically Impossible.pdf:application/pdf},
 }
--- a/paper/src/chapters/01-intro.tex
+++ b/paper/src/chapters/01-intro.tex
@@ -14,14 +14,14 @@ This research effort touches a large variety of domains, spanning behavioral eco

 \subsection{Motivation and Market Context}

-The current innovation boom in generative artificial intelligence and its applications to knowledge-based work tasks has brought many competing technologies for browser-use automation, with benchmarks and evaluations \cite{Xia2025} motivating the development of capabilities focused on commercial research, understanding, and transaction execution \cite{Xie}. The ``AI Agent'' market is forecasted to grow from around USD 5-8 billion in 2025 to USD 42-52 billion by 2030. This surge reflects adoption in e-commerce, customer service, and enterprise automation, where agents handle interactions previously done by humans, raising the question of how these systems should be designed for future robustness as well as how to maintain a competitive edge in the analytical components of e-commerce platforms \cite{MarkNtelAdvisors2025}.
+The current innovation boom in generative artificial intelligence and its applications to knowledge-based work tasks has brought many competing technologies for browser-use automation, with benchmarks and evaluations \cite{xia_evaluation-driven_2025} motivating the development of capabilities focused on commercial research, understanding, and transaction execution \cite{xie_osworld_nodate}. The ``AI Agent'' market is forecasted to grow from around USD 5-8 billion in 2025 to USD 42-52 billion by 2030. This surge reflects adoption in e-commerce, customer service, and enterprise automation, where agents handle interactions previously done by humans, raising the question of how these systems should be designed for future robustness as well as how to maintain a competitive edge in the analytical components of e-commerce platforms \cite{markntel_advisors_global_2025}.

-The key stakeholders affected by the threat of increasing agent-driven traffic include online businesses and platform operators (especially in bot-heavy sectors like retail, travel, and financial services), their security, fraud, and engineering teams, end users whose accounts and data are exposed and whose experience degrades, regulators and legal stakeholders responding to breaches and fraud, and the attackers or bot operators driving the automation \cite{Imperva2025}.
+The key stakeholders affected by the threat of increasing agent-driven traffic include online businesses and platform operators (especially in bot-heavy sectors like retail, travel, and financial services), their security, fraud, and engineering teams, end users whose accounts and data are exposed and whose experience degrades, regulators and legal stakeholders responding to breaches and fraud, and the attackers or bot operators driving the automation \cite{imperva_rapid_2025}.

-The industry has already seen legal action in cases like Amazon against Perplexity \cite{AmazonvsPerplexity}, stemming from the difficulty of identifying traffic from hybrid systems like the Commet browser. This paper explores such systems to better understand what the interaction data looks like and what it means for dynamic pricing and recommendation systems downstream. This observed impact indicates a need for prevention of secondary negative effects on the ``legacy'' systems which power modern revenue sources for many companies. Dynamic pricing algorithms rely on directly translating demand features $q$ to new price assignments $\hat{p}$ across a catalogue of products of size $N$. This opens opportunities to design a \textit{tabula rasa} of digital market mechanisms that will shape the future of commerce in the age of artificial intelligence.
+The industry has already seen legal action in cases like Amazon against Perplexity \cite{ghaffary_amazon_nodate}, stemming from the difficulty of identifying traffic from hybrid systems like the Commet browser. This paper explores such systems to better understand what the interaction data looks like and what it means for dynamic pricing and recommendation systems downstream. This observed impact indicates a need for prevention of secondary negative effects on the ``legacy'' systems which power modern revenue sources for many companies. Dynamic pricing algorithms rely on directly translating demand features $q$ to new price assignments $\hat{p}$ across a catalogue of products of size $N$. This opens opportunities to design a \textit{tabula rasa} of digital market mechanisms that will shape the future of commerce in the age of artificial intelligence.

 \subsection{Solution Space Overview}
-Dynamic pricing systems, as presented in \cite{Mueller2019}, often deal with sparse low-rank data of demand signals which, combined with contamination from agents, creates complex interactions that impact pricing. To further complicate the problem, certain commercial settings such as the one presented in \cite{Amjad2017} must address the true demand of products under censored observations. This provides a formulation for handling demand in our case with multiple kinds of commercial mediators: $\hat{q} \gets q_A + q_H$ where $q_A$ represents the distribution of demand generated by agentic mediators and $q_H$ represents that of true human demand, these are two distinct populations with divergent objective functions.
+Dynamic pricing systems, as presented in \cite{mueller_low-rank_2019}, often deal with sparse low-rank data of demand signals which, combined with contamination from agents, creates complex interactions that impact pricing. To further complicate the problem, certain commercial settings such as the one presented in \cite{amjad_censored_2017} must address the true demand of products under censored observations. This provides a formulation for handling demand in our case with multiple kinds of commercial mediators: $\hat{q} \gets q_A + q_H$ where $q_A$ represents the distribution of demand generated by agentic mediators and $q_H$ represents that of true human demand, these are two distinct populations with divergent objective functions.

 We formally define interaction data as coming from some actor which can either be an agent ($A$) or human ($H$). For purposes of this research, an agent is an algorithmic loop with the ability to access a web platform and perform actions such as clicks, scrolls, and input field fills. The loop terminates when the internal large language model judges the provided task definition as complete. A detailed breakdown can be found in \cref{algagent-loop}.

@@ -54,4 +54,4 @@ Extract final result $r$ from terminal state\;
 \end{algorithm}


-The previously described goal of separability allows us to formulate a task which entails taking raw interaction data for either actor and creating a composite demand estimate $\hat{q}$. We propose a robust optimization objective defined in our methodology, transforming the pricing problem into a form of Distributionally Robust Optimization \cite{Kuhn2025} where the learner must guard against adversarial contamination in observed demand distributors. In this setting we must learn to make decision that perform under the assumption of not having a single estimated probability distribution but under an ambiguity set of any distribution, of which we have limited information. In our case as stated is a mixture of distributions with a parameter which is unknown and non-stationary.
+The previously described goal of separability allows us to formulate a task which entails taking raw interaction data for either actor and creating a composite demand estimate $\hat{q}$. We propose a robust optimization objective defined in our methodology, transforming the pricing problem into a form of Distributionally Robust Optimization \cite{kuhn_distributionally_2025} where the learner must guard against adversarial contamination in observed demand distributors. In this setting we must learn to make decision that perform under the assumption of not having a single estimated probability distribution but under an ambiguity set of any distribution, of which we have limited information. In our case as stated is a mixture of distributions with a parameter which is unknown and non-stationary.
--- a/paper/src/chapters/02-literature-review.tex
+++ b/paper/src/chapters/02-literature-review.tex
@@ -1,28 +1,28 @@
 \section{Literature Review}

-To better understand all wedges of the work, we must start by exploring the nature of agents and agentic computer use and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover, leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. The introduction of these mediating actor entities into economic systems, is further creating a threat of false-name bidding \cite{Yokoo2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems, demonstrate whitewashing in AI agents which can ignore defensive mechanisms by re-entry with different identities \cite{Feldman2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for separating non-human reconnaissance from genuine human demand expression and integrating that separation into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap, is what our contribution aims to address, particularly for the aforementioned stakeholder groups.
+To better understand all wedges of the work, we must start by exploring the nature of agents and agentic computer use and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover, leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. The introduction of these mediating actor entities into economic systems, is further creating a threat of false-name bidding \cite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems, demonstrate whitewashing in AI agents which can ignore defensive mechanisms by re-entry with different identities \cite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for separating non-human reconnaissance from genuine human demand expression and integrating that separation into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap, is what our contribution aims to address, particularly for the aforementioned stakeholder groups.

 \subsection{Agent Taxonomy and Definitions}

-An agent in the context of artificial intelligence is generally defined by anything that can reason and act upon observations of its environments (collected through some sensory inputs) and carry out actions through effectors. Moreover, a rational agent is an entity that is capable of perceiving the world around them and taking actions to advance specified goals. This definition by \cite{Russell} is further developed in an economic context by \cite{Parkes2015}, suggesting AI research attempts to construct a synthetic \textit{homo economicus}, which may also be termed \textit{machina economicus}.
-A specific class or taxon of this \textit{machina economicus}, the Large Language Model (LLM) agent, is defined as an autonomous system capable of achieving goals and adapting post-training, often without needing explicit code or fundamental model changes. \cite{Xia2025}
+An agent in the context of artificial intelligence is generally defined by anything that can reason and act upon observations of its environments (collected through some sensory inputs) and carry out actions through effectors. Moreover, a rational agent is an entity that is capable of perceiving the world around them and taking actions to advance specified goals. This definition by \cite{russell_artificial_nodate} is further developed in an economic context by \cite{parkes_economic_2015}, suggesting AI research attempts to construct a synthetic \textit{homo economicus}, which may also be termed \textit{machina economicus}.
+A specific class or taxon of this \textit{machina economicus}, the Large Language Model (LLM) agent, is defined as an autonomous system capable of achieving goals and adapting post-training, often without needing explicit code or fundamental model changes. \cite{xia_evaluation-driven_2025}

-We must however acknowledge the current SOTA as presented by OSWORLD simulations in \cite{Xie} have demonstrated that multi-modal tasks across desktop and web interaction modes, have a top-performing score of only 12.24\% success, whereas humans have a higher 72\% success rate. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems.
+We must however acknowledge the current SOTA as presented by OSWORLD simulations in \cite{xie_osworld_nodate} have demonstrated that multi-modal tasks across desktop and web interaction modes, have a top-performing score of only 12.24\% success, whereas humans have a higher 72\% success rate. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems.

 We model an agent session as producing some events with lower in-session conversion levels relative to humans, this we state in our assumption that $P(\text{purchase} \vert A) \ll P(\text{purchase} \vert H)$ but with a potentially higher volatility in $\hat{q}$, which we observe through the look-to-book metrics in our simulation.

 \subsection{Economic Agents: From Homo Economicus to Machina Economicus}

-Existing behavioral economic models tend to be criticized for the assumption of rational behavior, as is embodied in the term of homo economicus. The definition of a machina economicus by \cite{Parkes2015} is quite appropriate for our case, particularly because these assumptions of rationality have been argued to be a very adequate reference for AI research by \cite{Varian}. For modeling this behavior, the trajectories of these agents can be formally defined to be partially observable Markov decision processes. \cite{Xie} Agents are however not to be confused with web-bots which have previously been known as automated software applications or scrapers which are set with a purpose of carrying out specific tasks on the internet, without a higher level of internal judgement. \cite{Imperva2025} In our research, we refer to this actor simply as an Agent belonging to the distribution $A$.
+Existing behavioral economic models tend to be criticized for the assumption of rational behavior, as is embodied in the term of homo economicus. The definition of a machina economicus by \cite{parkes_economic_2015} is quite appropriate for our case, particularly because these assumptions of rationality have been argued to be a very adequate reference for AI research by \cite{varian_economic_1995}. For modeling this behavior, the trajectories of these agents can be formally defined to be partially observable Markov decision processes. \cite{xie_osworld_nodate} Agents are however not to be confused with web-bots which have previously been known as automated software applications or scrapers which are set with a purpose of carrying out specific tasks on the internet, without a higher level of internal judgement. \cite{imperva_rapid_2025} In our research, we refer to this actor simply as an Agent belonging to the distribution $A$.

-This economic framing also helps separate two related but distinct phenomena of agents as buyers (changing market demand composition), and agents as information gatherers (changing the observed interactions used by pricing/recommendation systems). The thesis focuses on the second, where information acquisition strategically precedes purchase execution. We do not however dismiss the proposed expectation that existing economic systems serving humans, will not be populated by AIs across multiple channels and with various possibly misaligned goals as stated by \cite{Parkes2015}.
+This economic framing also helps separate two related but distinct phenomena of agents as buyers (changing market demand composition), and agents as information gatherers (changing the observed interactions used by pricing/recommendation systems). The thesis focuses on the second, where information acquisition strategically precedes purchase execution. We do not however dismiss the proposed expectation that existing economic systems serving humans, will not be populated by AIs across multiple channels and with various possibly misaligned goals as stated by \cite{parkes_economic_2015}.


 \subsection{Problem Evidence and Market Impact}

-The statistical issue of contamination in dynamic pricing systems that observe demand features as a means to update prices has been documented in various previous contexts. The airline industry (which has accounted for 24\% of observed disruptions) has seen malicious activity with a measureable impact on skewing key performance indicators by behavior visible in the look-to-book metrics. Excessive reconnaissance traffic inflates search volume without corresponding completed bookings, thereby skewing demand forecasts and disrupting dynamic pricing models. Demand proxies have also been observed to cause significant threat to inventory management by creating artificial scarcity that distorts the demand-supply relationships in the enterprise model. Censored demand as shown in \cite{Amjad2017} can also be observed in low-bias demand under-estimation caused by a distortion effect coming from non-human traffic data. \cite{Imperva2025}
+The statistical issue of contamination in dynamic pricing systems that observe demand features as a means to update prices has been documented in various previous contexts. The airline industry (which has accounted for 24\% of observed disruptions) has seen malicious activity with a measureable impact on skewing key performance indicators by behavior visible in the look-to-book metrics. Excessive reconnaissance traffic inflates search volume without corresponding completed bookings, thereby skewing demand forecasts and disrupting dynamic pricing models. Demand proxies have also been observed to cause significant threat to inventory management by creating artificial scarcity that distorts the demand-supply relationships in the enterprise model. Censored demand as shown in \cite{amjad_censored_2017} can also be observed in low-bias demand under-estimation caused by a distortion effect coming from non-human traffic data. \cite{imperva_rapid_2025}

-When dynamic pricing algorithms operate on highly contaminated or noisy data, the risk grows significantly in creating inaccurate price inferences. The emergent mitigation driven by un-informed reward and regret signals might lead to price suppression for sales continuity which results in harming margins and resulting in a revenue loss. System that poorly fit undesired behavior might result in price gouging, which calls for strong guardrails while preserving targeted business strategy. \cite{Mullapudi}
+When dynamic pricing algorithms operate on highly contaminated or noisy data, the risk grows significantly in creating inaccurate price inferences. The emergent mitigation driven by un-informed reward and regret signals might lead to price suppression for sales continuity which results in harming margins and resulting in a revenue loss. System that poorly fit undesired behavior might result in price gouging, which calls for strong guardrails while preserving targeted business strategy. \cite{mullapudi_reinforcement_nodate}


 %Documented instances of agent-driven market disruptions - Quantitative evidence of pricing manipulation - Case studies from affected industries
@@ -31,11 +31,14 @@ When dynamic pricing algorithms operate on highly contaminated or noisy data, th



-Early hints of exploration of prices in a standard English auction explored in Economic Mechanism Design for Computerized Agents which hints at exploration of prices in a sequential manner, which leads to a marginally different cost to the bidder than the reservation price of the seller. This is a setting in which there is no cost incured by the buyer for their actions or exploring prices in the market. They propose that any agent responsable for the pricing of a good must be imune to dynamic strategies which might extract private information from a market. A key take-away which relates to the Vickery auction mechanism (also called a \textit{direct mechanism}) suggests that not only would defenses against such exploitation be necessary, but the construction of a mechanism in which revelation of the true willingness to pay is the dominant strategy for commerce.
+Early hints of exploration of prices in a standard English auction explored in \cite{varian_economic_nodate} which hints at exploration of prices in a sequential manner, which leads to a marginally different cost to the bidder than the reservation price of the seller. This is a setting in which there is no cost incured by the buyer for their actions or exploring prices in the market. They propose that any agent responsable for the pricing of a good must be imune to dynamic strategies which might extract private information from a market. A key take-away which relates to the Vickery auction mechanism (also called a \textit{direct mechanism}) suggests that not only would defenses against such exploitation be necessary, but the construction of a mechanism in which revelation of the true willingness to pay is the dominant strategy for commerce.

-Economic foundations: relating the problem to options pricing theory. Cost of Information (COI) concept and its relevance
+Like in classical revenue-maximizing auctions \cite{roughgarden_cs364a_2013} we assume that the human actor in our system has a private valuation $v$ which we formally draw from later defined distributions. The important note here is that the agent proxy does not have a mechanism to convey this private information into the demand data which directly impacts the pricing systems.
+
+% Economic foundations: relating the problem to options pricing theory. Cost of Information (COI) concept and its relevance
+
+% Link Coasean Singularity and other economic market theory and highlight specific information of supra competitive pricing.

-Link Coasean Singularity and other economic market theory and highlight specific information of supra competitive pricing.

 \subsection{Landscape of Existing Work}

--- a/paper/src/chapters/03-methodology.tex
+++ b/paper/src/chapters/03-methodology.tex
@@ -154,7 +154,7 @@ where $p_0 \in \mathbb{R}^N$ is the base price vector (which is seeded into our
 We will for our offilne experimental intents generalize a master function for encompasing distinct demand estimation and pricing strategies.

 \begin{align}
-V(\cdot) = \max_{p_t} \min_{Q \in \mathcal{U}(\hat{d})}{\mathbb{E}_{d\sim Q} [p_t \times d(p_t, x_t ; \theta) + \psi V_{t+1}(\cdot)]} \\
+V(\cdot) = \max_{p_t} \min_{Q \in \mathcal{U}(\hat{d})}{\mathbb{E}_{d\sim Q} [p_t \times d(p_t, x_t ; \theta) + \psi V_{t+1}(\cdot)]}
 \end{align}

 We follow differnet substitutouns which will server as hyperparameters later on.
@@ -224,3 +224,5 @@ We also need to think about a policy like taxation to the agents Strategy-Proof
 Steve Burns, superior culliculus (face heuristics) we create this sort of part of the 'brain' + amortized inference.

 We could say that a DQN for example is the learnin subsystem and then within our reward mechanism or some other computational method we introduce a steering subsystem which acts as the proposed ``pricing heuristic'' against the given non human transaction data.
+
+\section{}